Remove byte-order mark from UTF-8 strings
Byte-order marks are useless in UTF-8, but that doesn't mean we don't find them in the wild. Get rid of them, because they confuse the hell out of the collation functions. Reported by Kai Elwert.
This commit is contained in:
parent
055be880d4
commit
75dc4106a8
11
src/misc.c
11
src/misc.c
|
@@ -423,7 +423,16 @@ unicode_fixup_string(char *str)
|
|||
|
||||
/* String is valid UTF-8 */
|
||||
if (!u8_check((uint8_t *)str, len))
|
||||
return str;
|
||||
{
|
||||
if (len >= 3)
|
||||
{
|
||||
/* Check for and strip byte-order mark */
|
||||
if (memcmp("\xef\xbb\xbf", str, 3) == 0)
|
||||
memmove(str, str + 3, len - 3 + 1);
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
ret = u8_conv_from_encoding("ascii", iconveh_question_mark, str, len, NULL, NULL, &len);
|
||||
if (!ret)
|
||||
|
|
Loading…
Reference in New Issue