Remove byte-order mark from UTF-8 strings

The byte-order marks are useless for UTF-8, but that doesn't mean we don't
find them in the wild. Get rid of them, they confuse the hell out of the
collation functions.

Reported by Kai Elwert.
This commit is contained in:
Julien BLACHE 2011-04-30 18:52:33 +02:00
parent 055be880d4
commit 75dc4106a8
1 changed file with 10 additions and 1 deletion

View File

@@ -423,7 +423,16 @@ unicode_fixup_string(char *str)
/* String is valid UTF-8 */
if (!u8_check((uint8_t *)str, len))
return str;
{
if (len >= 3)
{
/* Check for and strip byte-order mark */
if (memcmp("\xef\xbb\xbf", str, 3) == 0)
memmove(str, str + 3, len - 3 + 1);
}
return str;
}
ret = u8_conv_from_encoding("ascii", iconveh_question_mark, str, len, NULL, NULL, &len);
if (!ret)