Fixed bug that caused icy header metadata to be incorrectly

encoded/converted. Characters above 0x7F were replaced by the '?' character
although RFC 2616 defines an ISO-8859-1 encoding for descriptive
field-content.

According to RFC 2616 the field-content is defined as follows:
<the OCTETs making up the field-value and consisting of either *TEXT or
combinations of token, separators, and quoted-string>
The TEXT rule is only used for descriptive field contents and values
that are not intended to be interpreted by the message parser. Words of
*TEXT MAY contain characters from character sets other than ISO-8859-1
only when encoded according to the rules of RFC 2047.

In the previous implementation the icy metadata was converted based on
fromcode "ascii".

From now on, incoming icy header field-values are interpreted as
"ISO-8859-1" and converted to UTF-8 before adding them to the metadata structure.

- misc.c unicode_fixup_string enhanced by an additional parameter to
define the fromcode
- misc.h unicode_fixup_string prototype updated
- filescanner.c function fixup_tags updated to stay compatible to the
previous implementation using fromcode "ascii"
- db.c function unicode_fixup_mfi updated to stay compatible to the
previous implementation using fromcode "ascii"
- http.c function metadata_header_get enhanced to treat the header
field-content as "ISO-8859-1" to comply with RFC 2616
This commit is contained in:
stephan-01010011 2015-05-31 14:05:31 +01:00
parent 70fea7e459
commit 32727bd296
5 changed files with 25 additions and 8 deletions

View File

@ -465,7 +465,7 @@ unicode_fixup_mfi(struct media_file_info *mfi)
if (!*field)
continue;
ret = unicode_fixup_string(*field);
ret = unicode_fixup_string(*field,"ascii");
if (ret != *field)
{
free(*field);

View File

@ -588,7 +588,7 @@ fixup_tags(struct media_file_info *mfi)
/* fname is left untouched by unicode_fixup_mfi() for
* obvious reasons, so ensure it is proper UTF-8
*/
mfi->title = unicode_fixup_string(mfi->fname);
mfi->title = unicode_fixup_string(mfi->fname,"ascii");
if (mfi->title == mfi->fname)
mfi->title = strdup(mfi->fname);
}

View File

@ -408,6 +408,7 @@ metadata_header_get(struct http_icy_metadata *metadata, AVFormatContext *fmtctx)
uint8_t *buffer;
char *icy_token;
char *ptr;
const char *headerenc = "ISO88591";
av_opt_get(fmtctx, "icy_metadata_headers", AV_OPT_SEARCH_CHILDREN, &buffer);
if (!buffer)
@ -427,12 +428,27 @@ metadata_header_get(struct http_icy_metadata *metadata, AVFormatContext *fmtctx)
if (ptr[0] == ' ')
ptr++;
/*
Reference:
http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2
http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
Based on rfc2616 the field-content is defined as follows: <the OCTETs making up the field-value
and consisting of either *TEXT or combinations of token, separators, and quoted-string>
The TEXT rule is only used for descriptive field contents and values that are not intended to be interpreted
by the message parser. Words of *TEXT MAY contain characters from character sets other than ISO-8859-1
only when encoded according to the rules of RFC 2047.
Incoming icy header field-values should be encoded as "ISO88591" before adding them to the metadata structure.
*/
if ((strncmp(icy_token, "icy-name", strlen("icy-name")) == 0) && !metadata->name)
metadata->name = strdup(ptr);
metadata->name = strdup(unicode_fixup_string(ptr,headerenc));
else if ((strncmp(icy_token, "icy-description", strlen("icy-description")) == 0) && !metadata->description)
metadata->description = strdup(ptr);
metadata->description = strdup(unicode_fixup_string(ptr,headerenc));
else if ((strncmp(icy_token, "icy-genre", strlen("icy-genre")) == 0) && !metadata->genre)
metadata->genre = strdup(ptr);
metadata->genre = strdup(unicode_fixup_string(ptr,headerenc));
icy_token = strtok(NULL, "\r\n");
}

View File

@ -486,8 +486,9 @@ m_realpath(const char *pathname)
return ret;
}
char *
unicode_fixup_string(char *str)
unicode_fixup_string(char *str, const char *fromcode)
{
uint8_t *ret;
size_t len;
@ -510,7 +511,7 @@ unicode_fixup_string(char *str)
return str;
}
ret = u8_conv_from_encoding("ascii", iconveh_question_mark, str, len, NULL, NULL, &len);
ret = u8_conv_from_encoding(fromcode, iconveh_question_mark, str, len, NULL, NULL, &len);
if (!ret)
{
DPRINTF(E_LOG, L_MISC, "Could not convert string '%s' to UTF-8: %s\n", str, strerror(errno));

View File

@ -65,7 +65,7 @@ char *
m_realpath(const char *pathname);
char *
unicode_fixup_string(char *str);
unicode_fixup_string(char *str, const char *fromcode);
char *
trimwhitespace(const char *str);