From 32727bd296f7447b0313aba60473ee8099ea0319 Mon Sep 17 00:00:00 2001 From: stephan-01010011 Date: Sun, 31 May 2015 14:05:31 +0100 Subject: [PATCH] =?UTF-8?q?Fixed=20bug=20that=20caused=20icy=20header=20me?= =?UTF-8?q?tadata=20to=20be=20not=20correctly=20encoded/converted.=20Chara?= =?UTF-8?q?cters=20above=20x7F=20were=20replaced=20by=20'=3F'=20character?= =?UTF-8?q?=20although=20the=20rfc=20defines=20a=20ISO=E2=88=928859?= =?UTF-8?q?=E2=88=921=20encoding=20for=20descriptive=20field-content.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to RFC 2616, field-content is defined as follows: The TEXT rule is only used for descriptive field contents and values that are not intended to be interpreted by the message parser. Words of *TEXT MAY contain characters from character sets other than ISO-8859-1 only when encoded according to the rules of RFC 2047. In the previous implementation the icy metadata was converted using fromcode "ascii", so every byte above 0x7F was replaced by '?'. Incoming icy header field values should therefore be converted from "ISO-8859-1" to UTF-8 before they are added to the metadata structure. 
- misc.c unicode_fixup_string enhanced by an additional parameter to define the fromcode - misc.h unicode_fixup_string prototype updated - filescanner.c function fixup_tags updated to stay compatible to the previous implementation using fromcode "ascii" - db.c function unicode_fixup_mfi updated to stay compatible to the previous implementation using fromcode "ascii" - http.c function metadata_header_get enhanced to encode the header field-content as "ISO−8859−1" to comply with rfc2616 --- src/db.c | 2 +- src/filescanner.c | 2 +- src/http.c | 22 +++++++++++++++++++--- src/misc.c | 5 +++-- src/misc.h | 2 +- 5 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/db.c b/src/db.c index 6e1d3429..6a24c101 100644 --- a/src/db.c +++ b/src/db.c @@ -465,7 +465,7 @@ unicode_fixup_mfi(struct media_file_info *mfi) if (!*field) continue; - ret = unicode_fixup_string(*field); + ret = unicode_fixup_string(*field,"ascii"); if (ret != *field) { free(*field); diff --git a/src/filescanner.c b/src/filescanner.c index 0c5cbd31..68b140ab 100644 --- a/src/filescanner.c +++ b/src/filescanner.c @@ -588,7 +588,7 @@ fixup_tags(struct media_file_info *mfi) /* fname is left untouched by unicode_fixup_mfi() for * obvious reasons, so ensure it is proper UTF-8 */ - mfi->title = unicode_fixup_string(mfi->fname); + mfi->title = unicode_fixup_string(mfi->fname,"ascii"); if (mfi->title == mfi->fname) mfi->title = strdup(mfi->fname); } diff --git a/src/http.c b/src/http.c index 7f9f763c..4d9323bf 100644 --- a/src/http.c +++ b/src/http.c @@ -408,6 +408,7 @@ metadata_header_get(struct http_icy_metadata *metadata, AVFormatContext *fmtctx) uint8_t *buffer; char *icy_token; char *ptr; + const char *headerenc = "ISO−8859−1"; av_opt_get(fmtctx, "icy_metadata_headers", AV_OPT_SEARCH_CHILDREN, &buffer); if (!buffer) @@ -427,12 +428,27 @@ metadata_header_get(struct http_icy_metadata *metadata, AVFormatContext *fmtctx) if (ptr[0] == ' ') ptr++; + + /* + Reference: + 
http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 + http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 + + Based on rfc2616 the field-content is defined as follows: + The TEXT rule is only used for descriptive field contents and values that are not intended to be interpreted + by the message parser. Words of *TEXT MAY contain characters from character sets other than ISO- 8859-1 + only when encoded according to the rules of RFC 2047. + + Incoming icy header field-values should be encoded as "ISO−8859−1" before adding them to the metadata structure. + */ + if ((strncmp(icy_token, "icy-name", strlen("icy-name")) == 0) && !metadata->name) - metadata->name = strdup(ptr); + metadata->name = strdup(unicode_fixup_string(ptr,headerenc)); else if ((strncmp(icy_token, "icy-description", strlen("icy-description")) == 0) && !metadata->description) - metadata->description = strdup(ptr); + metadata->description = strdup(unicode_fixup_string(ptr,headerenc)); else if ((strncmp(icy_token, "icy-genre", strlen("icy-genre")) == 0) && !metadata->genre) - metadata->genre = strdup(ptr); + metadata->genre = strdup(unicode_fixup_string(ptr,headerenc)); icy_token = strtok(NULL, "\r\n"); } diff --git a/src/misc.c b/src/misc.c index 246d4a6d..ab916f78 100644 --- a/src/misc.c +++ b/src/misc.c @@ -486,8 +486,9 @@ m_realpath(const char *pathname) return ret; } + char * -unicode_fixup_string(char *str) +unicode_fixup_string(char *str, const char *fromcode) { uint8_t *ret; size_t len; @@ -510,7 +511,7 @@ unicode_fixup_string(char *str) return str; } - ret = u8_conv_from_encoding("ascii", iconveh_question_mark, str, len, NULL, NULL, &len); + ret = u8_conv_from_encoding(fromcode, iconveh_question_mark, str, len, NULL, NULL, &len); if (!ret) { DPRINTF(E_LOG, L_MISC, "Could not convert string '%s' to UTF-8: %s\n", str, strerror(errno)); diff --git a/src/misc.h b/src/misc.h index e3026b42..9119211a 100644 --- a/src/misc.h +++ b/src/misc.h @@ -65,7 +65,7 @@ char * m_realpath(const 
char *pathname); char * -unicode_fixup_string(char *str); +unicode_fixup_string(char *str, const char *fromcode); char * trimwhitespace(const char *str);