Allow --disable-iconv

2025-11-25 03:56:21 -05:00 · 2007-09-04 07:26:50 +00:00
parent 9246bfe59e
commit 0be8f98a40
3 changed files with 303 additions and 10 deletions
--- a/configure.in
+++ b/configure.in
@@ -14,7 +14,6 @@ AC_PROG_CC
 dnl AC_PROG_YACC
 dnl AM_PROG_LEX
 AC_PROG_LIBTOOL
-AM_ICONV

 AC_CHECK_HEADERS([sys/wait.h])
 AC_CHECK_HEADERS([sys/param.h])
@@ -50,6 +49,7 @@ db_sqlite3=false
 use_ffmpeg=false;
 use_upnp=false;
 use_ssl=false
+use_iconv=true

 STATIC_LIBS=no

@@ -59,6 +59,13 @@ dnl fix freebsd's broken (?) libpthread
 AC_CHECK_LIB(c_r,pthread_creat,LIBS="${LIBS} -lc_r", [
  AC_CHECK_LIB(pthread,pthread_create,LIBS="${LIBS} -lpthread") ])

+AC_ARG_ENABLE(iconv, [  --disable-iconv         Disable iconv conversion],
+                        [ case "${enableval}" in
+			yes) use_iconv=true;;
+			no) use_iconv=false;;
+			*) AC_MSG_ERROR(bad value ${enableval} for --enable-iconv);;
+			esac ])
+
 AC_ARG_ENABLE(sqlite,[  --enable-sqlite         Enable the sqlite db backend],
 			[ case "${enableval}" in
 			yes) db_sqlite=true; have_sql=true;;
@@ -479,7 +486,11 @@ AC_CHECK_LIB(socket,socket,V_NETLIBS="-lsocket $V_NETLIBS",,)

 AC_SUBST(V_NETLIBS)

-dnl add the iconv stuff
-LDFLAGS="${LDFLAGS} ${LIBICONV}"
+
+if test "x${use_iconv}" = "xtrue"; then
+   dnl iconv was not disabled: run the iconv check and keep its link flags
+   AM_ICONV
+   LDFLAGS="${LDFLAGS} ${LIBICONV}"
+fi

 AC_OUTPUT(src/Makefile src/plugins/Makefile admin-root/Makefile admin-root/lib-js/Makefile admin-root/lib-js/script.aculo.us/Makefile contrib/Makefile contrib/init.d/Makefile src/mdns/Makefile Makefile)
--- a/src/scan-mp3.c
+++ b/src/scan-mp3.c
@@ -39,6 +39,7 @@
 #include "err.h"
 #include "io.h"
 #include "mp3-scanner.h"
+#include "util.h"

 #if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4)
 # define _PACKED __attribute((packed))
@@ -350,15 +351,25 @@ int scan_mp3_get_mp3tags(char *file, MP3FILE *pmp3) {

            if(native_text) {
                have_utf8=1;
-                utf8_text = util_xtoutf8_alloc(native_text,strlen(native_text),
-                                               conversion_codepage);

-                if(!utf8_text) {
-                    utf8_text = (char*)id3_ucs4_latin1duplicate(native_text);
-                    if(utf8_text)
-                        mem_register(utf8_text,0);
+
+                utf8_text = (char*)id3_ucs4_utf8duplicate(native_text);
+                if(utf8_text)
+                    mem_register(utf8_text,0);
+
+                if(id3_field_gettextencoding(&pid3frame->fields[1]) ==
+                   ID3_FIELD_TEXTENCODING_ISO_8859_1) {
+#ifdef HAVE_ICONV
+                    /* this is kinda cheesy, but ucs4* == char* for 8859-1 */
+                    free(utf8_text);
+                    utf8_text = util_xtoutf8_alloc((unsigned char*)native_text,
+                                                   strlen((char*)native_text),
+                                                   conversion_codepage);
+#endif
                }

+
+
                if(!strcmp(pid3frame->id,"TIT2")) { /* Title */
                    used=1;
                    pmp3->title = utf8_text;
--- a/src/util.c
+++ b/src/util.c
@@ -66,6 +66,7 @@ int util_must_exit(void) {
    return config.stop;
 }

+#ifdef HAVE_ICONV
 int util_utf8toutf16(unsigned char *utf16, int dlen, unsigned char *utf8, int slen) {
    int result;
    DPRINTF(E_DBG,L_MISC,"Converting %s to utf-16le (slen=%d, dlen=%d)\n",utf8,slen,dlen);
@@ -177,6 +178,277 @@ int util_xtoy(unsigned char *dbuffer, int dlen, unsigned char *sbuffer, int slen
    return (csize != (size_t)-1);
 }

+#else
+
+/* home-rolled conversions, compiled in when HAVE_ICONV is not defined */
+/* Count the bytes of a utf-16 string that precede its 16-bit zero
+ * terminator; the terminator itself is not counted. */
+int util_utf16_byte_len(unsigned char *utf16) {
+    int nbytes = 0;
+
+    /* step one 16-bit unit at a time until both of its bytes are zero */
+    while(utf16[nbytes] || utf16[nbytes + 1]) {
+        nbytes += 2;
+    }
+
+    return nbytes;
+}
+
+/**
+ * work out how many utf-8 bytes a utf-16le buffer will convert to
+ *
+ * @param utf16 utf-16le data to measure
+ * @param len size of utf16 in bytes; a trailing odd byte is ignored
+ * @returns utf-8 byte count, or -1 if it ends after a high surrogate
+ */
+int util_utf16toutf8_len(unsigned char *utf16, int len) {
+    unsigned char *end = utf16 + len;
+    unsigned char *cur;
+    uint32_t unit;
+    int total = 0;
+
+    for(cur = utf16; cur + 2 <= end; cur += 2) {
+        unit = cur[1] << 8 | cur[0];
+        if((unit & 0xFC00) == 0xD800) {
+            /* high surrogate: count it and the next unit as 4 bytes */
+            cur += 2;
+            if(cur + 2 > end)
+                return -1;
+            total += 4;
+        } else if(unit <= 0x7F) {
+            total += 1;
+        } else if(unit <= 0x7FF) {
+            total += 2;
+        } else {
+            total += 3;
+        }
+    }
+    return total;
+}
+
+/**
+ * convert utf16 string to utf8.
+ *
+ * The output size is computed up front with util_utf16toutf8_len(),
+ * and the call fails before writing anything if that size plus the
+ * terminating NUL does not fit in dlen.
+ *
+ * We assume this is utf-16LE, as it comes from windows
+ *
+ * @param utf8 buffer receiving the NUL-terminated utf-8 result
+ * @param dlen size of the utf8 buffer, in bytes
+ * @param utf16 utf-16 to convert
+ * @param len length of utf-16 string, in bytes
+ * @returns the utf-8 length without the NUL, or FALSE on failure
+ */
+
+int util_utf16toutf8(unsigned char *utf8, int dlen, unsigned char *utf16, int len) {
+    unsigned char *src=utf16;
+    unsigned char *dst;
+    unsigned int w1, w2;
+    int bytes;
+    int new_len;
+
+    if(!len)
+        return FALSE;
+
+    new_len = util_utf16toutf8_len(utf16,len);
+    if((new_len == -1) || (dlen <= new_len)) {
+        DPRINTF(E_LOG,L_MISC,"Cannot convert utf16 to utf8; E2BIG (%d vs %d)\n",new_len,dlen);
+        return FALSE;
+    }
+
+    dst=utf8;
+    while((src+2) <= utf16+len) {
+        w1=src[1] << 8 | src[0];
+        src += 2;
+        if((w1 & 0xFC00) == 0xD800) { // could be surrogate pair
+            if(src+2 > utf16+len) {
+                DPRINTF(E_INF,L_SCAN,"Invalid utf-16 in file\n");
+                return FALSE;
+            }
+            w2 = src[1] << 8 | src[0]; // src already points at the low half
+            if((w2 & 0xFC00) != 0xDC00) {
+                DPRINTF(E_INF,L_SCAN,"Invalid utf-16 in file\n");
+                return FALSE;
+            }
+            src += 2; // consume the low half so it is not decoded again
+            // get bottom 10 of each
+            w1 = w1 & 0x03FF;
+            w1 = w1 << 10;
+            w1 = w1 | (w2 & 0x03FF);
+
+            // add back the 0x10000
+            w1 += 0x10000;
+        }
+
+        // now encode the original code point in utf-8
+        if (w1 < 0x80) {
+            *dst++ = w1;
+            bytes=0;
+        } else if (w1 < 0x800) {
+            *dst++ = 0xC0 | (w1 >> 6);
+            bytes=1;
+        } else if (w1 < 0x10000) {
+            *dst++ = 0xE0 | (w1 >> 12);
+            bytes=2;
+        } else {
+            *dst++ = 0xF0 | (w1 >> 18);
+            bytes=3;
+        }
+
+        while(bytes) {
+            *dst++ = 0x80 | ((w1 >> (6*(bytes-1))) & 0x3f);
+            bytes--;
+        }
+    }
+
+    *dst = '\x0';
+
+    return new_len;
+}
+
+/**
+ * Size a utf-8 string's utf-16 conversion: two bytes per code point,
+ * four when a surrogate pair is needed, plus one.
+ *
+ * @param utf8 NUL-terminated utf-8 string
+ * @returns that size, or -1 if a sequence's last continuation byte
+ *          would lie past the terminating NUL
+ */
+int util_utf8toutf16_len(unsigned char *utf8) {
+    int total = (int)strlen((char *)utf8);
+    int pos = 0;
+    int extra;
+    int needed = 1; /* the reported size is one over the utf-16 bytes */
+
+    while(pos < total) {
+        unsigned char lead = utf8[pos];
+
+        /* the lead byte's high bits give the continuation count */
+        extra = ((lead & 0xE0) == 0xC0) ? 1 :
+                ((lead & 0xF0) == 0xE0) ? 2 :
+                ((lead & 0xF8) == 0xF0) ? 3 : 0;
+        if(pos + extra > total)
+            return -1;
+
+        needed += (extra == 3) ? 4 : 2;
+        pos += 1 + extra;
+    }
+    return needed;
+}
+
+/* utf-8 -> newly calloc'd utf-16LE copy; caller frees, NULL on failure */
+unsigned char *util_utf8toutf16_alloc(unsigned char *utf8) {
+    unsigned char *out;
+    int new_len = util_utf8toutf16_len(utf8);
+
+    if(new_len == -1)
+        return NULL;
+    out = calloc(1,new_len + 2);
+    if(!out) /* allocation failed; the converter must not see NULL */
+        return NULL;
+    if(!util_utf8toutf16(out,new_len + 2,utf8,(int)strlen((char*)utf8))) {
+        free(out);
+        return NULL;
+    }
+    return out;
+}
+
+/* utf-16LE -> newly calloc'd utf-8 copy; caller frees, NULL on failure */
+unsigned char *util_utf16touft8_alloc(unsigned char *utf16, int len) {
+    unsigned char *out;
+    int new_len = util_utf16toutf8_len(utf16,len);
+    if(new_len == -1)
+        return NULL;
+    out = calloc(1,new_len + 1);
+    if(!out) /* allocation failed; the converter must not see NULL */
+        return NULL;
+    if(!util_utf16toutf8(out,new_len + 1,utf16,len)) {
+        free(out);
+        return NULL;
+    }
+    return out;
+}
+
+/**
+ * convert a NUL-terminated utf-8 string to utf-16LE.
+ *
+ * @param utf16 buffer receiving the result, ended by a two-byte zero
+ * @param dlen size of the utf16 buffer, in bytes
+ * @param utf8 NUL-terminated utf-8 string to convert
+ * @param len ignored; the length is re-measured with strlen()
+ * @returns the value of util_utf8toutf16_len(utf8), or FALSE on failure
+ */
+int util_utf8toutf16(unsigned char *utf16, int dlen, unsigned char *utf8, int len) {
+    unsigned char *src=utf8;
+    unsigned char *dst=utf16;
+    int new_len, trailing_bytes;
+    uint32_t utf32;
+    uint16_t temp_word;
+
+    len=(int)strlen((char*)utf8); /* ignore passed length, might be wrong! */
+    if(!len)
+        return FALSE;
+
+    new_len = util_utf8toutf16_len(utf8);
+    if((new_len == -1) || (dlen <= (new_len+1))) {
+        DPRINTF(E_LOG,L_MISC,"Cannot convert %s to utf16; E2BIG (%d vs %d)\n",utf8,new_len,dlen);
+        return FALSE;
+    }
+
+    while(src < utf8 + len) {
+        utf32=0;
+        trailing_bytes=0;
+        if((*src & 0xE0) == 0xC0) trailing_bytes = 1;
+        else if((*src & 0xF0) == 0xE0) trailing_bytes = 2;
+        else if((*src & 0xF8) == 0xF0) trailing_bytes = 3;
+
+        if(src + trailing_bytes > utf8 + len) {
+            DPRINTF(E_LOG,L_SCAN,"Invalid UTF8 string\n");
+            return FALSE;
+        }
+
+        switch(trailing_bytes) {
+        case 0:
+            utf32 = *src;
+            break;
+        case 1:
+            utf32 = ((src[0] & 0x1F) << 6) | (src[1] & 0x3F);
+            break;
+        case 2:
+            utf32 = ((src[0] & 0x0F) << 12) | ((src[1] & 0x3F) << 6) |
+                (src[2] & 0x3F);
+            break;
+        case 3:
+            utf32 = ((src[0] & 0x07) << 18) | ((src[1] & 0x3F) << 12) |
+                ((src[2] & 0x3F) << 6) | (src[3] & 0x3F);
+            break;
+        }
+
+        if(utf32 <= 0xFFFF) {
+            /* we are encoding LE style... */
+            *dst++ = utf32 & 0xFF;
+            *dst++ = (utf32 & 0xFF00) >> 8;
+        } else {
+            utf32 -= 0x10000; /* surrogates encode the offset past 0xFFFF */
+            temp_word = 0xD800 | ((utf32 & 0x0FFC00) >> 10);
+            *dst++ = temp_word & 0xFF;
+            *dst++ = (temp_word & 0xFF00) >> 8;
+            temp_word = 0xDC00 | (utf32 & 0x3FF);
+            *dst++ = temp_word & 0xFF;
+            *dst++ = (temp_word & 0xFF00) >> 8;
+        }
+
+        src += (trailing_bytes + 1);
+    }
+    *dst++ = '\x0';
+    *dst = '\x0';
+    return new_len;
+}
+
+#endif
+
 void util_hexdump(unsigned char *block, int len) {
    char charmap[256];
    int index;
@@ -417,7 +689,6 @@ char *util_vasprintf(char *fmt, va_list ap) {
    if(!outbuf)
        DPRINTF(E_FATAL,L_MISC,"Could not allocate buffer in vasprintf\n");

-    ap2=ap; /* shut up lint warnings */
    VA_COPY(ap2,ap);

    while(1) {