use iconv for utf8/utf16 conversion, in preparation for codepage id3tags and file system conversion

This commit is contained in:
Ron Pedde 2007-08-23 03:55:10 +00:00
parent f2be31595f
commit d88132f6c9
5 changed files with 27 additions and 309 deletions

View File

@ -1349,6 +1349,7 @@ int io_file_open(IO_PRIVHANDLE *phandle, char *uri) {
uint32_t native_mode=0;
#ifdef WIN32
uint32_t native_permissions=0;
WCHAR utf16_path[PATH_MAX+1]; /* the real windows utf16 path */
#endif
ASSERT(phandle);

View File

@ -51,19 +51,6 @@ typedef struct {
extern int os_register(void);
extern int os_unregister(void);
extern char *os_configpath(void);
/* replacements for socket functions */
// extern int os_opensocket(unsigned short port);
// extern int os_acceptsocket(int fd, struct in_addr *hostaddr);
// extern int os_shutdown(int fd, int how);
// extern int os_waitfdtimed(int fd, struct timeval end);
// extern int os_close(int fd);
// extern int os_open(const char *filename, int oflag);
// extern FILE *os_fopen(const char *filename, const char *mode);
// extern int os_read(int fd,void *buffer,unsigned int count);
// extern int os_write(int fd, void *buffer, unsigned int count);
extern int os_getuid(void);
/* missing win32 functions */

View File

@ -18,7 +18,7 @@
#include <string.h>
#include <sys/types.h>
//#include <iconv.h>
#include <iconv.h>
#include "daapd.h"
#include "err.h"
@ -32,7 +32,6 @@ int _util_initialized=0;
/* Forwards */
//int _util_xtoy(unsigned char *dbuffer, size_t dlen, unsigned char *sbuffer, size_t slen, char *from, char *to);
void _util_mutex_init(void);
/**
@ -66,301 +65,45 @@ int util_must_exit(void) {
return config.stop;
}
/**
 * Count the bytes of a utf-16 string that precede its terminator.
 *
 * The buffer is examined one two-byte unit at a time; the first unit
 * whose bytes are both zero ends the scan and is not counted.
 *
 * @param utf16 buffer to measure
 * @returns number of bytes before the terminating zero unit
 */
int util_utf16_byte_len(unsigned char *utf16) {
    int bytes = 0;

    /* keep stepping while either byte of the current unit is non-zero */
    for(; utf16[bytes] || utf16[bytes + 1]; bytes += 2)
        ;

    return bytes;
}
/**
 * Work out how many utf-8 bytes a utf-16le buffer converts to.
 *
 * A unit in the high-surrogate range is counted together with the
 * unit that follows it as one four byte sequence; every other unit
 * is counted as one, two or three bytes depending on its value.
 * A trailing odd byte is ignored.
 *
 * @param utf16 utf-16le text to measure
 * @param len length of the utf-16 text, in bytes
 * @returns utf-8 byte count (terminator not included), or -1 when a
 *          high surrogate is the last unit in the buffer
 */
int util_utf16toutf8_len(unsigned char *utf16, int len) {
    int needed = 0;
    int pos = 0;

    while(len - pos >= 2) {
        uint32_t unit = ((uint32_t)utf16[pos + 1] << 8) | utf16[pos];

        pos += 2;

        if((unit & 0xFC00) == 0xD800) {
            /* the second half of the pair has to be present */
            if(len - pos < 2)
                return -1;
            pos += 2;
            needed += 4;
            continue;
        }

        if(unit <= 0x7F) {
            needed += 1;
        } else if(unit <= 0x7FF) {
            needed += 2;
        } else {
            needed += 3;
        }
    }

    return needed;
}
/**
 * Convert a utf-16 buffer to a NUL-terminated utf-8 string.
 *
 * The required size is computed up front with util_utf16toutf8_len(),
 * and nothing is written unless the destination can hold the converted
 * text plus the terminating NUL.
 *
 * We assume this is utf-16LE, as it comes from windows
 *
 * @param utf8 destination buffer
 * @param dlen size of the destination buffer, in bytes
 * @param utf16 utf-16le text to convert
 * @param len length of the utf-16 text, in bytes
 * @returns number of utf-8 bytes written (terminator not included),
 *          or FALSE when the input is empty, malformed or does not fit
 */
int util_utf16toutf8(unsigned char *utf8, int dlen, unsigned char *utf16, int len) {
    unsigned char *src=utf16;
    unsigned char *end;
    unsigned char *dst;
    unsigned int w1, w2;
    int bytes;
    int new_len;

    if(!len)
        return FALSE;

    new_len = util_utf16toutf8_len(utf16,len);
    if((new_len == -1) || (dlen <= new_len)) {
        /* the destination has not been written yet, so it must not be
         * printed as a string here */
        DPRINTF(E_LOG,L_MISC,"Cannot convert utf-16 to utf8; E2BIG (%d vs %d)\n",new_len,dlen);
        return FALSE;
    }

    end = utf16 + len;
    dst = utf8;

    while((src+2) <= end) {
        w1 = src[1] << 8 | src[0];
        src += 2;

        if((w1 & 0xFC00) == 0xD800) { // could be surrogate pair
            if(src+2 > end) {
                DPRINTF(E_INF,L_SCAN,"Invalid utf-16 in file\n");
                return FALSE;
            }

            /* src already points at the second unit of the pair */
            w2 = src[1] << 8 | src[0];
            if((w2 & 0xFC00) != 0xDC00) {
                DPRINTF(E_INF,L_SCAN,"Invalid utf-16 in file\n");
                return FALSE;
            }

            /* consume the low surrogate so it is not encoded a second
             * time as a code point of its own */
            src += 2;

            // get bottom 10 of each
            w1 = w1 & 0x03FF;
            w1 = w1 << 10;
            w1 = w1 | (w2 & 0x03FF);

            // add back the 0x10000
            w1 += 0x10000;
        }

        // now encode the original code point in utf-8
        if (w1 < 0x80) {
            *dst++ = w1;
            bytes=0;
        } else if (w1 < 0x800) {
            *dst++ = 0xC0 | (w1 >> 6);
            bytes=1;
        } else if (w1 < 0x10000) {
            *dst++ = 0xE0 | (w1 >> 12);
            bytes=2;
        } else {
            *dst++ = 0xF0 | (w1 >> 18);
            bytes=3;
        }

        /* continuation bytes, most significant six bits first */
        while(bytes) {
            *dst++ = 0x80 | ((w1 >> (6*(bytes-1))) & 0x3f);
            bytes--;
        }
    }

    *dst = '\x0';

    return new_len;
}
/**
 * Work out how many bytes a NUL-terminated utf-8 string needs once
 * converted to utf-16.
 *
 * Each sequence counts as two bytes, or four when it is a four byte
 * utf-8 sequence (which becomes a surrogate pair).  One extra byte is
 * added to the total before it is returned.
 *
 * @param utf8 NUL-terminated utf-8 text to measure
 * @returns converted size in bytes plus one, or -1 when a multi-byte
 *          sequence is cut short by the end of the string
 */
int util_utf8toutf16_len(unsigned char *utf8) {
    int len,out_len,trailing_bytes;
    int pos = 0;

    len=(int)strlen((char *)utf8);
    out_len = 0;

    while(pos < len) {
        unsigned char lead = utf8[pos];

        trailing_bytes = 0;
        if((lead & 0xE0) == 0xC0) trailing_bytes = 1;
        else if((lead & 0xF0) == 0xE0) trailing_bytes = 2;
        else if((lead & 0xF8) == 0xF0) trailing_bytes = 3;

        /* the lead byte and all of its trailing bytes must sit before
         * the terminator; a sequence that runs into the NUL is truncated */
        if(trailing_bytes >= len - pos)
            return -1;

        out_len += 2;
        if(trailing_bytes == 3) /* surrogate pair */
            out_len += 2;

        pos += (1 + trailing_bytes);
    }

    out_len += 1;
    return out_len;
}
/**
 * Convert a NUL-terminated utf-8 string to utf-16 in a freshly
 * allocated buffer.
 *
 * The buffer is sized from util_utf8toutf16_len() plus two bytes and
 * filled by util_utf8toutf16().  The caller owns the result and must
 * free() it.
 *
 * @param utf8 NUL-terminated utf-8 text to convert
 * @returns newly allocated utf-16 text, or NULL when the input is
 *          NULL, the size cannot be computed, memory cannot be
 *          allocated, or the conversion reports failure
 */
unsigned char *util_utf8toutf16_alloc(unsigned char *utf8) {
    unsigned char *out;
    int new_len;

    if(!utf8)
        return NULL;

    new_len = util_utf8toutf16_len(utf8);
    if(new_len == -1)
        return NULL;

    out = calloc(1,new_len + 2);
    if(!out) /* the converter would otherwise write through NULL */
        return NULL;

    if(!util_utf8toutf16(out,new_len + 2,utf8,(int)strlen((char*)utf8))) {
        free(out);
        return NULL;
    }

    return out;
}
/**
 * Convert a utf-16 buffer to utf-8 in a freshly allocated buffer.
 *
 * The buffer is sized from util_utf16toutf8_len() plus one byte for
 * the terminating NUL and filled by util_utf16toutf8().  The caller
 * owns the result and must free() it.
 *
 * @param utf16 utf-16 text to convert
 * @param len length of the utf-16 text, in bytes
 * @returns newly allocated NUL-terminated utf-8 text, or NULL when
 *          the input is NULL, the size cannot be computed, memory
 *          cannot be allocated, or the conversion reports failure
 */
unsigned char *util_utf16touft8_alloc(unsigned char *utf16, int len) {
    unsigned char *out;
    int new_len;

    if(!utf16)
        return NULL;

    new_len = util_utf16toutf8_len(utf16,len);
    if(new_len == -1)
        return NULL;

    out = calloc(1,new_len + 1);
    if(!out) /* the converter would otherwise write through NULL */
        return NULL;

    if(!util_utf16toutf8(out,new_len + 1,utf16,len)) {
        free(out);
        return NULL;
    }

    return out;
}
/**
 * Convert a NUL-terminated utf-8 string to utf-16, written low byte
 * first, followed by a two byte zero terminator.
 *
 * The required size is taken from util_utf8toutf16_len(), and nothing
 * is written unless the destination is large enough.
 *
 * @param utf16 destination buffer
 * @param dlen size of the destination buffer, in bytes
 * @param utf8 NUL-terminated utf-8 text to convert
 * @param len ignored; the length is recomputed with strlen()
 * @returns the value reported by util_utf8toutf16_len() on success,
 *          or FALSE when the input is empty, malformed or does not fit
 */
int util_utf8toutf16(unsigned char *utf16, int dlen, unsigned char *utf8, int len) {
    unsigned char *src=utf8;
    unsigned char *end;
    unsigned char *dst;
    int new_len;
    int trailing_bytes;
    uint32_t utf32;
    uint16_t temp_word;

    len=(int)strlen((char*)utf8); /* ignore passed length, might be wrong! */

    if(!len)
        return FALSE;

    new_len = util_utf8toutf16_len(utf8);
    if((new_len == -1) || (dlen <= (new_len+1))) {
        DPRINTF(E_LOG,L_MISC,"Cannot convert %s to utf16; E2BIG (%d vs %d)\n",utf8,new_len,dlen);
        return FALSE;
    }

    end = utf8 + len;
    dst=utf16;

    while(src < end) {
        utf32=0;
        trailing_bytes=0;

        if((*src & 0xE0) == 0xC0) trailing_bytes = 1;
        else if((*src & 0xF0) == 0xE0) trailing_bytes = 2;
        else if((*src & 0xF8) == 0xF0) trailing_bytes = 3;

        /* every trailing byte must sit before the terminator; a
         * sequence that runs into the NUL is truncated */
        if(trailing_bytes >= end - src) {
            DPRINTF(E_LOG,L_SCAN,"Invalid UTF8 string\n");
            return FALSE;
        }

        switch(trailing_bytes) {
        case 1:
            utf32 = ((src[0] & 0x1F) << 6) |
                (src[1] & 0x3F);
            break;
        case 2:
            utf32 = ((src[0] & 0x0F) << 12) |
                ((src[1] & 0x3F) << 6) |
                ((src[2] & 0x3F));
            break;
        case 3:
            utf32 = ((src[0] & 0x07) << 18) |
                ((src[1] & 0x3F) << 12) |
                ((src[2] & 0x3F) << 6) |
                ((src[3] & 0x3F));
            break;
        case 0:
        default:
            utf32 = *src;
            break;
        }

        /* values past U+10FFFF cannot be expressed as a surrogate pair */
        if(utf32 > 0x10FFFF) {
            DPRINTF(E_LOG,L_SCAN,"Invalid UTF8 string\n");
            return FALSE;
        }

        if(utf32 <= 0xFFFF) {
            /* we are encoding LE style... */
            *dst++ = utf32 & 0xFF;
            *dst++ = (utf32 & 0xFF00) >> 8;
        } else {
            /* Encode with surrogates: the pair carries the 20 bits of
             * (code point - 0x10000), ten in each half */
            utf32 -= 0x10000;

            temp_word = (uint16_t)(0xD800 | (utf32 >> 10));
            *dst++ = temp_word & 0xFF;
            *dst++ = (temp_word & 0xFF00) >> 8;

            temp_word = (uint16_t)(0xDC00 | (utf32 & 0x3FF));
            *dst++ = temp_word & 0xFF;
            *dst++ = (temp_word & 0xFF00) >> 8;
        }

        src += (trailing_bytes + 1);
    }

    *dst++ = '\x0';
    *dst = '\x0';

    return new_len;
}
/*
int util_utf8toutf16(unsigned char *utf16, size_t dlen, unsigned char *utf8, size_t slen) {
int util_utf8toutf16(unsigned char *utf16, int dlen, unsigned char *utf8, int slen) {
int result;
DPRINTF(E_DBG,L_MISC,"Converting %s to utf-16le (slen=%d, dlen=%d)\n",utf8,slen,dlen);
result=_util_xtoy(utf16, dlen, utf8, slen, "UTF-8","UTF-16LE");
result=util_xtoy(utf16, dlen, utf8, slen, "UTF-8","UTF-16LE");
DPRINTF(E_DBG,L_MISC,"Result: %d\n",result);
_util_hexdump(utf16,32);
// _util_hexdump(utf16,32);
return result;
}
int util_utf16toutf8(unsigned char *utf8, size_t dlen, unsigned char *utf16, size_t slen) {
int util_utf16toutf8(unsigned char *utf8, int dlen, unsigned char *utf16, int slen) {
int result;
DPRINTF(E_DBG,L_MISC,"Converting *something* to utf-8 (slen=%d, dlen=%d)\n",slen,dlen);
_util_hexdump(utf16,32);
result = _util_xtoy(utf8, dlen, utf16, slen, "UTF-16LE","UTF-8");
// _util_hexdump(utf16,32);
result = util_xtoy(utf8, dlen, utf16, slen, "UTF-16LE","UTF-8");
DPRINTF(E_DBG,L_MISC,"Converted to %s\n",utf8);
return result;
}
unsigned char *util_alloc_utf16toutf8(unsigned char *utf16, size_t slen) {
/**
 * Convert a NUL-terminated utf-8 string to utf-16le in a freshly
 * allocated, zero-filled buffer, using util_xtoy().
 *
 * Four output bytes are reserved per input byte, plus two bytes so
 * that a full utf-16 terminator remains even for an empty input.
 * The caller owns the result and must free() it.
 *
 * @param utf8 NUL-terminated utf-8 text to convert
 * @returns newly allocated utf-16le text, or NULL when the input is
 *          NULL, memory cannot be allocated, or util_xtoy() reports
 *          failure
 */
unsigned char *util_alloc_utf8to16(unsigned char *utf8) {
    unsigned char *utf16;
    size_t slen;
    size_t dlen;

    if(!utf8)
        return NULL;

    slen = strlen((char*)utf8);
    dlen = slen * 4 + 2;

    utf16 = calloc(1,dlen);
    if(!utf16)
        return NULL;

    if(util_xtoy(utf16,(int)dlen, utf8, (int)slen,"UTF-8","UTF-16LE")) {
        return utf16;
    }

    free(utf16);
    return NULL;
}
unsigned char *util_alloc_utf16toutf8(unsigned char *utf16, int slen) {
char *utf8;
utf8=calloc(1, slen * 2 + 1);
if(_util_xtoy(utf8,slen * 2 + 1,utf16,slen,"UTF-16LE","UTF-8")) {
if(util_xtoy(utf8,slen * 2 + 1,utf16,slen,"UTF-16LE","UTF-8")) {
return utf8;
}
@ -368,7 +111,7 @@ unsigned char *util_alloc_utf16toutf8(unsigned char *utf16, size_t slen) {
return NULL;
}
int _util_xtoy(unsigned char *dbuffer, size_t dlen, unsigned char *sbuffer, size_t slen, char *from, char *to) {
int util_xtoy(unsigned char *dbuffer, int dlen, unsigned char *sbuffer, int slen, char *from, char *to) {
iconv_t iv;
size_t csize;
@ -398,7 +141,6 @@ int _util_xtoy(unsigned char *dbuffer, size_t dlen, unsigned char *sbuffer, size
return (csize != (size_t)-1);
}
*/
void util_hexdump(unsigned char *block, int len) {
char charmap[256];

View File

@ -41,18 +41,11 @@ extern int util_must_exit(void);
int util_split(char *s, char *delimiters, char ***argvp);
void util_dispose_split(char **argv);
//extern char *util_utf16toutf8(unsigned char *utf16, int len);
//int util_utf8toutf16(unsigned char *utf16, size_t dlen, unsigned char *utf8, size_t slen);
//int util_utf16toutf8(unsigned char *utf8, size_t dlen, unsigned char *utf16, size_t slen);
//unsigned char *util_alloc_utf16toutf8(unsigned char *utf16, int slen);
extern unsigned char *util_utf8toutf16_alloc(unsigned char *utf8);
extern unsigned char *util_utf16touft8_alloc(unsigned char *utf16, int len);
extern int util_utf8toutf16_len(unsigned char *utf8);
extern int util_utf16toutf8_len(unsigned char *utf16, int len);
extern int util_utf8toutf16(unsigned char *utf16, int dlen, unsigned char *utf8, int len);
extern int util_utf16toutf8(unsigned char *utf8, int dlen, unsigned char *utf16, int len);
extern int util_utf16_byte_len(unsigned char *utf16);
extern unsigned char *util_utf16touft8_alloc(unsigned char *utf16, int slen);
extern int util_utf8toutf16(unsigned char *utf16, int dlen, unsigned char *utf8, int slen);
extern int util_utf16toutf8(unsigned char *utf8, int dlen, unsigned char *utf16, int slen);
extern int util_xtoy(unsigned char *dbuffer, int dlen, unsigned char *sbuffer, int slen, char *from, char *to);
extern void util_hexdump(unsigned char *block, int len);
extern char *util_vasprintf(char *fmt, va_list ap);

View File

@ -77,11 +77,6 @@ typedef INT64 int64_t;
#define snprintf _snprintf
#define access _access
// #define close os_close
// #define open os_open
// #define waitfdtimed os_waitfdtimed
// #define fopen os_fopen
#define readdir_r os_readdir_r
#define closedir os_closedir
#define opendir os_opendir