use iconv for utf8/utf16 conversion, in preparation for codepage id3tags and file system conversion
This commit is contained in:
parent
f2be31595f
commit
d88132f6c9
1
src/io.c
1
src/io.c
|
@ -1349,6 +1349,7 @@ int io_file_open(IO_PRIVHANDLE *phandle, char *uri) {
|
|||
uint32_t native_mode=0;
|
||||
#ifdef WIN32
|
||||
uint32_t native_permissions=0;
|
||||
WCHAR utf16_path[PATH_MAX+1]; /* the real windows utf16 path */
|
||||
#endif
|
||||
|
||||
ASSERT(phandle);
|
||||
|
|
|
@ -51,19 +51,6 @@ typedef struct {
|
|||
extern int os_register(void);
|
||||
extern int os_unregister(void);
|
||||
extern char *os_configpath(void);
|
||||
|
||||
/* replacements for socket functions */
|
||||
// extern int os_opensocket(unsigned short port);
|
||||
// extern int os_acceptsocket(int fd, struct in_addr *hostaddr);
|
||||
// extern int os_shutdown(int fd, int how);
|
||||
// extern int os_waitfdtimed(int fd, struct timeval end);
|
||||
|
||||
// extern int os_close(int fd);
|
||||
// extern int os_open(const char *filename, int oflag);
|
||||
// extern FILE *os_fopen(const char *filename, const char *mode);
|
||||
|
||||
// extern int os_read(int fd,void *buffer,unsigned int count);
|
||||
// extern int os_write(int fd, void *buffer, unsigned int count);
|
||||
extern int os_getuid(void);
|
||||
|
||||
/* missing win32 functions */
|
||||
|
|
302
src/util.c
302
src/util.c
|
@ -18,7 +18,7 @@
|
|||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
//#include <iconv.h>
|
||||
#include <iconv.h>
|
||||
|
||||
#include "daapd.h"
|
||||
#include "err.h"
|
||||
|
@ -32,7 +32,6 @@ int _util_initialized=0;
|
|||
|
||||
|
||||
/* Forwards */
|
||||
//int _util_xtoy(unsigned char *dbuffer, size_t dlen, unsigned char *sbuffer, size_t slen, char *from, char *to);
|
||||
void _util_mutex_init(void);
|
||||
|
||||
/**
|
||||
|
@ -66,301 +65,45 @@ int util_must_exit(void) {
|
|||
return config.stop;
|
||||
}
|
||||
|
||||
|
||||
int util_utf16_byte_len(unsigned char *utf16) {
|
||||
unsigned char *src = utf16;
|
||||
int len = 0;
|
||||
|
||||
while(1) {
|
||||
if((src[0] == 0) && (src[1]==0))
|
||||
return len;
|
||||
len += 2;
|
||||
src += 2;
|
||||
}
|
||||
return len; /* ?? */
|
||||
}
|
||||
|
||||
/**
|
||||
* calculate how long a utf16le string will be once converted
|
||||
*/
|
||||
int util_utf16toutf8_len(unsigned char *utf16, int len) {
|
||||
unsigned char *src = utf16;
|
||||
int out_len = 0;
|
||||
uint32_t temp_dword;
|
||||
|
||||
while(src+2 <= utf16 + len) {
|
||||
temp_dword = src[1] << 8 | src[0];
|
||||
|
||||
if((temp_dword & 0xFC00) == 0xD800) {
|
||||
src += 2;
|
||||
if(src + 2 <= utf16 + len) {
|
||||
out_len += 4;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
if(temp_dword <= 0x7F)
|
||||
out_len += 1;
|
||||
else if(temp_dword <= 0x7FF)
|
||||
out_len += 2;
|
||||
else if(temp_dword <= 0xFFFF)
|
||||
out_len += 3;
|
||||
}
|
||||
|
||||
src += 2;
|
||||
}
|
||||
return out_len;
|
||||
}
|
||||
|
||||
/**
|
||||
* convert utf16 string to utf8. This is a bit naive, but...
|
||||
* Since utf-8 can't expand past 4 bytes per code point, and
|
||||
* we're converting utf-16, we can't be more than 2n+1 bytes, so
|
||||
* we'll just allocate that much.
|
||||
*
|
||||
* Probably it could be more efficiently calculated, but this will
|
||||
* always work. Besides, these are small strings, and will be freed
|
||||
* after the db insert.
|
||||
*
|
||||
* We assume this is utf-16LE, as it comes from windows
|
||||
*
|
||||
* @param utf16 utf-16 to convert
|
||||
* @param len length of utf-16 string
|
||||
*/
|
||||
|
||||
int util_utf16toutf8(unsigned char *utf8, int dlen, unsigned char *utf16, int len) {
|
||||
unsigned char *src=utf16;
|
||||
unsigned char *dst;
|
||||
unsigned int w1, w2;
|
||||
int bytes;
|
||||
int new_len;
|
||||
|
||||
if(!len)
|
||||
return FALSE;
|
||||
|
||||
new_len = util_utf16toutf8_len(utf16,len);
|
||||
if((new_len == -1) || (dlen <= new_len)) {
|
||||
DPRINTF(E_LOG,L_MISC,"Cannot convert %s to utf8; E2BIG (%d vs %d)\n",utf8,new_len,dlen);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
dst=utf8;
|
||||
while((src+2) <= utf16+len) {
|
||||
w1=src[1] << 8 | src[0];
|
||||
src += 2;
|
||||
if((w1 & 0xFC00) == 0xD800) { // could be surrogate pair
|
||||
if(src+2 > utf16+len) {
|
||||
DPRINTF(E_INF,L_SCAN,"Invalid utf-16 in file\n");
|
||||
return FALSE;
|
||||
}
|
||||
w2 = src[3] << 8 | src[2];
|
||||
if((w2 & 0xFC00) != 0xDC00) {
|
||||
DPRINTF(E_INF,L_SCAN,"Invalid utf-16 in file\n");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
// get bottom 10 of each
|
||||
w1 = w1 & 0x03FF;
|
||||
w1 = w1 << 10;
|
||||
w1 = w1 | (w2 & 0x03FF);
|
||||
|
||||
// add back the 0x10000
|
||||
w1 += 0x10000;
|
||||
}
|
||||
|
||||
// now encode the original code point in utf-8
|
||||
if (w1 < 0x80) {
|
||||
*dst++ = w1;
|
||||
bytes=0;
|
||||
} else if (w1 < 0x800) {
|
||||
*dst++ = 0xC0 | (w1 >> 6);
|
||||
bytes=1;
|
||||
} else if (w1 < 0x10000) {
|
||||
*dst++ = 0xE0 | (w1 >> 12);
|
||||
bytes=2;
|
||||
} else {
|
||||
*dst++ = 0xF0 | (w1 >> 18);
|
||||
bytes=3;
|
||||
}
|
||||
|
||||
while(bytes) {
|
||||
*dst++ = 0x80 | ((w1 >> (6*(bytes-1))) & 0x3f);
|
||||
bytes--;
|
||||
}
|
||||
}
|
||||
|
||||
*dst = '\x0';
|
||||
|
||||
return new_len;
|
||||
}
|
||||
|
||||
/**
|
||||
* calculate how long a utf8 string will be once converted
|
||||
*/
|
||||
int util_utf8toutf16_len(unsigned char *utf8) {
|
||||
int len,out_len,trailing_bytes;
|
||||
unsigned char *src = utf8;
|
||||
|
||||
len=(int)strlen((char *)utf8);
|
||||
out_len = 0;
|
||||
|
||||
while(src < utf8 + len) {
|
||||
trailing_bytes = 0;
|
||||
if((*src & 0xE0) == 0xC0) trailing_bytes = 1;
|
||||
else if((*src & 0xF0) == 0xE0) trailing_bytes = 2;
|
||||
else if((*src & 0xF8) == 0xF0) trailing_bytes = 3;
|
||||
|
||||
if(src + trailing_bytes > utf8 + len)
|
||||
return -1;
|
||||
|
||||
out_len += 2;
|
||||
if(trailing_bytes == 3) /* surrogate pair */
|
||||
out_len += 2;
|
||||
|
||||
src += (1 + trailing_bytes);
|
||||
}
|
||||
|
||||
out_len += 1;
|
||||
return out_len;
|
||||
}
|
||||
|
||||
unsigned char *util_utf8toutf16_alloc(unsigned char *utf8) {
|
||||
unsigned char *out;
|
||||
int new_len;
|
||||
|
||||
new_len = util_utf8toutf16_len(utf8);
|
||||
if(new_len == -1)
|
||||
return NULL;
|
||||
|
||||
out = calloc(1,new_len + 2);
|
||||
if(!util_utf8toutf16(out,new_len + 2,utf8,(int)strlen((char*)utf8))) {
|
||||
free(out);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
unsigned char *util_utf16touft8_alloc(unsigned char *utf16, int len) {
|
||||
unsigned char *out;
|
||||
int new_len;
|
||||
|
||||
new_len = util_utf16toutf8_len(utf16,len);
|
||||
if(new_len == -1)
|
||||
return NULL;
|
||||
|
||||
out = calloc(1,new_len + 1);
|
||||
if(!util_utf16toutf8(out,new_len + 1,utf16,len)) {
|
||||
free(out);
|
||||
return NULL;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
int util_utf8toutf16(unsigned char *utf16, int dlen, unsigned char *utf8, int len) {
|
||||
unsigned char *src=utf8;
|
||||
unsigned char *dst;
|
||||
int new_len;
|
||||
int trailing_bytes;
|
||||
uint32_t utf32;
|
||||
uint16_t temp_word;
|
||||
|
||||
len=(int)strlen((char*)utf8); /* ignore passed length, might be wrong! */
|
||||
if(!len)
|
||||
return FALSE;
|
||||
|
||||
new_len = util_utf8toutf16_len(utf8);
|
||||
if((new_len == -1) || (dlen <= (new_len+1))) {
|
||||
DPRINTF(E_LOG,L_MISC,"Cannot convert %s to utf16; E2BIG (%d vs %d)\n",utf8,new_len,dlen);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
dst=utf16;
|
||||
|
||||
while(src < utf8 + len) {
|
||||
utf32=0;
|
||||
trailing_bytes=0;
|
||||
|
||||
if((*src & 0xE0) == 0xC0) trailing_bytes = 1;
|
||||
else if((*src & 0xF0) == 0xE0) trailing_bytes = 2;
|
||||
else if((*src & 0xF8) == 0xF0) trailing_bytes = 3;
|
||||
|
||||
if(src + trailing_bytes > utf8 + len) {
|
||||
DPRINTF(E_LOG,L_SCAN,"Invalid UTF8 string\n");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
switch(trailing_bytes) {
|
||||
case 0:
|
||||
utf32 = *src;
|
||||
break;
|
||||
case 1:
|
||||
utf32 = ((src[0] & 0x1F) << 6) |
|
||||
(src[1] & 0x3F);
|
||||
break;
|
||||
case 2:
|
||||
utf32 = ((src[0] & 0x0F) << 12) |
|
||||
((src[1] & 0x3F) << 6) |
|
||||
((src[2] & 0x3F));
|
||||
break;
|
||||
case 3:
|
||||
utf32 = ((src[0] & 0x07) << 18) |
|
||||
((src[1] & 0x3F) << 12) |
|
||||
((src[2] & 0x3F) << 6) |
|
||||
((src[3] & 0x3F));
|
||||
break;
|
||||
}
|
||||
|
||||
if(utf32 <= 0xFFFF) {
|
||||
/* we are encoding LE style... */
|
||||
*dst++ = utf32 & 0xFF;
|
||||
*dst++ = (utf32 & 0xFF00) >> 8;
|
||||
} else {
|
||||
/* Encode with surrogates */
|
||||
temp_word = 0xD800 | ((utf32 & 0x0FFC00) >> 10);
|
||||
*dst++ = temp_word & 0xFF;
|
||||
*dst++ = (temp_word & 0xFF00) >> 8;
|
||||
temp_word = 0xDC00 | (utf32 & 0x3FF);
|
||||
*dst++ = temp_word & 0xFF;
|
||||
*dst++ = (temp_word & 0xFF00) >> 8;
|
||||
}
|
||||
|
||||
src += (trailing_bytes + 1);
|
||||
}
|
||||
|
||||
*dst++ = '\x0';
|
||||
*dst = '\x0';
|
||||
return new_len;
|
||||
}
|
||||
|
||||
/*
|
||||
int util_utf8toutf16(unsigned char *utf16, size_t dlen, unsigned char *utf8, size_t slen) {
|
||||
int util_utf8toutf16(unsigned char *utf16, int dlen, unsigned char *utf8, int slen) {
|
||||
int result;
|
||||
DPRINTF(E_DBG,L_MISC,"Converting %s to utf-16le (slen=%d, dlen=%d)\n",utf8,slen,dlen);
|
||||
|
||||
result=_util_xtoy(utf16, dlen, utf8, slen, "UTF-8","UTF-16LE");
|
||||
result=util_xtoy(utf16, dlen, utf8, slen, "UTF-8","UTF-16LE");
|
||||
DPRINTF(E_DBG,L_MISC,"Result: %d\n",result);
|
||||
_util_hexdump(utf16,32);
|
||||
// _util_hexdump(utf16,32);
|
||||
return result;
|
||||
}
|
||||
|
||||
int util_utf16toutf8(unsigned char *utf8, size_t dlen, unsigned char *utf16, size_t slen) {
|
||||
int util_utf16toutf8(unsigned char *utf8, int dlen, unsigned char *utf16, int slen) {
|
||||
int result;
|
||||
|
||||
DPRINTF(E_DBG,L_MISC,"Converting *something* to utf-8 (slen=%d, dlen=%d)\n",slen,dlen);
|
||||
_util_hexdump(utf16,32);
|
||||
result = _util_xtoy(utf8, dlen, utf16, slen, "UTF-16LE","UTF-8");
|
||||
// _util_hexdump(utf16,32);
|
||||
result = util_xtoy(utf8, dlen, utf16, slen, "UTF-16LE","UTF-8");
|
||||
|
||||
DPRINTF(E_DBG,L_MISC,"Converted to %s\n",utf8);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
unsigned char *util_alloc_utf16toutf8(unsigned char *utf16, size_t slen) {
|
||||
unsigned char *util_alloc_utf8to16(unsigned char *utf8) {
|
||||
char *utf16;
|
||||
|
||||
utf16 = calloc(1,strlen((char*)utf8) * 4 + 1);
|
||||
if(util_xtoy(utf16,strlen((char*)utf8) * 4 + 1, utf8, strlen((char*)utf8),"UTF-8","UTF-16LE")) {
|
||||
return utf16;
|
||||
}
|
||||
|
||||
free(utf16);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
unsigned char *util_alloc_utf16toutf8(unsigned char *utf16, int slen) {
|
||||
char *utf8;
|
||||
|
||||
utf8=calloc(1, slen * 2 + 1);
|
||||
if(_util_xtoy(utf8,slen * 2 + 1,utf16,slen,"UTF-16LE","UTF-8")) {
|
||||
if(util_xtoy(utf8,slen * 2 + 1,utf16,slen,"UTF-16LE","UTF-8")) {
|
||||
return utf8;
|
||||
}
|
||||
|
||||
|
@ -368,7 +111,7 @@ unsigned char *util_alloc_utf16toutf8(unsigned char *utf16, size_t slen) {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
int _util_xtoy(unsigned char *dbuffer, size_t dlen, unsigned char *sbuffer, size_t slen, char *from, char *to) {
|
||||
int util_xtoy(unsigned char *dbuffer, int dlen, unsigned char *sbuffer, int slen, char *from, char *to) {
|
||||
iconv_t iv;
|
||||
size_t csize;
|
||||
|
||||
|
@ -398,7 +141,6 @@ int _util_xtoy(unsigned char *dbuffer, size_t dlen, unsigned char *sbuffer, size
|
|||
|
||||
return (csize != (size_t)-1);
|
||||
}
|
||||
*/
|
||||
|
||||
void util_hexdump(unsigned char *block, int len) {
|
||||
char charmap[256];
|
||||
|
|
15
src/util.h
15
src/util.h
|
@ -41,18 +41,11 @@ extern int util_must_exit(void);
|
|||
int util_split(char *s, char *delimiters, char ***argvp);
|
||||
void util_dispose_split(char **argv);
|
||||
|
||||
//extern char *util_utf16toutf8(unsigned char *utf16, int len);
|
||||
//int util_utf8toutf16(unsigned char *utf16, size_t dlen, unsigned char *utf8, size_t slen);
|
||||
//int util_utf16toutf8(unsigned char *utf8, size_t dlen, unsigned char *utf16, size_t slen);
|
||||
//unsigned char *util_alloc_utf16toutf8(unsigned char *utf16, int slen);
|
||||
|
||||
extern unsigned char *util_utf8toutf16_alloc(unsigned char *utf8);
|
||||
extern unsigned char *util_utf16touft8_alloc(unsigned char *utf16, int len);
|
||||
extern int util_utf8toutf16_len(unsigned char *utf8);
|
||||
extern int util_utf16toutf8_len(unsigned char *utf16, int len);
|
||||
extern int util_utf8toutf16(unsigned char *utf16, int dlen, unsigned char *utf8, int len);
|
||||
extern int util_utf16toutf8(unsigned char *utf8, int dlen, unsigned char *utf16, int len);
|
||||
extern int util_utf16_byte_len(unsigned char *utf16);
|
||||
extern unsigned char *util_utf16touft8_alloc(unsigned char *utf16, int slen);
|
||||
extern int util_utf8toutf16(unsigned char *utf16, int dlen, unsigned char *utf8, int slen);
|
||||
extern int util_utf16toutf8(unsigned char *utf8, int dlen, unsigned char *utf16, int slen);
|
||||
extern int util_xtoy(unsigned char *dbuffer, int dlen, unsigned char *sbuffer, int slen, char *from, char *to);
|
||||
|
||||
extern void util_hexdump(unsigned char *block, int len);
|
||||
extern char *util_vasprintf(char *fmt, va_list ap);
|
||||
|
|
|
@ -77,11 +77,6 @@ typedef INT64 int64_t;
|
|||
#define snprintf _snprintf
|
||||
#define access _access
|
||||
|
||||
// #define close os_close
|
||||
// #define open os_open
|
||||
// #define waitfdtimed os_waitfdtimed
|
||||
// #define fopen os_fopen
|
||||
|
||||
#define readdir_r os_readdir_r
|
||||
#define closedir os_closedir
|
||||
#define opendir os_opendir
|
||||
|
|
Loading…
Reference in New Issue