From ff8c69d91547574f8293bf588f2a45e45b823cc6 Mon Sep 17 00:00:00 2001
From: Ron Pedde
Date: Mon, 28 Mar 2005 00:28:54 +0000
Subject: [PATCH] clean up invalid utf-8 data. this forces a database update and full rescan (mostly as a test of the db update stuff)

---
 src/db-generic.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++-
 src/dbs-sqlite.c | 57 +++++++++++++++++++++++++++++--
 2 files changed, 142 insertions(+), 3 deletions(-)

diff --git a/src/db-generic.c b/src/db-generic.c
index 4119b08d..c6809e59 100644
--- a/src/db-generic.c
+++ b/src/db-generic.c
@@ -23,7 +23,7 @@
 # include "config.h"
 #endif
 
-#define _XOPEN_SOURCE 600 /** I forgot why I needed this? */
+#define _XOPEN_SOURCE 500 /** unix98? pthread_once_t, etc */
 
 #include
 #include
@@ -236,6 +236,8 @@ static void db_writelock(void);
 static void db_readlock(void);
 static int db_unlock(void);
 static void db_init_once(void);
+static void db_utf8_validate(MP3FILE *pmp3);
+static int db_utf8_validate_string(char *string);
 
 /**
  * encode a string meta request into a MetaField_t
@@ -423,6 +425,7 @@ int db_add(MP3FILE *pmp3) {
     int retval;
 
     db_writelock();
+    db_utf8_validate(pmp3);
     retval=db_current->dbs_add(pmp3);
     db_revision_no++;
     db_unlock();
@@ -685,3 +688,86 @@ int db_dmap_add_container(char *where, char *tag, int size) {
 
     return 8;
 }
+
+/**
+ * check the strings in a MP3FILE to ensure they are
+ * valid utf-8. If they are not, the string will be corrected
+ *
+ * \param pmp3 MP3FILE to verify for valid utf-8
+ */
+void db_utf8_validate(MP3FILE *pmp3) {
+    int is_invalid=0;
+
+    /* we won't bother with path and fname... those were culled with the
+     * scan. Even if they are invalid (_could_ they be?), then we
+     * won't be able to open the file if we change them. Likewise,
+     * we won't do type or description, as these can't be bad, or they
+     * wouldn't have been scanned */
+
+    is_invalid = db_utf8_validate_string(pmp3->title);
+    is_invalid |= db_utf8_validate_string(pmp3->artist);
+    is_invalid |= db_utf8_validate_string(pmp3->album);
+    is_invalid |= db_utf8_validate_string(pmp3->genre);
+    is_invalid |= db_utf8_validate_string(pmp3->comment);
+    is_invalid |= db_utf8_validate_string(pmp3->composer);
+    is_invalid |= db_utf8_validate_string(pmp3->orchestra);
+    is_invalid |= db_utf8_validate_string(pmp3->conductor);
+    is_invalid |= db_utf8_validate_string(pmp3->grouping);
+    is_invalid |= db_utf8_validate_string(pmp3->url);
+
+    if(is_invalid) {
+        DPRINTF(E_LOG,L_SCAN,"Invalid UTF-8 in %s\n",pmp3->path);
+    }
+}
+
+/**
+ * check a string to verify it is valid utf-8. The passed
+ * string will be in-place modified to be utf-8 clean by substituting
+ * the character '?' for invalid utf-8 codepoints
+ *
+ * \param string string to clean
+ */
+int db_utf8_validate_string(char *string) {
+    char *current = string;
+    int run,r_current;
+    int retval=0;
+
+    if(!string)
+        return 0;
+
+    while(*current) {
+        if(!((*current) & 0x80)) {
+            current++;
+        } else {
+            run=0;
+
+            /* it's a lead utf-8 character */
+            if((*current & 0xE0) == 0xC0) run=1;
+            if((*current & 0xF0) == 0xE0) run=2;
+            if((*current & 0xF8) == 0xF0) run=3;
+
+            if(!run) {
+                /* high bit set, but invalid */
+                *current++='?';
+                retval=1;
+            } else {
+                r_current=0;
+                while((r_current != run) && (*(current + r_current + 1)) &&
+                      ((*(current + r_current + 1) & 0xC0) == 0x80)) {
+                    r_current++;
+                }
+
+                if(r_current != run) {
+                    *current++ = '?';
+                    retval=1;
+                } else {
+                    current += (1 + run);
+                }
+            }
+        }
+    }
+
+    return retval;
+}
+
+
diff --git a/src/dbs-sqlite.c b/src/dbs-sqlite.c
index 240aa5ce..5af83dd1 100644
--- a/src/dbs-sqlite.c
+++ b/src/dbs-sqlite.c
@@ -53,6 +53,7 @@ void db_sqlite_build_mp3file(char **valarray, MP3FILE *pmp3);
 int db_sqlite_exec(int fatal, char *fmt, ...);
 int db_sqlite_get_table(int fatal, char ***resarray, int *rows, int *cols, char *fmt, ...);
 int db_sqlite_free_table(char **resarray);
+int db_sqlite_get_int(int loglevel, char *fmt, ...);
 int db_sqlite_update(MP3FILE *pmp3);
 int db_sqlite_update_version(int from_version);
 int db_sqlite_get_version(void);
@@ -156,6 +157,45 @@ int db_sqlite_free_table(char **resarray) {
     return 0;
 }
 
+/**
+ * db_sqlite_get_int
+ */
+int db_sqlite_get_int(int loglevel, char *fmt, ...) {
+    int rows, cols;
+    char **resarray;
+    va_list ap;
+    char *query;
+    int err;
+    char *perr;
+    int retval;
+
+    va_start(ap,fmt);
+    query=sqlite_vmprintf(fmt,ap);
+    va_end(ap);
+
+    DPRINTF(E_DBG,L_DB,"Executing: %s\n",query);
+
+    db_sqlite_lock();
+    err=sqlite_get_table(db_sqlite_songs,query,&resarray,&rows,&cols,&perr);
+    if(err == SQLITE_OK)
+        sqlite_freemem(query);
+    db_sqlite_unlock();
+
+    if(err != SQLITE_OK) {
+        DPRINTF(loglevel == E_FATAL ? E_LOG : loglevel,L_DB,"Query: %s\n",query);
+        DPRINTF(loglevel,L_DB,"Error: %s\n",perr);
+        db_sqlite_lock();
+        sqlite_freemem(query);
+        db_sqlite_unlock();
+        return 0;
+    }
+
+    retval=atoi(resarray[cols]);
+
+    sqlite_free_table(resarray);
+    return retval;
+}
+
 
 /**
  * open sqlite database
@@ -184,8 +224,13 @@ int db_sqlite_open(char *parameters) {
  */
 int db_sqlite_init(int reload) {
     int items;
+    int rescan;
 
     db_sqlite_update_version(db_sqlite_get_version());
+    rescan=db_sqlite_get_int(E_DBG,"SELECT value FROM config WHERE term='rescan'");
+
+    if(rescan)
+        reload=1;
 
     items=db_sqlite_get_count(countSongs);
 
@@ -237,9 +282,11 @@ int db_sqlite_end_scan(void) {
     if(db_sqlite_reload) {
         db_sqlite_exec(E_FATAL,"COMMIT TRANSACTION");
         db_sqlite_exec(E_FATAL,"CREATE INDEX idx_path ON songs(path)");
+        db_sqlite_exec(E_DBG,"DELETE FROM config WHERE term='rescan'");
         db_sqlite_exec(E_FATAL,"PRAGMA synchronous=NORMAL");
     } else {
         db_sqlite_exec(E_FATAL,"DELETE FROM songs WHERE id NOT IN (SELECT id FROM updated)");
+        db_sqlite_exec(E_FATAL,"UPDATE songs SET force_update=0");
         db_sqlite_exec(E_FATAL,"DROP TABLE updated");
     }
 
@@ -1205,6 +1252,7 @@ int db_sqlite_get_version(void) {
 
 
 char *db_sqlite_upgrade_scripts[] = {
+    /* version 0 -> version 1 -- initial update */
     "CREATE TABLE songs (\n"
     " id INTEGER PRIMARY KEY NOT NULL,\n"
     " path VARCHAR(4096) UNIQUE NOT NULL,\n"
@@ -1262,7 +1310,13 @@ char *db_sqlite_upgrade_scripts[] = {
     " songid INTEGER NOT NULL\n"
     ");\n"
     "INSERT INTO config VALUES ('version','','1');\n"
-    "INSERT INTO playlists VALUES (1,'Library',1,0,'1');\n", /* Version 0 -> Version 1 */
+    "INSERT INTO playlists VALUES (1,'Library',1,0,'1');\n",
+
+    /* version 1 -> version 2 */
+    /* force rescan for invalid utf-8 data */
+    "REPLACE INTO config VALUES('rescan',NULL,1);\n"
+    "UPDATE config SET value=2 WHERE term='version';\n",
+
     NULL /* No more versions! */
 };
 
@@ -1272,7 +1326,6 @@ char *db_sqlite_upgrade_scripts[] = {
  * \param from_version the current version of the database
  */
 int db_sqlite_update_version(int from_version) {
-
     while(db_sqlite_upgrade_scripts[from_version]) {
         DPRINTF(E_LOG,L_DB,"Upgrading database from version %d to version %d\n",from_version,
                 from_version+1);