Clean up invalid UTF-8 data. This forces a database update and a full rescan (mostly as a test of the database update mechanism).

This commit is contained in:
Ron Pedde 2005-03-28 00:28:54 +00:00
parent 24667c8d01
commit ff8c69d915
2 changed files with 142 additions and 3 deletions

View File

@ -23,7 +23,7 @@
# include "config.h"
#endif
#define _XOPEN_SOURCE 600 /** I forgot why I needed this? */
#define _XOPEN_SOURCE 500 /** unix98? pthread_once_t, etc */
#include <pthread.h>
#include <stdio.h>
@ -236,6 +236,8 @@ static void db_writelock(void);
static void db_readlock(void);
static int db_unlock(void);
static void db_init_once(void);
static void db_utf8_validate(MP3FILE *pmp3);
static int db_utf8_validate_string(char *string);
/**
* encode a string meta request into a MetaField_t
@ -423,6 +425,7 @@ int db_add(MP3FILE *pmp3) {
int retval;
db_writelock();
db_utf8_validate(pmp3);
retval=db_current->dbs_add(pmp3);
db_revision_no++;
db_unlock();
@ -685,3 +688,86 @@ int db_dmap_add_container(char *where, char *tag, int size) {
return 8;
}
/**
 * Scrub the free-text tag fields of a song record so that each one
 * holds clean utf-8.  Every field is handed to
 * db_utf8_validate_string(), which repairs it in place; if any field
 * needed repair, a single log line naming the file is emitted.
 *
 * path and fname are deliberately left alone: they were already
 * vetted by the scan, and rewriting them would leave us unable to
 * open the file.  type and description are skipped as well, since a
 * file with bad values there would never have been scanned.
 *
 * \param pmp3 song record whose tag strings are checked and fixed
 */
void db_utf8_validate(MP3FILE *pmp3) {
    char *tags[] = {
        pmp3->title,
        pmp3->artist,
        pmp3->album,
        pmp3->genre,
        pmp3->comment,
        pmp3->composer,
        pmp3->orchestra,
        pmp3->conductor,
        pmp3->grouping,
        pmp3->url
    };
    int tag_count = (int)(sizeof(tags) / sizeof(tags[0]));
    int idx;
    int dirty = 0;

    /* visit every field -- no short-circuit, each one must be cleaned */
    for(idx = 0; idx < tag_count; idx++) {
        dirty |= db_utf8_validate_string(tags[idx]);
    }

    if(!dirty)
        return;

    DPRINTF(E_LOG,L_SCAN,"Invalid UTF-8 in %s\n",pmp3->path);
}
/**
 * check a string to verify it is well-formed utf-8.  The passed
 * string will be in-place modified to be utf-8 clean by substituting
 * the character '?' for every byte that is not part of a well-formed
 * sequence.  The substitution is byte-for-byte, so the length of the
 * string never changes.
 *
 * Besides truncated sequences and stray continuation bytes, the
 * following ill-formed inputs are rejected:
 *   - overlong encodings (lead bytes 0xC0/0xC1, E0 80..9F, F0 80..8F)
 *   - utf-16 surrogates (ED A0..BF)
 *   - code points above U+10FFFF (F4 90.., lead bytes 0xF5 and up)
 *
 * \param string string to clean (may be NULL, which is treated as valid)
 * \returns 1 if the string was modified, 0 if it was already valid
 */
int db_utf8_validate_string(char *string) {
    /* work on unsigned bytes so the range checks below do not depend
     * on whether plain char is signed on this platform */
    unsigned char *current = (unsigned char *)string;
    int retval=0;

    if(!string)
        return 0;

    while(*current) {
        unsigned char lead = *current;
        /* allowed range for the NEXT continuation byte; only the first
         * continuation byte is ever narrower than 0x80..0xBF */
        unsigned char lo = 0x80;
        unsigned char hi = 0xBF;
        int run = 0;       /* continuation bytes required by this lead */
        int r_current = 0; /* continuation bytes actually found */

        if(lead < 0x80) {
            /* plain ascii */
            current++;
            continue;
        }

        if((lead >= 0xC2) && (lead <= 0xDF)) {
            run = 1;                        /* 0xC0, 0xC1 would be overlong */
        } else if((lead >= 0xE0) && (lead <= 0xEF)) {
            run = 2;
            if(lead == 0xE0) lo = 0xA0;     /* below this is overlong */
            if(lead == 0xED) hi = 0x9F;     /* above this is a surrogate */
        } else if((lead >= 0xF0) && (lead <= 0xF4)) {
            run = 3;
            if(lead == 0xF0) lo = 0x90;     /* below this is overlong */
            if(lead == 0xF4) hi = 0x8F;     /* above this is > U+10FFFF */
        }

        /* a NUL terminator falls outside [lo,hi], so this loop can
         * never walk past the end of the string */
        while(r_current < run) {
            unsigned char next = current[r_current + 1];
            if((next < lo) || (next > hi))
                break;
            r_current++;
            lo = 0x80;
            hi = 0xBF;
        }

        if((run == 0) || (r_current != run)) {
            /* bad lead, stray continuation, or broken sequence: replace
             * just this byte; any leftover continuation bytes will be
             * replaced one at a time on the following iterations */
            *current++ = '?';
            retval=1;
        } else {
            current += (1 + run);
        }
    }

    return retval;
}

View File

@ -53,6 +53,7 @@ void db_sqlite_build_mp3file(char **valarray, MP3FILE *pmp3);
int db_sqlite_exec(int fatal, char *fmt, ...);
int db_sqlite_get_table(int fatal, char ***resarray, int *rows, int *cols, char *fmt, ...);
int db_sqlite_free_table(char **resarray);
int db_sqlite_get_int(int loglevel, char *fmt, ...);
int db_sqlite_update(MP3FILE *pmp3);
int db_sqlite_update_version(int from_version);
int db_sqlite_get_version(void);
@ -156,6 +157,45 @@ int db_sqlite_free_table(char **resarray) {
return 0;
}
/**
 * db_sqlite_get_int
 *
 * Build a query from a sqlite_vmprintf-style format, run it against
 * the songs database, and return the first column of the first result
 * row converted with atoi().
 *
 * \param loglevel level at which a failed query is reported.  When it
 *        is E_FATAL the query text itself is logged at E_LOG first, so
 *        that it is visible before the fatal error message.
 * \param fmt format string for the query, followed by its arguments
 * \returns the integer value, or 0 if the query fails, returns no
 *          rows, or returns a NULL value
 */
int db_sqlite_get_int(int loglevel, char *fmt, ...) {
    int rows=0, cols=0;
    char **resarray=NULL;
    va_list ap;
    char *query;
    int err;
    char *perr=NULL;
    int retval=0;

    va_start(ap,fmt);
    query=sqlite_vmprintf(fmt,ap);
    va_end(ap);

    DPRINTF(E_DBG,L_DB,"Executing: %s\n",query);

    /* only the database access itself needs the lock; releasing the
     * query/error strings afterwards does not touch the db handle */
    db_sqlite_lock();
    err=sqlite_get_table(db_sqlite_songs,query,&resarray,&rows,&cols,&perr);
    db_sqlite_unlock();

    if(err != SQLITE_OK) {
        DPRINTF(loglevel == E_FATAL ? E_LOG : loglevel,L_DB,"Query: %s\n",query);
        DPRINTF(loglevel,L_DB,"Error: %s\n",perr ? perr : "(no error message)");
        sqlite_freemem(query);
        if(perr)
            sqlite_freemem(perr); /* error text is allocated by sqlite */
        return 0;
    }

    sqlite_freemem(query);

    /* resarray holds the column names in its first `cols` slots, so the
     * first data value sits at index `cols` -- but only when at least
     * one row came back, and the value itself may be an SQL NULL */
    if((rows > 0) && (cols > 0) && resarray[cols])
        retval=atoi(resarray[cols]);

    sqlite_free_table(resarray);
    return retval;
}
/**
* open sqlite database
@ -184,8 +224,13 @@ int db_sqlite_open(char *parameters) {
*/
int db_sqlite_init(int reload) {
int items;
int rescan;
db_sqlite_update_version(db_sqlite_get_version());
rescan=db_sqlite_get_int(E_DBG,"SELECT value FROM config WHERE term='rescan'");
if(rescan)
reload=1;
items=db_sqlite_get_count(countSongs);
@ -237,9 +282,11 @@ int db_sqlite_end_scan(void) {
if(db_sqlite_reload) {
db_sqlite_exec(E_FATAL,"COMMIT TRANSACTION");
db_sqlite_exec(E_FATAL,"CREATE INDEX idx_path ON songs(path)");
db_sqlite_exec(E_DBG,"DELETE FROM config WHERE term='rescan'");
db_sqlite_exec(E_FATAL,"PRAGMA synchronous=NORMAL");
} else {
db_sqlite_exec(E_FATAL,"DELETE FROM songs WHERE id NOT IN (SELECT id FROM updated)");
db_sqlite_exec(E_FATAL,"UPDATE songs SET force_update=0");
db_sqlite_exec(E_FATAL,"DROP TABLE updated");
}
@ -1205,6 +1252,7 @@ int db_sqlite_get_version(void) {
char *db_sqlite_upgrade_scripts[] = {
/* version 0 -> version 1 -- initial update */
"CREATE TABLE songs (\n"
" id INTEGER PRIMARY KEY NOT NULL,\n"
" path VARCHAR(4096) UNIQUE NOT NULL,\n"
@ -1262,7 +1310,13 @@ char *db_sqlite_upgrade_scripts[] = {
" songid INTEGER NOT NULL\n"
");\n"
"INSERT INTO config VALUES ('version','','1');\n"
"INSERT INTO playlists VALUES (1,'Library',1,0,'1');\n", /* Version 0 -> Version 1 */
"INSERT INTO playlists VALUES (1,'Library',1,0,'1');\n",
/* version 1 -> version 2 */
/* force rescan for invalid utf-8 data */
"REPLACE INTO config VALUES('rescan',NULL,1);\n"
"UPDATE config SET value=2 WHERE term='version';\n",
NULL /* No more versions! */
};
@ -1272,7 +1326,6 @@ char *db_sqlite_upgrade_scripts[] = {
* \param from_version the current version of the database
*/
int db_sqlite_update_version(int from_version) {
while(db_sqlite_upgrade_scripts[from_version]) {
DPRINTF(E_LOG,L_DB,"Upgrading database from version %d to version %d\n",from_version,
from_version+1);