Clean up invalid UTF-8 data. This forces a database update and a full rescan (mostly as a test of the database upgrade mechanism).
This commit is contained in:
parent
24667c8d01
commit
ff8c69d915
|
@ -23,7 +23,7 @@
|
||||||
# include "config.h"
|
# include "config.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define _XOPEN_SOURCE 600 /** I forgot why I needed this? */
|
#define _XOPEN_SOURCE 500 /** unix98? pthread_once_t, etc */
|
||||||
|
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
@ -236,6 +236,8 @@ static void db_writelock(void);
|
||||||
static void db_readlock(void);
|
static void db_readlock(void);
|
||||||
static int db_unlock(void);
|
static int db_unlock(void);
|
||||||
static void db_init_once(void);
|
static void db_init_once(void);
|
||||||
|
static void db_utf8_validate(MP3FILE *pmp3);
|
||||||
|
static int db_utf8_validate_string(char *string);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* encode a string meta request into a MetaField_t
|
* encode a string meta request into a MetaField_t
|
||||||
|
@ -423,6 +425,7 @@ int db_add(MP3FILE *pmp3) {
|
||||||
int retval;
|
int retval;
|
||||||
|
|
||||||
db_writelock();
|
db_writelock();
|
||||||
|
db_utf8_validate(pmp3);
|
||||||
retval=db_current->dbs_add(pmp3);
|
retval=db_current->dbs_add(pmp3);
|
||||||
db_revision_no++;
|
db_revision_no++;
|
||||||
db_unlock();
|
db_unlock();
|
||||||
|
@ -685,3 +688,86 @@ int db_dmap_add_container(char *where, char *tag, int size) {
|
||||||
return 8;
|
return 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* check the strings in a MP3FILE to ensure they are
|
||||||
|
* valid utf-8. If they are not, the string will be corrected
|
||||||
|
*
|
||||||
|
* \param pmp3 MP3FILE to verify for valid utf-8
|
||||||
|
*/
|
||||||
|
void db_utf8_validate(MP3FILE *pmp3) {
|
||||||
|
int is_invalid=0;
|
||||||
|
|
||||||
|
/* we won't bother with path and fname... those were culled with the
|
||||||
|
* scan. Even if they are invalid (_could_ they be?), then we
|
||||||
|
* won't be able to open the file if we change them. Likewise,
|
||||||
|
* we won't do type or description, as these can't be bad, or they
|
||||||
|
* wouldn't have been scanned */
|
||||||
|
|
||||||
|
is_invalid = db_utf8_validate_string(pmp3->title);
|
||||||
|
is_invalid |= db_utf8_validate_string(pmp3->artist);
|
||||||
|
is_invalid |= db_utf8_validate_string(pmp3->album);
|
||||||
|
is_invalid |= db_utf8_validate_string(pmp3->genre);
|
||||||
|
is_invalid |= db_utf8_validate_string(pmp3->comment);
|
||||||
|
is_invalid |= db_utf8_validate_string(pmp3->composer);
|
||||||
|
is_invalid |= db_utf8_validate_string(pmp3->orchestra);
|
||||||
|
is_invalid |= db_utf8_validate_string(pmp3->conductor);
|
||||||
|
is_invalid |= db_utf8_validate_string(pmp3->grouping);
|
||||||
|
is_invalid |= db_utf8_validate_string(pmp3->url);
|
||||||
|
|
||||||
|
if(is_invalid) {
|
||||||
|
DPRINTF(E_LOG,L_SCAN,"Invalid UTF-8 in %s\n",pmp3->path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * check a string to verify it is valid utf-8.  The passed
 * string will be in-place modified to be utf-8 clean by substituting
 * the character '?' for every byte that is not part of a well-formed
 * utf-8 sequence.  The length of the string never changes.
 *
 * In addition to stray continuation bytes and truncated sequences,
 * this rejects the sequences that have the right bit pattern but are
 * still ill-formed per RFC 3629: overlong encodings (lead bytes
 * 0xC0/0xC1, 0xE0 followed by 0x80..0x9F, 0xF0 followed by
 * 0x80..0x8F), UTF-16 surrogates (0xED followed by 0xA0..0xBF) and
 * code points above U+10FFFF (0xF4 followed by 0x90..0xBF, lead
 * bytes 0xF5 and up).
 *
 * \param string NUL-terminated string to clean; NULL is accepted
 * \returns 1 if at least one byte was replaced, 0 if the string was
 *          already valid (or NULL)
 */
int db_utf8_validate_string(char *string) {
    /* work on unsigned bytes so range comparisons behave the same
     * whether plain char is signed or not */
    unsigned char *current = (unsigned char *)string;
    unsigned char lead, trail, lo, hi;
    int run, r_current;
    int retval=0;

    if(!string)
        return 0;

    while(*current) {
        lead = *current;

        if(lead < 0x80) {
            /* plain ascii */
            current++;
            continue;
        }

        /* run is the number of continuation bytes the lead byte
         * requires; lo..hi is the range allowed for the FIRST
         * continuation byte, which is where overlongs, surrogates
         * and out-of-range code points are filtered out */
        run = 0;
        lo = 0x80;
        hi = 0xBF;

        if((lead >= 0xC2) && (lead <= 0xDF)) {
            run = 1;
        } else if((lead >= 0xE0) && (lead <= 0xEF)) {
            run = 2;
            if(lead == 0xE0) lo = 0xA0;   /* below this is overlong */
            if(lead == 0xED) hi = 0x9F;   /* above this is a surrogate */
        } else if((lead >= 0xF0) && (lead <= 0xF4)) {
            run = 3;
            if(lead == 0xF0) lo = 0x90;   /* below this is overlong */
            if(lead == 0xF4) hi = 0x8F;   /* above this is > U+10FFFF */
        }

        /* count the acceptable continuation bytes; a NUL terminator
         * is below lo, so this never reads past the end of string */
        r_current = 0;
        while(r_current < run) {
            trail = current[r_current + 1];
            if((trail < lo) || (trail > hi))
                break;

            /* only the first continuation byte has a narrowed range */
            lo = 0x80;
            hi = 0xBF;
            r_current++;
        }

        if((!run) || (r_current != run)) {
            /* bad lead byte or bad/short sequence: replace just this
             * byte, any continuation bytes left behind are caught
             * (and replaced) on the following iterations */
            *current++ = '?';
            retval=1;
        } else {
            current += (1 + run);
        }
    }

    return retval;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -53,6 +53,7 @@ void db_sqlite_build_mp3file(char **valarray, MP3FILE *pmp3);
|
||||||
int db_sqlite_exec(int fatal, char *fmt, ...);
|
int db_sqlite_exec(int fatal, char *fmt, ...);
|
||||||
int db_sqlite_get_table(int fatal, char ***resarray, int *rows, int *cols, char *fmt, ...);
|
int db_sqlite_get_table(int fatal, char ***resarray, int *rows, int *cols, char *fmt, ...);
|
||||||
int db_sqlite_free_table(char **resarray);
|
int db_sqlite_free_table(char **resarray);
|
||||||
|
int db_sqlite_get_int(int loglevel, char *fmt, ...);
|
||||||
int db_sqlite_update(MP3FILE *pmp3);
|
int db_sqlite_update(MP3FILE *pmp3);
|
||||||
int db_sqlite_update_version(int from_version);
|
int db_sqlite_update_version(int from_version);
|
||||||
int db_sqlite_get_version(void);
|
int db_sqlite_get_version(void);
|
||||||
|
@ -156,6 +157,45 @@ int db_sqlite_free_table(char **resarray) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* db_sqlite_get_int
|
||||||
|
*/
|
||||||
|
int db_sqlite_get_int(int loglevel, char *fmt, ...) {
|
||||||
|
int rows, cols;
|
||||||
|
char **resarray;
|
||||||
|
va_list ap;
|
||||||
|
char *query;
|
||||||
|
int err;
|
||||||
|
char *perr;
|
||||||
|
int retval;
|
||||||
|
|
||||||
|
va_start(ap,fmt);
|
||||||
|
query=sqlite_vmprintf(fmt,ap);
|
||||||
|
va_end(ap);
|
||||||
|
|
||||||
|
DPRINTF(E_DBG,L_DB,"Executing: %s\n",query);
|
||||||
|
|
||||||
|
db_sqlite_lock();
|
||||||
|
err=sqlite_get_table(db_sqlite_songs,query,&resarray,&rows,&cols,&perr);
|
||||||
|
if(err == SQLITE_OK)
|
||||||
|
sqlite_freemem(query);
|
||||||
|
db_sqlite_unlock();
|
||||||
|
|
||||||
|
if(err != SQLITE_OK) {
|
||||||
|
DPRINTF(loglevel == E_FATAL ? E_LOG : loglevel,L_DB,"Query: %s\n",query);
|
||||||
|
DPRINTF(loglevel,L_DB,"Error: %s\n",perr);
|
||||||
|
db_sqlite_lock();
|
||||||
|
sqlite_freemem(query);
|
||||||
|
db_sqlite_unlock();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
retval=atoi(resarray[cols]);
|
||||||
|
|
||||||
|
sqlite_free_table(resarray);
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* open sqlite database
|
* open sqlite database
|
||||||
|
@ -184,8 +224,13 @@ int db_sqlite_open(char *parameters) {
|
||||||
*/
|
*/
|
||||||
int db_sqlite_init(int reload) {
|
int db_sqlite_init(int reload) {
|
||||||
int items;
|
int items;
|
||||||
|
int rescan;
|
||||||
|
|
||||||
db_sqlite_update_version(db_sqlite_get_version());
|
db_sqlite_update_version(db_sqlite_get_version());
|
||||||
|
rescan=db_sqlite_get_int(E_DBG,"SELECT value FROM config WHERE term='rescan'");
|
||||||
|
|
||||||
|
if(rescan)
|
||||||
|
reload=1;
|
||||||
|
|
||||||
items=db_sqlite_get_count(countSongs);
|
items=db_sqlite_get_count(countSongs);
|
||||||
|
|
||||||
|
@ -237,9 +282,11 @@ int db_sqlite_end_scan(void) {
|
||||||
if(db_sqlite_reload) {
|
if(db_sqlite_reload) {
|
||||||
db_sqlite_exec(E_FATAL,"COMMIT TRANSACTION");
|
db_sqlite_exec(E_FATAL,"COMMIT TRANSACTION");
|
||||||
db_sqlite_exec(E_FATAL,"CREATE INDEX idx_path ON songs(path)");
|
db_sqlite_exec(E_FATAL,"CREATE INDEX idx_path ON songs(path)");
|
||||||
|
db_sqlite_exec(E_DBG,"DELETE FROM config WHERE term='rescan'");
|
||||||
db_sqlite_exec(E_FATAL,"PRAGMA synchronous=NORMAL");
|
db_sqlite_exec(E_FATAL,"PRAGMA synchronous=NORMAL");
|
||||||
} else {
|
} else {
|
||||||
db_sqlite_exec(E_FATAL,"DELETE FROM songs WHERE id NOT IN (SELECT id FROM updated)");
|
db_sqlite_exec(E_FATAL,"DELETE FROM songs WHERE id NOT IN (SELECT id FROM updated)");
|
||||||
|
db_sqlite_exec(E_FATAL,"UPDATE songs SET force_update=0");
|
||||||
db_sqlite_exec(E_FATAL,"DROP TABLE updated");
|
db_sqlite_exec(E_FATAL,"DROP TABLE updated");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1205,6 +1252,7 @@ int db_sqlite_get_version(void) {
|
||||||
|
|
||||||
|
|
||||||
char *db_sqlite_upgrade_scripts[] = {
|
char *db_sqlite_upgrade_scripts[] = {
|
||||||
|
/* version 0 -> version 1 -- initial update */
|
||||||
"CREATE TABLE songs (\n"
|
"CREATE TABLE songs (\n"
|
||||||
" id INTEGER PRIMARY KEY NOT NULL,\n"
|
" id INTEGER PRIMARY KEY NOT NULL,\n"
|
||||||
" path VARCHAR(4096) UNIQUE NOT NULL,\n"
|
" path VARCHAR(4096) UNIQUE NOT NULL,\n"
|
||||||
|
@ -1262,7 +1310,13 @@ char *db_sqlite_upgrade_scripts[] = {
|
||||||
" songid INTEGER NOT NULL\n"
|
" songid INTEGER NOT NULL\n"
|
||||||
");\n"
|
");\n"
|
||||||
"INSERT INTO config VALUES ('version','','1');\n"
|
"INSERT INTO config VALUES ('version','','1');\n"
|
||||||
"INSERT INTO playlists VALUES (1,'Library',1,0,'1');\n", /* Version 0 -> Version 1 */
|
"INSERT INTO playlists VALUES (1,'Library',1,0,'1');\n",
|
||||||
|
|
||||||
|
/* version 1 -> version 2 */
|
||||||
|
/* force rescan for invalid utf-8 data */
|
||||||
|
"REPLACE INTO config VALUES('rescan',NULL,1);\n"
|
||||||
|
"UPDATE config SET value=2 WHERE term='version';\n",
|
||||||
|
|
||||||
NULL /* No more versions! */
|
NULL /* No more versions! */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1272,7 +1326,6 @@ char *db_sqlite_upgrade_scripts[] = {
|
||||||
* \param from_version the current version of the database
|
* \param from_version the current version of the database
|
||||||
*/
|
*/
|
||||||
int db_sqlite_update_version(int from_version) {
|
int db_sqlite_update_version(int from_version) {
|
||||||
|
|
||||||
while(db_sqlite_upgrade_scripts[from_version]) {
|
while(db_sqlite_upgrade_scripts[from_version]) {
|
||||||
DPRINTF(E_LOG,L_DB,"Upgrading database from version %d to version %d\n",from_version,
|
DPRINTF(E_LOG,L_DB,"Upgrading database from version %d to version %d\n",from_version,
|
||||||
from_version+1);
|
from_version+1);
|
||||||
|
|
Loading…
Reference in New Issue