update lexer, first validating parser

This commit is contained in:
Ron Pedde 2005-10-17 04:57:06 +00:00
parent 4740aeded0
commit 7e5535d9bd
1 changed files with 307 additions and 51 deletions

View File

@ -71,6 +71,11 @@ typedef struct tag_token {
* 0x4000 -
* 0x2000 - data is string
* 0x1000 - data is int
*
* 0x0800 -
* 0x0400 -
* 0x0200 -
* 0x0100 -
*/
#define T_STRING 0x2001
@ -80,15 +85,39 @@ typedef struct tag_token {
#define T_OPENPAREN 0x0005
#define T_CLOSEPAREN 0x0006
#define T_QUOTE 0x0007
#define T_LESS 0x0008
#define T_LESSEQUAL 0x0009
#define T_GREATER 0x000A
#define T_GREATEREQUAL 0x000B
#define T_EQUAL 0x000C
#define T_LESS 0x0007
#define T_LESSEQUAL 0x0008
#define T_GREATER 0x0009
#define T_GREATEREQUAL 0x000a
#define T_EQUAL 0x000b
#define T_OR 0x000c
#define T_AND 0x000d
#define T_QUOTE 0x000e
#define T_NUMBER 0x000f
#define T_LAST 0x0010
#define T_EOF 0x000D
#define T_BOF 0x000E
#define T_EOF 0x00fd
#define T_BOF 0x00fe
#define T_ERROR 0x00ff
char *sp_token_descr[] = {
"unknown",
"literal string",
"integer field",
"string field",
"date field",
"(",
")",
"<",
"<=",
">",
">=",
"=",
"or",
"and",
"quote",
"number"
};
typedef struct tag_fieldlookup {
int type;
@ -124,6 +153,12 @@ FIELDLOOKUP sp_fields[] = {
{ T_INT_FIELD, "datakind" },
{ T_INT_FIELD, "itemkind" },
{ T_STRING_FIELD, "description" },
/* end of db fields */
{ T_OR, "or" },
{ T_AND, "and" },
/* end */
{ 0, NULL },
};
@ -134,6 +169,16 @@ typedef struct tag_parsetree {
SP_TOKEN next_token;
} PARSESTRUCT, *PARSETREE;
/* Forwards */
int sp_parse_phrase(PARSETREE tree);
int sp_parse_aexpr(PARSETREE tree);
int sp_parse_oexpr(PARSETREE tree);
int sp_parse_expr(PARSETREE tree);
int sp_parse_criterion(PARSETREE tree);
int sp_parse_string_criterion(PARSETREE tree);
int sp_parse_int_criterion(PARSETREE tree);
int sp_parse_date_criterion(PARSETREE tree);
/**
* scan the input, returning the next available token.
*
@ -141,20 +186,25 @@ typedef struct tag_parsetree {
* @returns next token (token, not the value)
*/
int sp_scan(PARSETREE tree) {
int is_string=0;
char *terminator=NULL;
char *tail;
int advance=0;
FIELDLOOKUP *pfield=sp_fields;
int len;
int found;
if(tree->token.token_id & 0x2000) {
if(tree->token.data.cvalue)
free(tree->token.data.cvalue);
if(tree->token.data.cvalue)
free(tree->token.data.cvalue);
}
tree->token=tree->next_token;
if(tree->token.token_id == T_EOF)
if(tree->token.token_id == T_EOF) {
DPRINTF(E_SPAM,L_PARSE,"Returning token T_EOF\n");
return T_EOF;
}
/* keep advancing until we have a token */
while(*(tree->current) && strchr(" \t\n\r",*(tree->current)))
@ -162,6 +212,7 @@ int sp_scan(PARSETREE tree) {
if(!*(tree->current)) {
tree->next_token.token_id = T_EOF;
DPRINTF(E_SPAM,L_PARSE,"Returning token %04x\n",tree->token.token_id);
return tree->token.token_id;
}
@ -170,10 +221,25 @@ int sp_scan(PARSETREE tree) {
/* check singletons */
switch(*(tree->current)) {
case '|':
if((*(tree->current + 1) == '|')) {
advance = 2;
tree->next_token.token_id = T_OR;
}
break;
case '&':
if((*(tree->current + 1) == '&')) {
advance = 2;
tree->next_token.token_id = T_AND;
}
break;
case '=':
advance=1;
tree->next_token.token_id = T_EQUAL;
break;
case '<':
if((*(tree->current + 1)) == '=') {
advance = 2;
@ -183,6 +249,7 @@ int sp_scan(PARSETREE tree) {
tree->next_token.token_id = T_LESS;
}
break;
case '>':
if((*(tree->current + 1)) == '=') {
advance = 2;
@ -209,45 +276,69 @@ int sp_scan(PARSETREE tree) {
break;
}
if(advance) {
if(advance) { /* singleton */
tree->current += advance;
} else { /* either a keyword token or a quoted string */
DPRINTF(E_SPAM,L_PARSE,"keyword or string!\n");
/* walk to a terminator */
tail = tree->current;
while((*tail) && (!strchr(" \t\n\r\"<>=()",*tail))) {
terminator = " \t\n\r\"<>=()|&";
if(tree->token.token_id == T_QUOTE) {
is_string=1;
terminator="\"";
}
while((*tail) && (!strchr(terminator,*tail))) {
tail++;
}
/* let's see what we have... */
pfield=sp_fields;
found=0;
len = tail - tree->current;
DPRINTF(E_SPAM,L_PARSE,"Len is %d\n",len);
while(pfield->name) {
if(strlen(pfield->name) == len) {
if(strncasecmp(pfield->name,tree->current,len) == 0)
break;
if(!is_string) {
/* find it in the token list */
pfield=sp_fields;
DPRINTF(E_SPAM,L_PARSE,"Len is %d\n",len);
while(pfield->name) {
if(strlen(pfield->name) == len) {
if(strncasecmp(pfield->name,tree->current,len) == 0) {
found=1;
break;
}
}
pfield++;
}
pfield++;
}
if(pfield->name) {
if(found) {
tree->next_token.token_id = pfield->type;
} else {
tree->next_token.token_id = T_STRING;
}
tree->next_token.data.cvalue = malloc(len + 1);
if(!tree->next_token.data.cvalue) {
/* fail on malloc error */
DPRINTF(E_FATAL,L_PARSE,"Malloc error.\n");
}
strncpy(tree->next_token.data.cvalue,tree->current,len);
tree->next_token.data.cvalue[len] = '\x0';
if(tree->next_token.token_id & 0x2000) {
tree->next_token.data.cvalue = malloc(len + 1);
if(!tree->next_token.data.cvalue) {
/* fail on malloc error */
DPRINTF(E_FATAL,L_PARSE,"Malloc error.\n");
}
strncpy(tree->next_token.data.cvalue,tree->current,len);
tree->next_token.data.cvalue[len] = '\x0';
}
/* check for numberic? */
tree->current=tail;
}
DPRINTF(E_SPAM,L_PARSE,"Returning token %04x\n",tree->token.token_id);
if(tree->token.token_id & 0x2000)
DPRINTF(E_SPAM,L_PARSE,"String val: %s\n",tree->token.data.cvalue);
if(tree->token.token_id & 0x1000)
DPRINTF(E_SPAM,L_PARSE,"Int val: %d\n",tree->token.data.ivalue);
return tree->token.token_id;
}
@ -271,14 +362,14 @@ PARSETREE sp_init(void) {
/**
* parse a term or phrase into a tree.
*
* I'm not a language expert, so I'd suggestions on the
* I'm not a language expert, so I'd welcome suggestions on the
* following production rules:
*
* phrase -> aexpr T_EOF
* aexpr -> oexpr { T_AND oexpr }
* oexpr -> expr { T_OR expr }
* expr -> T_OPENPAREN aexpr T_CLOSEPAREN | criteria
* criteria -> field op value
* expr -> T_OPENPAREN aexpr T_CLOSEPAREN | criterion
* criterion -> field op value
*
* field -> T_STRINGFIELD, T_INTFIELD, T_DATEFIELD
* op -> T_EQUAL, T_GREATEREQUAL, etc
@ -296,24 +387,13 @@ int sp_parse(PARSETREE tree, char *term) {
sp_scan(tree);
sp_scan(tree);
while(sp_scan(tree)) {
DPRINTF(E_SPAM,L_PARSE,"Got token %04X\n",tree->token.token_id);
if(tree->token.token_id & 0x2000) {
DPRINTF(E_SPAM,L_PARSE," Str val: %s\n",tree->token.data.cvalue);
} else if(tree->token.token_id & 0x1000) {
DPRINTF(E_SPAM,L_PARSE," Int val: %d (0x%04X)\n",
tree->token.data.ivalue,tree->token.data.ivalue);
}
if((tree->token.token_id == T_EOF))
return 1; /* valid tree! */
/* otherwise, keep scanning until done or error */
if(sp_parse_phrase(tree)) {
DPRINTF(E_SPAM,L_PARSE,"Parsed successfully\n");
} else {
DPRINTF(E_SPAM,L_PARSE,"Parsing error\n");
}
return 0;
return 1;
}
@ -331,7 +411,7 @@ int sp_parse_phrase(PARSETREE tree) {
DPRINTF(E_SPAM,L_PARSE,"Entering sp_parse_phrase\n");
if(sp_parse_aexpr(tree) && (tree->token->token_id == T_EOF))
if(sp_parse_aexpr(tree) && (tree->token.token_id == T_EOF))
result=1;
DPRINTF(E_SPAM,L_PARSE,"Exiting sp_parse_phrase: %s\n",result ?
@ -355,7 +435,7 @@ int sp_parse_aexpr(PARSETREE tree) {
while(1) {
result = sp_parse_oexpr(tree);
if((!result) || (tree->token->token_id != T_AND)) break;
if((!result) || (tree->token.token_id != T_AND)) break;
}
DPRINTF(E_SPAM,L_PARSE,"Exiting sp_parse_aexpr: %s\n",result ?
@ -379,7 +459,7 @@ int sp_parse_oexpr(PARSETREE tree) {
while(1) {
result = sp_parse_expr(tree);
if((!result) || (tree->token->token_id != T_OR)) break;
if((!result) || (tree->token.token_id != T_OR)) break;
}
DPRINTF(E_SPAM,L_PARSE,"Exiting sp_parse_oexpr: %s\n",result ?
@ -397,8 +477,184 @@ int sp_parse_oexpr(PARSETREE tree) {
* @returns 1 if successful, 0 otherwise
*/
int sp_parse_expr(PARSETREE tree) {
int result=0;
DPRINTF(E_SPAM,L_PARSE,"Entering sp_parse_expr\n");
if(tree->token.token_id == T_OPENPAREN) {
sp_scan(tree);
result = sp_parse_aexpr(tree);
if((result) && (tree->token.token_id == T_OPENPAREN)) {
sp_scan(tree);
} else {
/* Error: expecting close paren */
result=0;
}
} else {
result = sp_parse_criterion(tree);
}
DPRINTF(E_SPAM,L_PARSE,"Exiting sp_parse_expr: %s\n",result ?
"success" : "fail");
return result;
}
/**
* parse for a criterion
*
* criterion -> field op value
*
* @param tree tree we are building
* @returns 1 if successful, 0 otherwise
*/
int sp_parse_criterion(PARSETREE tree) {
int result=0;
DPRINTF(E_SPAM,L_PARSE,"Entering sp_parse_criterion\n");
switch(tree->token.token_id) {
case T_STRING_FIELD:
result = sp_parse_string_criterion(tree);
break;
case T_INT_FIELD:
result = sp_parse_int_criterion(tree);
break;
case T_DATE_FIELD:
result = sp_parse_date_criterion(tree);
break;
default:
/* Error: expecting field */
result = 0;
break;
}
DPRINTF(E_SPAM,L_PARSE,"Exiting sp_parse_criterion: %s\n",result ?
"success" : "fail");
return result;
}
/**
* parse for a string criterion
*
* @param tree tree we are building
* @returns 1 if successful, 0 otherwise
*/
int sp_parse_string_criterion(PARSETREE tree) {
int result=0;
DPRINTF(E_SPAM,L_PARSE,"Entering sp_parse_string_criterion\n");
sp_scan(tree); /* scan past the string field we know is there */
switch(tree->token.token_id) {
case T_EQUAL:
result = 1;
break;
default:
/* Error: expecting legal string comparison operator */
break;
}
if(result) {
sp_scan(tree);
/* should be sitting on quote literal string quote */
if(tree->token.token_id == T_QUOTE) {
sp_scan(tree);
if(tree->token.token_id == T_STRING) {
sp_scan(tree);
if(tree->token.token_id == T_QUOTE) {
result=1;
sp_scan(tree);
} else {
DPRINTF(E_SPAM,L_PARSE,"Expecting closign quote\n");
}
} else {
DPRINTF(E_SPAM,L_PARSE,"Expecting literal string\n");
}
} else {
DPRINTF(E_SPAM,L_PARSE,"Expecting opening quote\n");
}
}
DPRINTF(E_SPAM,L_PARSE,"Exiting sp_parse_string_criterion: %s\n",result ?
"success" : "fail");
return result;
}
/**
* parse for an int criterion
*
* @param tree tree we are building
* @returns 1 if successful, 0 otherwise
*/
int sp_parse_int_criterion(PARSETREE tree) {
int result=0;
DPRINTF(E_SPAM,L_PARSE,"Entering sp_parse_int_criterion\n");
sp_scan(tree); /* scan past the int field we know is there */
switch(tree->token.token_id) {
case T_LESSEQUAL:
case T_LESS:
case T_GREATEREQUAL:
case T_GREATER:
case T_EQUAL:
result = 1;
break;
default:
/* Error: expecting legal string comparison operator */
DPRINTF(E_LOG,L_PARSE,"Expecting string comparison op, got %04X\n",
tree->token.token_id);
break;
}
if(result) {
sp_scan(tree);
/* should be sitting on a literal string */
if(tree->token.token_id == T_NUMBER) {
result = 1;
sp_scan(tree);
} else {
/* Error: Expecting literal string */
DPRINTF(E_LOG,L_PARSE,"Expecting string literal, got %04X\n",
tree->token.token_id);
result = 0;
}
}
DPRINTF(E_SPAM,L_PARSE,"Exiting sp_parse_int_criterion: %s\n",result ?
"success" : "fail");
return result;
}
/**
* parse for a date criterion
*
* @param tree tree we are building
* @returns 1 if successful, 0 otherwise
*/
int sp_parse_date_criterion(PARSETREE tree) {
int result=0;
DPRINTF(E_SPAM,L_PARSE,"Entering sp_parse_date_criterion\n");
DPRINTF(E_SPAM,L_PARSE,"Exiting sp_parse_date_criterion: %s\n",result ?
"success" : "fail");
return result;
}
/**
* dispose of an initialized tree
*