mirror of
https://github.com/minio/minio.git
synced 2025-11-25 20:16:10 -05:00
S3 Select API Support for CSV (#6127)
Add support for trivial where clause cases
This commit is contained in:
950
vendor/github.com/xwb1989/sqlparser/token.go
generated
vendored
Normal file
950
vendor/github.com/xwb1989/sqlparser/token.go
generated
vendored
Normal file
@@ -0,0 +1,950 @@
|
||||
/*
|
||||
Copyright 2017 Google Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sqlparser
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/xwb1989/sqlparser/dependency/bytes2"
|
||||
"github.com/xwb1989/sqlparser/dependency/sqltypes"
|
||||
)
|
||||
|
||||
// Sizing and sentinel values used by the Tokenizer.
const (
	// defaultBufSize is the length of the read buffer that NewTokenizer
	// allocates for stream-backed tokenizers.
	defaultBufSize = 4096

	// eofChar is the value lastChar holds once the input is exhausted.
	// It lies just above the byte range (0x00-0xFF), so it can never be
	// confused with a character read from the input.
	eofChar = 0x100
)
|
||||
|
||||
// Tokenizer is the struct used to generate SQL
|
||||
// tokens for the parser.
|
||||
type Tokenizer struct {
|
||||
InStream io.Reader
|
||||
AllowComments bool
|
||||
ForceEOF bool
|
||||
lastChar uint16
|
||||
Position int
|
||||
lastToken []byte
|
||||
LastError error
|
||||
posVarIndex int
|
||||
ParseTree Statement
|
||||
partialDDL *DDL
|
||||
nesting int
|
||||
multi bool
|
||||
specialComment *Tokenizer
|
||||
|
||||
buf []byte
|
||||
bufPos int
|
||||
bufSize int
|
||||
}
|
||||
|
||||
// NewStringTokenizer creates a new Tokenizer for the
|
||||
// sql string.
|
||||
func NewStringTokenizer(sql string) *Tokenizer {
|
||||
buf := []byte(sql)
|
||||
return &Tokenizer{
|
||||
buf: buf,
|
||||
bufSize: len(buf),
|
||||
}
|
||||
}
|
||||
|
||||
// NewTokenizer creates a new Tokenizer reading a sql
|
||||
// string from the io.Reader.
|
||||
func NewTokenizer(r io.Reader) *Tokenizer {
|
||||
return &Tokenizer{
|
||||
InStream: r,
|
||||
buf: make([]byte, defaultBufSize),
|
||||
}
|
||||
}
|
||||
|
||||
// keywords is a map of mysql keywords that fall into two categories:
|
||||
// 1) keywords considered reserved by MySQL
|
||||
// 2) keywords for us to handle specially in sql.y
|
||||
//
|
||||
// Those marked as UNUSED are likely reserved keywords. We add them here so that
|
||||
// when rewriting queries we can properly backtick quote them so they don't cause issues
|
||||
//
|
||||
// NOTE: If you add new keywords, add them also to the reserved_keywords or
|
||||
// non_reserved_keywords grammar in sql.y -- this will allow the keyword to be used
|
||||
// in identifiers. See the docs for each grammar to determine which one to put it into.
|
||||
var keywords = map[string]int{
|
||||
"accessible": UNUSED,
|
||||
"add": ADD,
|
||||
"against": AGAINST,
|
||||
"all": ALL,
|
||||
"alter": ALTER,
|
||||
"analyze": ANALYZE,
|
||||
"and": AND,
|
||||
"as": AS,
|
||||
"asc": ASC,
|
||||
"asensitive": UNUSED,
|
||||
"auto_increment": AUTO_INCREMENT,
|
||||
"before": UNUSED,
|
||||
"begin": BEGIN,
|
||||
"between": BETWEEN,
|
||||
"bigint": BIGINT,
|
||||
"binary": BINARY,
|
||||
"_binary": UNDERSCORE_BINARY,
|
||||
"bit": BIT,
|
||||
"blob": BLOB,
|
||||
"bool": BOOL,
|
||||
"boolean": BOOLEAN,
|
||||
"both": UNUSED,
|
||||
"by": BY,
|
||||
"call": UNUSED,
|
||||
"cascade": UNUSED,
|
||||
"case": CASE,
|
||||
"cast": CAST,
|
||||
"change": UNUSED,
|
||||
"char": CHAR,
|
||||
"character": CHARACTER,
|
||||
"charset": CHARSET,
|
||||
"check": UNUSED,
|
||||
"collate": COLLATE,
|
||||
"column": COLUMN,
|
||||
"comment": COMMENT_KEYWORD,
|
||||
"committed": COMMITTED,
|
||||
"commit": COMMIT,
|
||||
"condition": UNUSED,
|
||||
"constraint": CONSTRAINT,
|
||||
"continue": UNUSED,
|
||||
"convert": CONVERT,
|
||||
"substr": SUBSTR,
|
||||
"substring": SUBSTRING,
|
||||
"create": CREATE,
|
||||
"cross": CROSS,
|
||||
"current_date": CURRENT_DATE,
|
||||
"current_time": CURRENT_TIME,
|
||||
"current_timestamp": CURRENT_TIMESTAMP,
|
||||
"current_user": UNUSED,
|
||||
"cursor": UNUSED,
|
||||
"database": DATABASE,
|
||||
"databases": DATABASES,
|
||||
"day_hour": UNUSED,
|
||||
"day_microsecond": UNUSED,
|
||||
"day_minute": UNUSED,
|
||||
"day_second": UNUSED,
|
||||
"date": DATE,
|
||||
"datetime": DATETIME,
|
||||
"dec": UNUSED,
|
||||
"decimal": DECIMAL,
|
||||
"declare": UNUSED,
|
||||
"default": DEFAULT,
|
||||
"delayed": UNUSED,
|
||||
"delete": DELETE,
|
||||
"desc": DESC,
|
||||
"describe": DESCRIBE,
|
||||
"deterministic": UNUSED,
|
||||
"distinct": DISTINCT,
|
||||
"distinctrow": UNUSED,
|
||||
"div": DIV,
|
||||
"double": DOUBLE,
|
||||
"drop": DROP,
|
||||
"duplicate": DUPLICATE,
|
||||
"each": UNUSED,
|
||||
"else": ELSE,
|
||||
"elseif": UNUSED,
|
||||
"enclosed": UNUSED,
|
||||
"end": END,
|
||||
"enum": ENUM,
|
||||
"escape": ESCAPE,
|
||||
"escaped": UNUSED,
|
||||
"exists": EXISTS,
|
||||
"exit": UNUSED,
|
||||
"explain": EXPLAIN,
|
||||
"expansion": EXPANSION,
|
||||
"extended": EXTENDED,
|
||||
"false": FALSE,
|
||||
"fetch": UNUSED,
|
||||
"float": FLOAT_TYPE,
|
||||
"float4": UNUSED,
|
||||
"float8": UNUSED,
|
||||
"for": FOR,
|
||||
"force": FORCE,
|
||||
"foreign": FOREIGN,
|
||||
"from": FROM,
|
||||
"full": FULL,
|
||||
"fulltext": FULLTEXT,
|
||||
"generated": UNUSED,
|
||||
"geometry": GEOMETRY,
|
||||
"geometrycollection": GEOMETRYCOLLECTION,
|
||||
"get": UNUSED,
|
||||
"global": GLOBAL,
|
||||
"grant": UNUSED,
|
||||
"group": GROUP,
|
||||
"group_concat": GROUP_CONCAT,
|
||||
"having": HAVING,
|
||||
"high_priority": UNUSED,
|
||||
"hour_microsecond": UNUSED,
|
||||
"hour_minute": UNUSED,
|
||||
"hour_second": UNUSED,
|
||||
"if": IF,
|
||||
"ignore": IGNORE,
|
||||
"in": IN,
|
||||
"index": INDEX,
|
||||
"infile": UNUSED,
|
||||
"inout": UNUSED,
|
||||
"inner": INNER,
|
||||
"insensitive": UNUSED,
|
||||
"insert": INSERT,
|
||||
"int": INT,
|
||||
"int1": UNUSED,
|
||||
"int2": UNUSED,
|
||||
"int3": UNUSED,
|
||||
"int4": UNUSED,
|
||||
"int8": UNUSED,
|
||||
"integer": INTEGER,
|
||||
"interval": INTERVAL,
|
||||
"into": INTO,
|
||||
"io_after_gtids": UNUSED,
|
||||
"is": IS,
|
||||
"isolation": ISOLATION,
|
||||
"iterate": UNUSED,
|
||||
"join": JOIN,
|
||||
"json": JSON,
|
||||
"key": KEY,
|
||||
"keys": KEYS,
|
||||
"key_block_size": KEY_BLOCK_SIZE,
|
||||
"kill": UNUSED,
|
||||
"language": LANGUAGE,
|
||||
"last_insert_id": LAST_INSERT_ID,
|
||||
"leading": UNUSED,
|
||||
"leave": UNUSED,
|
||||
"left": LEFT,
|
||||
"less": LESS,
|
||||
"level": LEVEL,
|
||||
"like": LIKE,
|
||||
"limit": LIMIT,
|
||||
"linear": UNUSED,
|
||||
"lines": UNUSED,
|
||||
"linestring": LINESTRING,
|
||||
"load": UNUSED,
|
||||
"localtime": LOCALTIME,
|
||||
"localtimestamp": LOCALTIMESTAMP,
|
||||
"lock": LOCK,
|
||||
"long": UNUSED,
|
||||
"longblob": LONGBLOB,
|
||||
"longtext": LONGTEXT,
|
||||
"loop": UNUSED,
|
||||
"low_priority": UNUSED,
|
||||
"master_bind": UNUSED,
|
||||
"match": MATCH,
|
||||
"maxvalue": MAXVALUE,
|
||||
"mediumblob": MEDIUMBLOB,
|
||||
"mediumint": MEDIUMINT,
|
||||
"mediumtext": MEDIUMTEXT,
|
||||
"middleint": UNUSED,
|
||||
"minute_microsecond": UNUSED,
|
||||
"minute_second": UNUSED,
|
||||
"mod": MOD,
|
||||
"mode": MODE,
|
||||
"modifies": UNUSED,
|
||||
"multilinestring": MULTILINESTRING,
|
||||
"multipoint": MULTIPOINT,
|
||||
"multipolygon": MULTIPOLYGON,
|
||||
"names": NAMES,
|
||||
"natural": NATURAL,
|
||||
"nchar": NCHAR,
|
||||
"next": NEXT,
|
||||
"not": NOT,
|
||||
"no_write_to_binlog": UNUSED,
|
||||
"null": NULL,
|
||||
"numeric": NUMERIC,
|
||||
"offset": OFFSET,
|
||||
"on": ON,
|
||||
"only": ONLY,
|
||||
"optimize": OPTIMIZE,
|
||||
"optimizer_costs": UNUSED,
|
||||
"option": UNUSED,
|
||||
"optionally": UNUSED,
|
||||
"or": OR,
|
||||
"order": ORDER,
|
||||
"out": UNUSED,
|
||||
"outer": OUTER,
|
||||
"outfile": UNUSED,
|
||||
"partition": PARTITION,
|
||||
"point": POINT,
|
||||
"polygon": POLYGON,
|
||||
"precision": UNUSED,
|
||||
"primary": PRIMARY,
|
||||
"processlist": PROCESSLIST,
|
||||
"procedure": PROCEDURE,
|
||||
"query": QUERY,
|
||||
"range": UNUSED,
|
||||
"read": READ,
|
||||
"reads": UNUSED,
|
||||
"read_write": UNUSED,
|
||||
"real": REAL,
|
||||
"references": UNUSED,
|
||||
"regexp": REGEXP,
|
||||
"release": UNUSED,
|
||||
"rename": RENAME,
|
||||
"reorganize": REORGANIZE,
|
||||
"repair": REPAIR,
|
||||
"repeat": UNUSED,
|
||||
"repeatable": REPEATABLE,
|
||||
"replace": REPLACE,
|
||||
"require": UNUSED,
|
||||
"resignal": UNUSED,
|
||||
"restrict": UNUSED,
|
||||
"return": UNUSED,
|
||||
"revoke": UNUSED,
|
||||
"right": RIGHT,
|
||||
"rlike": REGEXP,
|
||||
"rollback": ROLLBACK,
|
||||
"schema": SCHEMA,
|
||||
"schemas": UNUSED,
|
||||
"second_microsecond": UNUSED,
|
||||
"select": SELECT,
|
||||
"sensitive": UNUSED,
|
||||
"separator": SEPARATOR,
|
||||
"serializable": SERIALIZABLE,
|
||||
"session": SESSION,
|
||||
"set": SET,
|
||||
"share": SHARE,
|
||||
"show": SHOW,
|
||||
"signal": UNUSED,
|
||||
"signed": SIGNED,
|
||||
"smallint": SMALLINT,
|
||||
"spatial": SPATIAL,
|
||||
"specific": UNUSED,
|
||||
"sql": UNUSED,
|
||||
"sqlexception": UNUSED,
|
||||
"sqlstate": UNUSED,
|
||||
"sqlwarning": UNUSED,
|
||||
"sql_big_result": UNUSED,
|
||||
"sql_cache": SQL_CACHE,
|
||||
"sql_calc_found_rows": UNUSED,
|
||||
"sql_no_cache": SQL_NO_CACHE,
|
||||
"sql_small_result": UNUSED,
|
||||
"ssl": UNUSED,
|
||||
"start": START,
|
||||
"starting": UNUSED,
|
||||
"status": STATUS,
|
||||
"stored": UNUSED,
|
||||
"straight_join": STRAIGHT_JOIN,
|
||||
"stream": STREAM,
|
||||
"table": TABLE,
|
||||
"tables": TABLES,
|
||||
"terminated": UNUSED,
|
||||
"text": TEXT,
|
||||
"than": THAN,
|
||||
"then": THEN,
|
||||
"time": TIME,
|
||||
"timestamp": TIMESTAMP,
|
||||
"tinyblob": TINYBLOB,
|
||||
"tinyint": TINYINT,
|
||||
"tinytext": TINYTEXT,
|
||||
"to": TO,
|
||||
"trailing": UNUSED,
|
||||
"transaction": TRANSACTION,
|
||||
"trigger": TRIGGER,
|
||||
"true": TRUE,
|
||||
"truncate": TRUNCATE,
|
||||
"uncommitted": UNCOMMITTED,
|
||||
"undo": UNUSED,
|
||||
"union": UNION,
|
||||
"unique": UNIQUE,
|
||||
"unlock": UNUSED,
|
||||
"unsigned": UNSIGNED,
|
||||
"update": UPDATE,
|
||||
"usage": UNUSED,
|
||||
"use": USE,
|
||||
"using": USING,
|
||||
"utc_date": UTC_DATE,
|
||||
"utc_time": UTC_TIME,
|
||||
"utc_timestamp": UTC_TIMESTAMP,
|
||||
"values": VALUES,
|
||||
"variables": VARIABLES,
|
||||
"varbinary": VARBINARY,
|
||||
"varchar": VARCHAR,
|
||||
"varcharacter": UNUSED,
|
||||
"varying": UNUSED,
|
||||
"virtual": UNUSED,
|
||||
"vindex": VINDEX,
|
||||
"vindexes": VINDEXES,
|
||||
"view": VIEW,
|
||||
"vitess_keyspaces": VITESS_KEYSPACES,
|
||||
"vitess_shards": VITESS_SHARDS,
|
||||
"vitess_tablets": VITESS_TABLETS,
|
||||
"vschema_tables": VSCHEMA_TABLES,
|
||||
"when": WHEN,
|
||||
"where": WHERE,
|
||||
"while": UNUSED,
|
||||
"with": WITH,
|
||||
"write": WRITE,
|
||||
"xor": UNUSED,
|
||||
"year": YEAR,
|
||||
"year_month": UNUSED,
|
||||
"zerofill": ZEROFILL,
|
||||
}
|
||||
|
||||
// keywordStrings contains the reverse mapping of token to keyword strings
|
||||
var keywordStrings = map[int]string{}
|
||||
|
||||
func init() {
|
||||
for str, id := range keywords {
|
||||
if id == UNUSED {
|
||||
continue
|
||||
}
|
||||
keywordStrings[id] = str
|
||||
}
|
||||
}
|
||||
|
||||
// KeywordString returns the string corresponding to the given keyword
|
||||
func KeywordString(id int) string {
|
||||
str, ok := keywordStrings[id]
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
return str
|
||||
}
|
||||
|
||||
// Lex returns the next token form the Tokenizer.
|
||||
// This function is used by go yacc.
|
||||
func (tkn *Tokenizer) Lex(lval *yySymType) int {
|
||||
typ, val := tkn.Scan()
|
||||
for typ == COMMENT {
|
||||
if tkn.AllowComments {
|
||||
break
|
||||
}
|
||||
typ, val = tkn.Scan()
|
||||
}
|
||||
lval.bytes = val
|
||||
tkn.lastToken = val
|
||||
return typ
|
||||
}
|
||||
|
||||
// Error is called by go yacc if there's a parsing error.
|
||||
func (tkn *Tokenizer) Error(err string) {
|
||||
buf := &bytes2.Buffer{}
|
||||
if tkn.lastToken != nil {
|
||||
fmt.Fprintf(buf, "%s at position %v near '%s'", err, tkn.Position, tkn.lastToken)
|
||||
} else {
|
||||
fmt.Fprintf(buf, "%s at position %v", err, tkn.Position)
|
||||
}
|
||||
tkn.LastError = errors.New(buf.String())
|
||||
|
||||
// Try and re-sync to the next statement
|
||||
if tkn.lastChar != ';' {
|
||||
tkn.skipStatement()
|
||||
}
|
||||
}
|
||||
|
||||
// Scan scans the tokenizer for the next token and returns
|
||||
// the token type and an optional value.
|
||||
func (tkn *Tokenizer) Scan() (int, []byte) {
|
||||
if tkn.specialComment != nil {
|
||||
// Enter specialComment scan mode.
|
||||
// for scanning such kind of comment: /*! MySQL-specific code */
|
||||
specialComment := tkn.specialComment
|
||||
tok, val := specialComment.Scan()
|
||||
if tok != 0 {
|
||||
// return the specialComment scan result as the result
|
||||
return tok, val
|
||||
}
|
||||
// leave specialComment scan mode after all stream consumed.
|
||||
tkn.specialComment = nil
|
||||
}
|
||||
if tkn.lastChar == 0 {
|
||||
tkn.next()
|
||||
}
|
||||
|
||||
if tkn.ForceEOF {
|
||||
tkn.skipStatement()
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
tkn.skipBlank()
|
||||
switch ch := tkn.lastChar; {
|
||||
case isLetter(ch):
|
||||
tkn.next()
|
||||
if ch == 'X' || ch == 'x' {
|
||||
if tkn.lastChar == '\'' {
|
||||
tkn.next()
|
||||
return tkn.scanHex()
|
||||
}
|
||||
}
|
||||
if ch == 'B' || ch == 'b' {
|
||||
if tkn.lastChar == '\'' {
|
||||
tkn.next()
|
||||
return tkn.scanBitLiteral()
|
||||
}
|
||||
}
|
||||
isDbSystemVariable := false
|
||||
if ch == '@' && tkn.lastChar == '@' {
|
||||
isDbSystemVariable = true
|
||||
}
|
||||
return tkn.scanIdentifier(byte(ch), isDbSystemVariable)
|
||||
case isDigit(ch):
|
||||
return tkn.scanNumber(false)
|
||||
case ch == ':':
|
||||
return tkn.scanBindVar()
|
||||
case ch == ';' && tkn.multi:
|
||||
return 0, nil
|
||||
default:
|
||||
tkn.next()
|
||||
switch ch {
|
||||
case eofChar:
|
||||
return 0, nil
|
||||
case '=', ',', ';', '(', ')', '+', '*', '%', '^', '~':
|
||||
return int(ch), nil
|
||||
case '&':
|
||||
if tkn.lastChar == '&' {
|
||||
tkn.next()
|
||||
return AND, nil
|
||||
}
|
||||
return int(ch), nil
|
||||
case '|':
|
||||
if tkn.lastChar == '|' {
|
||||
tkn.next()
|
||||
return OR, nil
|
||||
}
|
||||
return int(ch), nil
|
||||
case '?':
|
||||
tkn.posVarIndex++
|
||||
buf := new(bytes2.Buffer)
|
||||
fmt.Fprintf(buf, ":v%d", tkn.posVarIndex)
|
||||
return VALUE_ARG, buf.Bytes()
|
||||
case '.':
|
||||
if isDigit(tkn.lastChar) {
|
||||
return tkn.scanNumber(true)
|
||||
}
|
||||
return int(ch), nil
|
||||
case '/':
|
||||
switch tkn.lastChar {
|
||||
case '/':
|
||||
tkn.next()
|
||||
return tkn.scanCommentType1("//")
|
||||
case '*':
|
||||
tkn.next()
|
||||
switch tkn.lastChar {
|
||||
case '!':
|
||||
return tkn.scanMySQLSpecificComment()
|
||||
default:
|
||||
return tkn.scanCommentType2()
|
||||
}
|
||||
default:
|
||||
return int(ch), nil
|
||||
}
|
||||
case '#':
|
||||
return tkn.scanCommentType1("#")
|
||||
case '-':
|
||||
switch tkn.lastChar {
|
||||
case '-':
|
||||
tkn.next()
|
||||
return tkn.scanCommentType1("--")
|
||||
case '>':
|
||||
tkn.next()
|
||||
if tkn.lastChar == '>' {
|
||||
tkn.next()
|
||||
return JSON_UNQUOTE_EXTRACT_OP, nil
|
||||
}
|
||||
return JSON_EXTRACT_OP, nil
|
||||
}
|
||||
return int(ch), nil
|
||||
case '<':
|
||||
switch tkn.lastChar {
|
||||
case '>':
|
||||
tkn.next()
|
||||
return NE, nil
|
||||
case '<':
|
||||
tkn.next()
|
||||
return SHIFT_LEFT, nil
|
||||
case '=':
|
||||
tkn.next()
|
||||
switch tkn.lastChar {
|
||||
case '>':
|
||||
tkn.next()
|
||||
return NULL_SAFE_EQUAL, nil
|
||||
default:
|
||||
return LE, nil
|
||||
}
|
||||
default:
|
||||
return int(ch), nil
|
||||
}
|
||||
case '>':
|
||||
switch tkn.lastChar {
|
||||
case '=':
|
||||
tkn.next()
|
||||
return GE, nil
|
||||
case '>':
|
||||
tkn.next()
|
||||
return SHIFT_RIGHT, nil
|
||||
default:
|
||||
return int(ch), nil
|
||||
}
|
||||
case '!':
|
||||
if tkn.lastChar == '=' {
|
||||
tkn.next()
|
||||
return NE, nil
|
||||
}
|
||||
return int(ch), nil
|
||||
case '\'', '"':
|
||||
return tkn.scanString(ch, STRING)
|
||||
case '`':
|
||||
return tkn.scanLiteralIdentifier()
|
||||
default:
|
||||
return LEX_ERROR, []byte{byte(ch)}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// skipStatement scans until the EOF, or end of statement is encountered.
|
||||
func (tkn *Tokenizer) skipStatement() {
|
||||
ch := tkn.lastChar
|
||||
for ch != ';' && ch != eofChar {
|
||||
tkn.next()
|
||||
ch = tkn.lastChar
|
||||
}
|
||||
}
|
||||
|
||||
func (tkn *Tokenizer) skipBlank() {
|
||||
ch := tkn.lastChar
|
||||
for ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t' {
|
||||
tkn.next()
|
||||
ch = tkn.lastChar
|
||||
}
|
||||
}
|
||||
|
||||
func (tkn *Tokenizer) scanIdentifier(firstByte byte, isDbSystemVariable bool) (int, []byte) {
|
||||
buffer := &bytes2.Buffer{}
|
||||
buffer.WriteByte(firstByte)
|
||||
for isLetter(tkn.lastChar) || isDigit(tkn.lastChar) || (isDbSystemVariable && isCarat(tkn.lastChar)) {
|
||||
buffer.WriteByte(byte(tkn.lastChar))
|
||||
tkn.next()
|
||||
}
|
||||
lowered := bytes.ToLower(buffer.Bytes())
|
||||
loweredStr := string(lowered)
|
||||
if keywordID, found := keywords[loweredStr]; found {
|
||||
return keywordID, lowered
|
||||
}
|
||||
// dual must always be case-insensitive
|
||||
if loweredStr == "dual" {
|
||||
return ID, lowered
|
||||
}
|
||||
return ID, buffer.Bytes()
|
||||
}
|
||||
|
||||
func (tkn *Tokenizer) scanHex() (int, []byte) {
|
||||
buffer := &bytes2.Buffer{}
|
||||
tkn.scanMantissa(16, buffer)
|
||||
if tkn.lastChar != '\'' {
|
||||
return LEX_ERROR, buffer.Bytes()
|
||||
}
|
||||
tkn.next()
|
||||
if buffer.Len()%2 != 0 {
|
||||
return LEX_ERROR, buffer.Bytes()
|
||||
}
|
||||
return HEX, buffer.Bytes()
|
||||
}
|
||||
|
||||
func (tkn *Tokenizer) scanBitLiteral() (int, []byte) {
|
||||
buffer := &bytes2.Buffer{}
|
||||
tkn.scanMantissa(2, buffer)
|
||||
if tkn.lastChar != '\'' {
|
||||
return LEX_ERROR, buffer.Bytes()
|
||||
}
|
||||
tkn.next()
|
||||
return BIT_LITERAL, buffer.Bytes()
|
||||
}
|
||||
|
||||
func (tkn *Tokenizer) scanLiteralIdentifier() (int, []byte) {
|
||||
buffer := &bytes2.Buffer{}
|
||||
backTickSeen := false
|
||||
for {
|
||||
if backTickSeen {
|
||||
if tkn.lastChar != '`' {
|
||||
break
|
||||
}
|
||||
backTickSeen = false
|
||||
buffer.WriteByte('`')
|
||||
tkn.next()
|
||||
continue
|
||||
}
|
||||
// The previous char was not a backtick.
|
||||
switch tkn.lastChar {
|
||||
case '`':
|
||||
backTickSeen = true
|
||||
case eofChar:
|
||||
// Premature EOF.
|
||||
return LEX_ERROR, buffer.Bytes()
|
||||
default:
|
||||
buffer.WriteByte(byte(tkn.lastChar))
|
||||
}
|
||||
tkn.next()
|
||||
}
|
||||
if buffer.Len() == 0 {
|
||||
return LEX_ERROR, buffer.Bytes()
|
||||
}
|
||||
return ID, buffer.Bytes()
|
||||
}
|
||||
|
||||
func (tkn *Tokenizer) scanBindVar() (int, []byte) {
|
||||
buffer := &bytes2.Buffer{}
|
||||
buffer.WriteByte(byte(tkn.lastChar))
|
||||
token := VALUE_ARG
|
||||
tkn.next()
|
||||
if tkn.lastChar == ':' {
|
||||
token = LIST_ARG
|
||||
buffer.WriteByte(byte(tkn.lastChar))
|
||||
tkn.next()
|
||||
}
|
||||
if !isLetter(tkn.lastChar) {
|
||||
return LEX_ERROR, buffer.Bytes()
|
||||
}
|
||||
for isLetter(tkn.lastChar) || isDigit(tkn.lastChar) || tkn.lastChar == '.' {
|
||||
buffer.WriteByte(byte(tkn.lastChar))
|
||||
tkn.next()
|
||||
}
|
||||
return token, buffer.Bytes()
|
||||
}
|
||||
|
||||
func (tkn *Tokenizer) scanMantissa(base int, buffer *bytes2.Buffer) {
|
||||
for digitVal(tkn.lastChar) < base {
|
||||
tkn.consumeNext(buffer)
|
||||
}
|
||||
}
|
||||
|
||||
func (tkn *Tokenizer) scanNumber(seenDecimalPoint bool) (int, []byte) {
|
||||
token := INTEGRAL
|
||||
buffer := &bytes2.Buffer{}
|
||||
if seenDecimalPoint {
|
||||
token = FLOAT
|
||||
buffer.WriteByte('.')
|
||||
tkn.scanMantissa(10, buffer)
|
||||
goto exponent
|
||||
}
|
||||
|
||||
// 0x construct.
|
||||
if tkn.lastChar == '0' {
|
||||
tkn.consumeNext(buffer)
|
||||
if tkn.lastChar == 'x' || tkn.lastChar == 'X' {
|
||||
token = HEXNUM
|
||||
tkn.consumeNext(buffer)
|
||||
tkn.scanMantissa(16, buffer)
|
||||
goto exit
|
||||
}
|
||||
}
|
||||
|
||||
tkn.scanMantissa(10, buffer)
|
||||
|
||||
if tkn.lastChar == '.' {
|
||||
token = FLOAT
|
||||
tkn.consumeNext(buffer)
|
||||
tkn.scanMantissa(10, buffer)
|
||||
}
|
||||
|
||||
exponent:
|
||||
if tkn.lastChar == 'e' || tkn.lastChar == 'E' {
|
||||
token = FLOAT
|
||||
tkn.consumeNext(buffer)
|
||||
if tkn.lastChar == '+' || tkn.lastChar == '-' {
|
||||
tkn.consumeNext(buffer)
|
||||
}
|
||||
tkn.scanMantissa(10, buffer)
|
||||
}
|
||||
|
||||
exit:
|
||||
// A letter cannot immediately follow a number.
|
||||
if isLetter(tkn.lastChar) {
|
||||
return LEX_ERROR, buffer.Bytes()
|
||||
}
|
||||
|
||||
return token, buffer.Bytes()
|
||||
}
|
||||
|
||||
func (tkn *Tokenizer) scanString(delim uint16, typ int) (int, []byte) {
|
||||
var buffer bytes2.Buffer
|
||||
for {
|
||||
ch := tkn.lastChar
|
||||
if ch == eofChar {
|
||||
// Unterminated string.
|
||||
return LEX_ERROR, buffer.Bytes()
|
||||
}
|
||||
|
||||
if ch != delim && ch != '\\' {
|
||||
buffer.WriteByte(byte(ch))
|
||||
|
||||
// Scan ahead to the next interesting character.
|
||||
start := tkn.bufPos
|
||||
for ; tkn.bufPos < tkn.bufSize; tkn.bufPos++ {
|
||||
ch = uint16(tkn.buf[tkn.bufPos])
|
||||
if ch == delim || ch == '\\' {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
buffer.Write(tkn.buf[start:tkn.bufPos])
|
||||
tkn.Position += (tkn.bufPos - start)
|
||||
|
||||
if tkn.bufPos >= tkn.bufSize {
|
||||
// Reached the end of the buffer without finding a delim or
|
||||
// escape character.
|
||||
tkn.next()
|
||||
continue
|
||||
}
|
||||
|
||||
tkn.bufPos++
|
||||
tkn.Position++
|
||||
}
|
||||
tkn.next() // Read one past the delim or escape character.
|
||||
|
||||
if ch == '\\' {
|
||||
if tkn.lastChar == eofChar {
|
||||
// String terminates mid escape character.
|
||||
return LEX_ERROR, buffer.Bytes()
|
||||
}
|
||||
if decodedChar := sqltypes.SQLDecodeMap[byte(tkn.lastChar)]; decodedChar == sqltypes.DontEscape {
|
||||
ch = tkn.lastChar
|
||||
} else {
|
||||
ch = uint16(decodedChar)
|
||||
}
|
||||
|
||||
} else if ch == delim && tkn.lastChar != delim {
|
||||
// Correctly terminated string, which is not a double delim.
|
||||
break
|
||||
}
|
||||
|
||||
buffer.WriteByte(byte(ch))
|
||||
tkn.next()
|
||||
}
|
||||
|
||||
return typ, buffer.Bytes()
|
||||
}
|
||||
|
||||
func (tkn *Tokenizer) scanCommentType1(prefix string) (int, []byte) {
|
||||
buffer := &bytes2.Buffer{}
|
||||
buffer.WriteString(prefix)
|
||||
for tkn.lastChar != eofChar {
|
||||
if tkn.lastChar == '\n' {
|
||||
tkn.consumeNext(buffer)
|
||||
break
|
||||
}
|
||||
tkn.consumeNext(buffer)
|
||||
}
|
||||
return COMMENT, buffer.Bytes()
|
||||
}
|
||||
|
||||
func (tkn *Tokenizer) scanCommentType2() (int, []byte) {
|
||||
buffer := &bytes2.Buffer{}
|
||||
buffer.WriteString("/*")
|
||||
for {
|
||||
if tkn.lastChar == '*' {
|
||||
tkn.consumeNext(buffer)
|
||||
if tkn.lastChar == '/' {
|
||||
tkn.consumeNext(buffer)
|
||||
break
|
||||
}
|
||||
continue
|
||||
}
|
||||
if tkn.lastChar == eofChar {
|
||||
return LEX_ERROR, buffer.Bytes()
|
||||
}
|
||||
tkn.consumeNext(buffer)
|
||||
}
|
||||
return COMMENT, buffer.Bytes()
|
||||
}
|
||||
|
||||
func (tkn *Tokenizer) scanMySQLSpecificComment() (int, []byte) {
|
||||
buffer := &bytes2.Buffer{}
|
||||
buffer.WriteString("/*!")
|
||||
tkn.next()
|
||||
for {
|
||||
if tkn.lastChar == '*' {
|
||||
tkn.consumeNext(buffer)
|
||||
if tkn.lastChar == '/' {
|
||||
tkn.consumeNext(buffer)
|
||||
break
|
||||
}
|
||||
continue
|
||||
}
|
||||
if tkn.lastChar == eofChar {
|
||||
return LEX_ERROR, buffer.Bytes()
|
||||
}
|
||||
tkn.consumeNext(buffer)
|
||||
}
|
||||
_, sql := ExtractMysqlComment(buffer.String())
|
||||
tkn.specialComment = NewStringTokenizer(sql)
|
||||
return tkn.Scan()
|
||||
}
|
||||
|
||||
func (tkn *Tokenizer) consumeNext(buffer *bytes2.Buffer) {
|
||||
if tkn.lastChar == eofChar {
|
||||
// This should never happen.
|
||||
panic("unexpected EOF")
|
||||
}
|
||||
buffer.WriteByte(byte(tkn.lastChar))
|
||||
tkn.next()
|
||||
}
|
||||
|
||||
func (tkn *Tokenizer) next() {
|
||||
if tkn.bufPos >= tkn.bufSize && tkn.InStream != nil {
|
||||
// Try and refill the buffer
|
||||
var err error
|
||||
tkn.bufPos = 0
|
||||
if tkn.bufSize, err = tkn.InStream.Read(tkn.buf); err != io.EOF && err != nil {
|
||||
tkn.LastError = err
|
||||
}
|
||||
}
|
||||
|
||||
if tkn.bufPos >= tkn.bufSize {
|
||||
if tkn.lastChar != eofChar {
|
||||
tkn.Position++
|
||||
tkn.lastChar = eofChar
|
||||
}
|
||||
} else {
|
||||
tkn.Position++
|
||||
tkn.lastChar = uint16(tkn.buf[tkn.bufPos])
|
||||
tkn.bufPos++
|
||||
}
|
||||
}
|
||||
|
||||
// reset clears any internal state.
|
||||
func (tkn *Tokenizer) reset() {
|
||||
tkn.ParseTree = nil
|
||||
tkn.partialDDL = nil
|
||||
tkn.specialComment = nil
|
||||
tkn.posVarIndex = 0
|
||||
tkn.nesting = 0
|
||||
tkn.ForceEOF = false
|
||||
}
|
||||
|
||||
// isLetter reports whether ch can be part of an identifier: an ASCII
// letter, an underscore or an at sign.
func isLetter(ch uint16) bool {
	switch {
	case ch >= 'a' && ch <= 'z', ch >= 'A' && ch <= 'Z':
		return true
	}
	return ch == '_' || ch == '@'
}
|
||||
|
||||
// isCarat reports whether ch is a dot, a single quote, a double quote or a
// backtick.
func isCarat(ch uint16) bool {
	switch ch {
	case '.', '\'', '"', '`':
		return true
	}
	return false
}
|
||||
|
||||
// digitVal returns the numeric value of ch read as a hexadecimal digit
// (0-15, letters in either case), or 16 when ch is not such a digit.
func digitVal(ch uint16) int {
	if ch >= '0' && ch <= '9' {
		return int(ch - '0')
	}
	if ch >= 'a' && ch <= 'f' {
		return int(ch-'a') + 10
	}
	if ch >= 'A' && ch <= 'F' {
		return int(ch-'A') + 10
	}
	return 16 // larger than any legal digit val
}
|
||||
|
||||
// isDigit reports whether ch is an ASCII decimal digit.
func isDigit(ch uint16) bool {
	return ch >= '0' && ch <= '9'
}
|
||||
Reference in New Issue
Block a user