mirror of
https://github.com/minio/minio.git
synced 2025-11-20 18:06:10 -05:00
Add new SQL parser to support S3 Select syntax (#7102)
- New parser written from scratch, allows easier and complete parsing of the full S3 Select SQL syntax. Parser definition is directly provided by the AST defined for the SQL grammar. - Bring support to parse and interpret SQL involving JSON path expressions; evaluation of JSON path expressions will be subsequently added. - Bring automatic type inference and conversion for untyped values (e.g. CSV data).
This commit is contained in:
committed by
Harshavardhana
parent
0a28c28a8c
commit
2786055df4
383
pkg/s3select/sql/parser_test.go
Normal file
383
pkg/s3select/sql/parser_test.go
Normal file
@@ -0,0 +1,383 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package sql
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
|
||||
"github.com/alecthomas/participle"
|
||||
"github.com/alecthomas/participle/lexer"
|
||||
)
|
||||
|
||||
func TestJSONPathElement(t *testing.T) {
|
||||
p := participle.MustBuild(
|
||||
&JSONPathElement{},
|
||||
participle.Lexer(sqlLexer),
|
||||
participle.CaseInsensitive("Keyword"),
|
||||
)
|
||||
|
||||
j := JSONPathElement{}
|
||||
cases := []string{
|
||||
// Key
|
||||
"['name']", ".name", `."name"`,
|
||||
|
||||
// Index
|
||||
"[2]", "[0]", "[100]",
|
||||
|
||||
// Object wilcard
|
||||
".*",
|
||||
|
||||
// array wildcard
|
||||
"[*]",
|
||||
}
|
||||
for i, tc := range cases {
|
||||
err := p.ParseString(tc, &j)
|
||||
if err != nil {
|
||||
t.Fatalf("%d: %v", i, err)
|
||||
}
|
||||
// repr.Println(j, repr.Indent(" "), repr.OmitEmpty(true))
|
||||
}
|
||||
}
|
||||
|
||||
func TestJSONPath(t *testing.T) {
|
||||
p := participle.MustBuild(
|
||||
&JSONPath{},
|
||||
participle.Lexer(sqlLexer),
|
||||
participle.CaseInsensitive("Keyword"),
|
||||
)
|
||||
|
||||
j := JSONPath{}
|
||||
cases := []string{
|
||||
"S3Object",
|
||||
"S3Object.id",
|
||||
"S3Object.book.title",
|
||||
"S3Object.id[1]",
|
||||
"S3Object.id['abc']",
|
||||
"S3Object.id['ab']",
|
||||
"S3Object.words.*.id",
|
||||
"S3Object.words.name[*].val",
|
||||
"S3Object.words.name[*].val[*]",
|
||||
"S3Object.words.name[*].val.*",
|
||||
}
|
||||
for i, tc := range cases {
|
||||
err := p.ParseString(tc, &j)
|
||||
if err != nil {
|
||||
t.Fatalf("%d: %v", i, err)
|
||||
}
|
||||
// repr.Println(j, repr.Indent(" "), repr.OmitEmpty(true))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestIdentifierParsing(t *testing.T) {
|
||||
p := participle.MustBuild(
|
||||
&Identifier{},
|
||||
participle.Lexer(sqlLexer),
|
||||
participle.CaseInsensitive("Keyword"),
|
||||
)
|
||||
|
||||
id := Identifier{}
|
||||
validCases := []string{
|
||||
"a",
|
||||
"_a",
|
||||
"abc_a",
|
||||
"a2",
|
||||
`"abc"`,
|
||||
`"abc\a""ac"`,
|
||||
}
|
||||
for i, tc := range validCases {
|
||||
err := p.ParseString(tc, &id)
|
||||
if err != nil {
|
||||
t.Fatalf("%d: %v", i, err)
|
||||
}
|
||||
// repr.Println(id, repr.Indent(" "), repr.OmitEmpty(true))
|
||||
}
|
||||
|
||||
invalidCases := []string{
|
||||
"+a",
|
||||
"-a",
|
||||
"1a",
|
||||
`"ab`,
|
||||
`abc"`,
|
||||
`aa""a`,
|
||||
`"a"a"`,
|
||||
}
|
||||
for i, tc := range invalidCases {
|
||||
err := p.ParseString(tc, &id)
|
||||
if err == nil {
|
||||
t.Fatalf("%d: %v", i, err)
|
||||
}
|
||||
// fmt.Println(tc, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLiteralStringParsing(t *testing.T) {
|
||||
var k ObjectKey
|
||||
p := participle.MustBuild(
|
||||
&ObjectKey{},
|
||||
participle.Lexer(sqlLexer),
|
||||
participle.CaseInsensitive("Keyword"),
|
||||
)
|
||||
|
||||
validCases := []string{
|
||||
"['abc']",
|
||||
"['ab''c']",
|
||||
"['a''b''c']",
|
||||
"['abc-x_1##@(*&(#*))/\\']",
|
||||
}
|
||||
for i, tc := range validCases {
|
||||
err := p.ParseString(tc, &k)
|
||||
if err != nil {
|
||||
t.Fatalf("%d: %v", i, err)
|
||||
}
|
||||
if string(*k.Lit) == "" {
|
||||
t.Fatalf("Incorrect parse %#v", k)
|
||||
}
|
||||
// repr.Println(k, repr.Indent(" "), repr.OmitEmpty(true))
|
||||
}
|
||||
|
||||
invalidCases := []string{
|
||||
"['abc'']",
|
||||
"['-abc'sc']",
|
||||
"[abc']",
|
||||
"['ac]",
|
||||
}
|
||||
for i, tc := range invalidCases {
|
||||
err := p.ParseString(tc, &k)
|
||||
if err == nil {
|
||||
t.Fatalf("%d: %v", i, err)
|
||||
}
|
||||
// fmt.Println(tc, err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFunctionParsing(t *testing.T) {
|
||||
var fex FuncExpr
|
||||
p := participle.MustBuild(
|
||||
&FuncExpr{},
|
||||
participle.Lexer(sqlLexer),
|
||||
participle.CaseInsensitive("Keyword"),
|
||||
participle.CaseInsensitive("Timeword"),
|
||||
)
|
||||
|
||||
validCases := []string{
|
||||
"count(*)",
|
||||
"sum(2 + s.id)",
|
||||
"sum(t)",
|
||||
"avg(s.id[1])",
|
||||
"coalesce(s.id[1], 2, 2 + 3)",
|
||||
|
||||
"cast(s as string)",
|
||||
"cast(s AS INT)",
|
||||
"cast(s as DECIMAL)",
|
||||
"extract(YEAR from '2018-01-09')",
|
||||
"extract(month from '2018-01-09')",
|
||||
|
||||
"extract(hour from '2018-01-09')",
|
||||
"extract(day from '2018-01-09')",
|
||||
"substring('abcd' from 2 for 2)",
|
||||
"substring('abcd' from 2)",
|
||||
"substring('abcd' , 2 , 2)",
|
||||
|
||||
"substring('abcd' , 22 )",
|
||||
"trim(' aab ')",
|
||||
"trim(leading from ' aab ')",
|
||||
"trim(trailing from ' aab ')",
|
||||
"trim(both from ' aab ')",
|
||||
|
||||
"trim(both '12' from ' aab ')",
|
||||
"trim(leading '12' from ' aab ')",
|
||||
"trim(trailing '12' from ' aab ')",
|
||||
"count(23)",
|
||||
}
|
||||
for i, tc := range validCases {
|
||||
err := p.ParseString(tc, &fex)
|
||||
if err != nil {
|
||||
t.Fatalf("%d: %v", i, err)
|
||||
}
|
||||
// repr.Println(fex, repr.Indent(" "), repr.OmitEmpty(true))
|
||||
}
|
||||
}
|
||||
|
||||
func TestSqlLexer(t *testing.T) {
|
||||
// s := bytes.NewBuffer([]byte("s.['name'].*.[*].abc.[\"abc\"]"))
|
||||
s := bytes.NewBuffer([]byte("S3Object.words.*.id"))
|
||||
// s := bytes.NewBuffer([]byte("COUNT(Id)"))
|
||||
lex, err := sqlLexer.Lex(s)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
tokens, err := lexer.ConsumeAll(lex)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
// for i, t := range tokens {
|
||||
// fmt.Printf("%d: %#v\n", i, t)
|
||||
// }
|
||||
if len(tokens) != 7 {
|
||||
t.Fatalf("Expected 7 got %d", len(tokens))
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelectWhere(t *testing.T) {
|
||||
p := participle.MustBuild(
|
||||
&Select{},
|
||||
participle.Lexer(sqlLexer),
|
||||
participle.CaseInsensitive("Keyword"),
|
||||
)
|
||||
|
||||
s := Select{}
|
||||
cases := []string{
|
||||
"select * from s3object",
|
||||
"select a, b from s3object s",
|
||||
"select a, b from s3object as s",
|
||||
"select a, b from s3object as s where a = 1",
|
||||
"select a, b from s3object s where a = 1",
|
||||
"select a, b from s3object where a = 1",
|
||||
}
|
||||
for i, tc := range cases {
|
||||
err := p.ParseString(tc, &s)
|
||||
if err != nil {
|
||||
t.Fatalf("%d: %v", i, err)
|
||||
}
|
||||
|
||||
// repr.Println(s, repr.Indent(" "), repr.OmitEmpty(true))
|
||||
}
|
||||
}
|
||||
|
||||
func TestLikeClause(t *testing.T) {
|
||||
p := participle.MustBuild(
|
||||
&Select{},
|
||||
participle.Lexer(sqlLexer),
|
||||
participle.CaseInsensitive("Keyword"),
|
||||
)
|
||||
|
||||
s := Select{}
|
||||
cases := []string{
|
||||
`select * from s3object where Name like 'abcd'`,
|
||||
`select Name like 'abc' from s3object`,
|
||||
`select * from s3object where Name not like 'abc'`,
|
||||
`select * from s3object where Name like 'abc' escape 't'`,
|
||||
`select * from s3object where Name like 'a\%' escape '?'`,
|
||||
`select * from s3object where Name not like 'abc\' escape '?'`,
|
||||
`select * from s3object where Name like 'a\%' escape LOWER('?')`,
|
||||
`select * from s3object where Name not like LOWER('Bc\') escape '?'`,
|
||||
}
|
||||
for i, tc := range cases {
|
||||
err := p.ParseString(tc, &s)
|
||||
if err != nil {
|
||||
t.Errorf("%d: %v", i, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBetweenClause(t *testing.T) {
|
||||
p := participle.MustBuild(
|
||||
&Select{},
|
||||
participle.Lexer(sqlLexer),
|
||||
participle.CaseInsensitive("Keyword"),
|
||||
)
|
||||
|
||||
s := Select{}
|
||||
cases := []string{
|
||||
`select * from s3object where Id between 1 and 2`,
|
||||
`select * from s3object where Id between 1 and 2 and name = 'Ab'`,
|
||||
`select * from s3object where Id not between 1 and 2`,
|
||||
`select * from s3object where Id not between 1 and 2 and name = 'Bc'`,
|
||||
}
|
||||
for i, tc := range cases {
|
||||
err := p.ParseString(tc, &s)
|
||||
if err != nil {
|
||||
t.Errorf("%d: %v", i, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFromClauseJSONPath(t *testing.T) {
|
||||
p := participle.MustBuild(
|
||||
&Select{},
|
||||
participle.Lexer(sqlLexer),
|
||||
participle.CaseInsensitive("Keyword"),
|
||||
)
|
||||
|
||||
s := Select{}
|
||||
cases := []string{
|
||||
"select * from s3object",
|
||||
"select * from s3object[*].name",
|
||||
"select * from s3object[*].books[*]",
|
||||
"select * from s3object[*].books[*].name",
|
||||
"select * from s3object where name > 2",
|
||||
"select * from s3object[*].name where name > 2",
|
||||
"select * from s3object[*].books[*] where name > 2",
|
||||
"select * from s3object[*].books[*].name where name > 2",
|
||||
"select * from s3object[*].books[*] s",
|
||||
"select * from s3object[*].books[*].name as s",
|
||||
"select * from s3object s where name > 2",
|
||||
"select * from s3object[*].name as s where name > 2",
|
||||
}
|
||||
for i, tc := range cases {
|
||||
err := p.ParseString(tc, &s)
|
||||
if err != nil {
|
||||
t.Fatalf("%d: %v", i, err)
|
||||
}
|
||||
|
||||
// repr.Println(s, repr.Indent(" "), repr.OmitEmpty(true))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestSelectParsing(t *testing.T) {
|
||||
p := participle.MustBuild(
|
||||
&Select{},
|
||||
participle.Lexer(sqlLexer),
|
||||
participle.CaseInsensitive("Keyword"),
|
||||
)
|
||||
|
||||
s := Select{}
|
||||
cases := []string{
|
||||
"select * from s3object where name > 2 or value > 1 or word > 2",
|
||||
"select s.word.id + 2 from s3object s",
|
||||
"select 1-2-3 from s3object s limit 1",
|
||||
}
|
||||
for i, tc := range cases {
|
||||
err := p.ParseString(tc, &s)
|
||||
if err != nil {
|
||||
t.Fatalf("%d: %v", i, err)
|
||||
}
|
||||
|
||||
// repr.Println(s, repr.Indent(" "), repr.OmitEmpty(true))
|
||||
}
|
||||
}
|
||||
|
||||
func TestSqlLexerArithOps(t *testing.T) {
|
||||
s := bytes.NewBuffer([]byte("year from select month hour distinct"))
|
||||
lex, err := sqlLexer.Lex(s)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
tokens, err := lexer.ConsumeAll(lex)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(tokens) != 7 {
|
||||
t.Errorf("Expected 7 got %d", len(tokens))
|
||||
}
|
||||
// for i, t := range tokens {
|
||||
// fmt.Printf("%d: %#v\n", i, t)
|
||||
// }
|
||||
}
|
||||
Reference in New Issue
Block a user