mirror of
https://github.com/minio/minio.git
synced 2024-12-26 23:25:54 -05:00
2786055df4
- New parser written from scratch, allows easier and complete parsing of the full S3 Select SQL syntax. Parser definition is directly provided by the AST defined for the SQL grammar. - Bring support to parse and interpret SQL involving JSON path expressions; evaluation of JSON path expressions will be subsequently added. - Bring automatic type inference and conversion for untyped values (e.g. CSV data).
576 lines
14 KiB
Go
576 lines
14 KiB
Go
package participle
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"reflect"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/alecthomas/participle/lexer"
|
|
)
|
|
|
|
var (
	// MaxIterations limits the number of elements capturable by {}.
	//
	// The same limit is used as the upper bound for the "*" and "+" group modifiers.
	MaxIterations = 1000000

	// Cached reflect types for lexer.Position, Capture and Parseable.
	positionType  = reflect.TypeOf(lexer.Position{})
	captureType   = reflect.TypeOf((*Capture)(nil)).Elem()
	parseableType = reflect.TypeOf((*Parseable)(nil)).Elem()

	// NextMatch should be returned by Parseable.Parse() method implementations to indicate
	// that the node did not match and that other matches should be attempted, if appropriate.
	NextMatch = errors.New("no match") // nolint: golint
)
|
|
|
|
// A node in the grammar.
//
// Each grammar construct in this file (struct, group, disjunction, sequence, capture,
// reference, optional, repetition, literal, parseable) provides these two methods.
type node interface {
	// Parse from scanner into value.
	//
	// Returned slice will be nil if the node does not match. Nodes that may legitimately
	// match nothing (optional, repetition, zero-or-more groups) return a non-nil empty
	// slice instead so that parsing proceeds.
	Parse(ctx *parseContext, parent reflect.Value) ([]reflect.Value, error)

	// Return a decent string representation of the Node.
	String() string
}
|
|
|
|
func decorate(err *error, name func() string) {
|
|
if *err == nil {
|
|
return
|
|
}
|
|
switch realError := (*err).(type) {
|
|
case *lexer.Error:
|
|
*err = &lexer.Error{Message: name() + ": " + realError.Message, Pos: realError.Pos}
|
|
default:
|
|
*err = fmt.Errorf("%s: %s", name(), realError)
|
|
}
|
|
}
|
|
|
|
// A node that proxies to an implementation that implements the Parseable interface.
|
|
type parseable struct {
|
|
t reflect.Type
|
|
}
|
|
|
|
func (p *parseable) String() string { return stringer(p) }
|
|
|
|
func (p *parseable) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
|
|
rv := reflect.New(p.t)
|
|
v := rv.Interface().(Parseable)
|
|
err = v.Parse(ctx)
|
|
if err != nil {
|
|
if err == NextMatch {
|
|
return nil, nil
|
|
}
|
|
return nil, err
|
|
}
|
|
return []reflect.Value{rv.Elem()}, nil
|
|
}
|
|
|
|
type strct struct {
|
|
typ reflect.Type
|
|
expr node
|
|
}
|
|
|
|
func (s *strct) String() string { return stringer(s) }
|
|
|
|
func (s *strct) maybeInjectPos(pos lexer.Position, v reflect.Value) {
|
|
if f := v.FieldByName("Pos"); f.IsValid() && f.Type() == positionType {
|
|
f.Set(reflect.ValueOf(pos))
|
|
}
|
|
}
|
|
|
|
func (s *strct) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
|
|
sv := reflect.New(s.typ).Elem()
|
|
t, err := ctx.Peek(0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
s.maybeInjectPos(t.Pos, sv)
|
|
if out, err = s.expr.Parse(ctx, sv); err != nil {
|
|
_ = ctx.Apply()
|
|
return []reflect.Value{sv}, err
|
|
} else if out == nil {
|
|
return nil, nil
|
|
}
|
|
return []reflect.Value{sv}, ctx.Apply()
|
|
}
|
|
|
|
// groupMatchMode selects how often the sub-expression of a group has to match.
type groupMatchMode int

// All constants share the groupMatchMode type through implicit repetition of the first
// specification; repeating "= iota" without the type would make them untyped integers.
const (
	// groupMatchOnce is a plain group: ( <expr> ).
	groupMatchOnce groupMatchMode = iota
	// groupMatchZeroOrOne is an optional group: ( <expr> )?.
	groupMatchZeroOrOne
	// groupMatchZeroOrMore is a repeated group: ( <expr> )*.
	groupMatchZeroOrMore
	// groupMatchOneOrMore is a repeated group that must match at least once: ( <expr> )+.
	groupMatchOneOrMore
	// groupMatchNonEmpty is a group whose match must not be empty: ( <expr> )!.
	groupMatchNonEmpty
)
|
|
|
|
// ( <expr> ) - match once
|
|
// ( <expr> )* - match zero or more times
|
|
// ( <expr> )+ - match one or more times
|
|
// ( <expr> )? - match zero or once
|
|
// ( <expr> )! - must be a non-empty match
|
|
//
|
|
// The additional modifier "!" forces the content of the group to be non-empty if it does match.
|
|
type group struct {
|
|
expr node
|
|
mode groupMatchMode
|
|
}
|
|
|
|
func (g *group) String() string { return stringer(g) }
|
|
func (g *group) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
|
|
// Configure min/max matches.
|
|
min := 1
|
|
max := 1
|
|
switch g.mode {
|
|
case groupMatchNonEmpty:
|
|
out, err = g.expr.Parse(ctx, parent)
|
|
if err != nil {
|
|
return out, err
|
|
}
|
|
if len(out) == 0 {
|
|
t, _ := ctx.Peek(0)
|
|
return out, lexer.Errorf(t.Pos, "sub-expression %s cannot be empty", g)
|
|
}
|
|
return out, nil
|
|
case groupMatchOnce:
|
|
return g.expr.Parse(ctx, parent)
|
|
case groupMatchZeroOrOne:
|
|
min = 0
|
|
case groupMatchZeroOrMore:
|
|
min = 0
|
|
max = MaxIterations
|
|
case groupMatchOneOrMore:
|
|
min = 1
|
|
max = MaxIterations
|
|
}
|
|
matches := 0
|
|
for ; matches < max; matches++ {
|
|
branch := ctx.Branch()
|
|
v, err := g.expr.Parse(branch, parent)
|
|
out = append(out, v...)
|
|
if err != nil {
|
|
// Optional part failed to match.
|
|
if ctx.Stop(branch) {
|
|
return out, err
|
|
}
|
|
break
|
|
} else {
|
|
ctx.Accept(branch)
|
|
}
|
|
if v == nil {
|
|
break
|
|
}
|
|
}
|
|
// fmt.Printf("%d < %d < %d: out == nil? %v\n", min, matches, max, out == nil)
|
|
t, _ := ctx.Peek(0)
|
|
if matches >= MaxIterations {
|
|
panic(lexer.Errorf(t.Pos, "too many iterations of %s (> %d)", g, MaxIterations))
|
|
}
|
|
if matches < min {
|
|
return out, lexer.Errorf(t.Pos, "sub-expression %s must match at least once", g)
|
|
}
|
|
// The idea here is that something like "a"? is a successful match and that parsing should proceed.
|
|
if min == 0 && out == nil {
|
|
out = []reflect.Value{}
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// <expr> {"|" <expr>}
|
|
type disjunction struct {
|
|
nodes []node
|
|
}
|
|
|
|
func (d *disjunction) String() string { return stringer(d) }
|
|
|
|
func (d *disjunction) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
|
|
var (
|
|
deepestError = 0
|
|
firstError error
|
|
firstValues []reflect.Value
|
|
)
|
|
for _, a := range d.nodes {
|
|
branch := ctx.Branch()
|
|
if value, err := a.Parse(branch, parent); err != nil {
|
|
// If this branch progressed too far and still didn't match, error out.
|
|
if ctx.Stop(branch) {
|
|
return value, err
|
|
}
|
|
// Show the closest error returned. The idea here is that the further the parser progresses
|
|
// without error, the more difficult it is to trace the error back to its root.
|
|
if err != nil && branch.cursor >= deepestError {
|
|
firstError = err
|
|
firstValues = value
|
|
deepestError = branch.cursor
|
|
}
|
|
} else if value != nil {
|
|
ctx.Accept(branch)
|
|
return value, nil
|
|
}
|
|
}
|
|
if firstError != nil {
|
|
return firstValues, firstError
|
|
}
|
|
return nil, nil
|
|
}
|
|
|
|
// <node> ...
|
|
type sequence struct {
|
|
head bool
|
|
node node
|
|
next *sequence
|
|
}
|
|
|
|
func (s *sequence) String() string { return stringer(s) }
|
|
|
|
func (s *sequence) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
|
|
for n := s; n != nil; n = n.next {
|
|
child, err := n.node.Parse(ctx, parent)
|
|
out = append(out, child...)
|
|
if err != nil {
|
|
return out, err
|
|
}
|
|
if child == nil {
|
|
// Early exit if first value doesn't match, otherwise all values must match.
|
|
if n == s {
|
|
return nil, nil
|
|
}
|
|
token, err := ctx.Peek(0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return out, lexer.Errorf(token.Pos, "unexpected %q (expected %s)", token, n)
|
|
}
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// @<expr>
|
|
type capture struct {
|
|
field structLexerField
|
|
node node
|
|
}
|
|
|
|
func (c *capture) String() string { return stringer(c) }
|
|
|
|
func (c *capture) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
|
|
token, err := ctx.Peek(0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
pos := token.Pos
|
|
v, err := c.node.Parse(ctx, parent)
|
|
if err != nil {
|
|
if v != nil {
|
|
ctx.Defer(pos, parent, c.field, v)
|
|
}
|
|
return []reflect.Value{parent}, err
|
|
}
|
|
if v == nil {
|
|
return nil, nil
|
|
}
|
|
ctx.Defer(pos, parent, c.field, v)
|
|
return []reflect.Value{parent}, nil
|
|
}
|
|
|
|
// <identifier> - named lexer token reference
|
|
type reference struct {
|
|
typ rune
|
|
identifier string // Used for informational purposes.
|
|
}
|
|
|
|
func (r *reference) String() string { return stringer(r) }
|
|
|
|
func (r *reference) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
|
|
token, err := ctx.Peek(0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if token.Type != r.typ {
|
|
return nil, nil
|
|
}
|
|
_, _ = ctx.Next()
|
|
return []reflect.Value{reflect.ValueOf(token.Value)}, nil
|
|
}
|
|
|
|
// [ <expr> ] <sequence>
|
|
type optional struct {
|
|
node node
|
|
}
|
|
|
|
func (o *optional) String() string { return stringer(o) }
|
|
|
|
func (o *optional) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
|
|
branch := ctx.Branch()
|
|
out, err = o.node.Parse(branch, parent)
|
|
if err != nil {
|
|
// Optional part failed to match.
|
|
if ctx.Stop(branch) {
|
|
return out, err
|
|
}
|
|
} else {
|
|
ctx.Accept(branch)
|
|
}
|
|
if out == nil {
|
|
out = []reflect.Value{}
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// { <expr> } <sequence>
|
|
type repetition struct {
|
|
node node
|
|
}
|
|
|
|
func (r *repetition) String() string { return stringer(r) }
|
|
|
|
// Parse a repetition. Once a repetition is encountered it will always match, so grammars
|
|
// should ensure that branches are differentiated prior to the repetition.
|
|
func (r *repetition) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
|
|
i := 0
|
|
for ; i < MaxIterations; i++ {
|
|
branch := ctx.Branch()
|
|
v, err := r.node.Parse(branch, parent)
|
|
out = append(out, v...)
|
|
if err != nil {
|
|
// Optional part failed to match.
|
|
if ctx.Stop(branch) {
|
|
return out, err
|
|
}
|
|
break
|
|
} else {
|
|
ctx.Accept(branch)
|
|
}
|
|
if v == nil {
|
|
break
|
|
}
|
|
}
|
|
if i >= MaxIterations {
|
|
t, _ := ctx.Peek(0)
|
|
panic(lexer.Errorf(t.Pos, "too many iterations of %s (> %d)", r, MaxIterations))
|
|
}
|
|
if out == nil {
|
|
out = []reflect.Value{}
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// Match a token literal exactly "..."[:<type>].
|
|
type literal struct {
|
|
s string
|
|
t rune
|
|
tt string // Used for display purposes - symbolic name of t.
|
|
}
|
|
|
|
func (l *literal) String() string { return stringer(l) }
|
|
|
|
func (l *literal) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
|
|
token, err := ctx.Peek(0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
equal := false // nolint: ineffassign
|
|
if ctx.caseInsensitive[token.Type] {
|
|
equal = strings.EqualFold(token.Value, l.s)
|
|
} else {
|
|
equal = token.Value == l.s
|
|
}
|
|
if equal && (l.t == -1 || l.t == token.Type) {
|
|
next, err := ctx.Next()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return []reflect.Value{reflect.ValueOf(next.Value)}, nil
|
|
}
|
|
return nil, nil
|
|
}
|
|
|
|
// Attempt to transform values to given type.
|
|
//
|
|
// This will dereference pointers, and attempt to parse strings into integer values, floats, etc.
|
|
func conform(t reflect.Type, values []reflect.Value) (out []reflect.Value, err error) {
|
|
for _, v := range values {
|
|
for t != v.Type() && t.Kind() == reflect.Ptr && v.Kind() != reflect.Ptr {
|
|
// This can occur during partial failure.
|
|
if !v.CanAddr() {
|
|
return
|
|
}
|
|
v = v.Addr()
|
|
}
|
|
|
|
// Already of the right kind, don't bother converting.
|
|
if v.Kind() == t.Kind() {
|
|
out = append(out, v)
|
|
continue
|
|
}
|
|
|
|
kind := t.Kind()
|
|
switch kind {
|
|
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
|
n, err := strconv.ParseInt(v.String(), 0, sizeOfKind(kind))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid integer %q: %s", v.String(), err)
|
|
}
|
|
v = reflect.New(t).Elem()
|
|
v.SetInt(n)
|
|
|
|
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
|
n, err := strconv.ParseUint(v.String(), 0, sizeOfKind(kind))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid integer %q: %s", v.String(), err)
|
|
}
|
|
v = reflect.New(t).Elem()
|
|
v.SetUint(n)
|
|
|
|
case reflect.Bool:
|
|
v = reflect.ValueOf(true)
|
|
|
|
case reflect.Float32, reflect.Float64:
|
|
n, err := strconv.ParseFloat(v.String(), sizeOfKind(kind))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid integer %q: %s", v.String(), err)
|
|
}
|
|
v = reflect.New(t).Elem()
|
|
v.SetFloat(n)
|
|
}
|
|
|
|
out = append(out, v)
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
func sizeOfKind(kind reflect.Kind) int {
|
|
switch kind {
|
|
case reflect.Int8, reflect.Uint8:
|
|
return 8
|
|
case reflect.Int16, reflect.Uint16:
|
|
return 16
|
|
case reflect.Int32, reflect.Uint32, reflect.Float32:
|
|
return 32
|
|
case reflect.Int64, reflect.Uint64, reflect.Float64:
|
|
return 64
|
|
case reflect.Int, reflect.Uint:
|
|
return strconv.IntSize
|
|
}
|
|
panic("unsupported kind " + kind.String())
|
|
}
|
|
|
|
// Set field.
|
|
//
|
|
// If field is a pointer the pointer will be set to the value. If field is a string, value will be
|
|
// appended. If field is a slice, value will be appended to slice.
|
|
//
|
|
// For all other types, an attempt will be made to convert the string to the corresponding
|
|
// type (int, float32, etc.).
|
|
func setField(pos lexer.Position, strct reflect.Value, field structLexerField, fieldValue []reflect.Value) (err error) { // nolint: gocyclo
|
|
defer decorate(&err, func() string { return pos.String() + ": " + strct.Type().String() + "." + field.Name })
|
|
|
|
f := strct.FieldByIndex(field.Index)
|
|
switch f.Kind() {
|
|
case reflect.Slice:
|
|
fieldValue, err = conform(f.Type().Elem(), fieldValue)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
f.Set(reflect.Append(f, fieldValue...))
|
|
return nil
|
|
|
|
case reflect.Ptr:
|
|
if f.IsNil() {
|
|
fv := reflect.New(f.Type().Elem()).Elem()
|
|
f.Set(fv.Addr())
|
|
f = fv
|
|
} else {
|
|
f = f.Elem()
|
|
}
|
|
}
|
|
|
|
if f.Kind() == reflect.Struct {
|
|
if pf := f.FieldByName("Pos"); pf.IsValid() && pf.Type() == positionType {
|
|
pf.Set(reflect.ValueOf(pos))
|
|
}
|
|
}
|
|
|
|
if f.CanAddr() {
|
|
if d, ok := f.Addr().Interface().(Capture); ok {
|
|
ifv := []string{}
|
|
for _, v := range fieldValue {
|
|
ifv = append(ifv, v.Interface().(string))
|
|
}
|
|
err := d.Capture(ifv)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// Strings concatenate all captured tokens.
|
|
if f.Kind() == reflect.String {
|
|
fieldValue, err = conform(f.Type(), fieldValue)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, v := range fieldValue {
|
|
f.Set(reflect.ValueOf(f.String() + v.String()).Convert(f.Type()))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Coalesce multiple tokens into one. This allows eg. ["-", "10"] to be captured as separate tokens but
|
|
// parsed as a single string "-10".
|
|
if len(fieldValue) > 1 {
|
|
out := []string{}
|
|
for _, v := range fieldValue {
|
|
out = append(out, v.String())
|
|
}
|
|
fieldValue = []reflect.Value{reflect.ValueOf(strings.Join(out, ""))}
|
|
}
|
|
|
|
fieldValue, err = conform(f.Type(), fieldValue)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
fv := fieldValue[0]
|
|
|
|
switch f.Kind() {
|
|
// Numeric types will increment if the token can not be coerced.
|
|
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
|
if fv.Type() != f.Type() {
|
|
f.SetInt(f.Int() + 1)
|
|
} else {
|
|
f.Set(fv)
|
|
}
|
|
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
|
if fv.Type() != f.Type() {
|
|
f.SetUint(f.Uint() + 1)
|
|
} else {
|
|
f.Set(fv)
|
|
}
|
|
|
|
case reflect.Float32, reflect.Float64:
|
|
if fv.Type() != f.Type() {
|
|
f.SetFloat(f.Float() + 1)
|
|
} else {
|
|
f.Set(fv)
|
|
}
|
|
|
|
case reflect.Bool, reflect.Struct:
|
|
if fv.Type() != f.Type() {
|
|
return fmt.Errorf("value %q is not correct type %s", fv, f.Type())
|
|
}
|
|
f.Set(fv)
|
|
|
|
default:
|
|
return fmt.Errorf("unsupported field type %s for field %s", f.Type(), field.Name)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Error is an error returned by the parser internally to differentiate from non-Participle errors.
type Error string

// Error implements the error interface; the message is the string value itself.
func (e Error) Error() string {
	return string(e)
}
|