Aditya Manthramurthy 2786055df4 Add new SQL parser to support S3 Select syntax (#7102)
- New parser written from scratch, allows easier and complete parsing
  of the full S3 Select SQL syntax. Parser definition is directly
  provided by the AST defined for the SQL grammar.

- Bring support to parse and interpret SQL involving JSON path
  expressions; evaluation of JSON path expressions will be
  subsequently added.

- Bring automatic type inference and conversion for untyped
  values (e.g. CSV data).
2019-01-28 17:59:48 -08:00

230 lines
5.7 KiB
Go

package participle
import (
"bytes"
"fmt"
"io"
"reflect"
"strings"
"github.com/alecthomas/participle/lexer"
)
// A Parser for a particular grammar and lexer.
//
// A Parser is configured and populated by Build (or MustBuild).
type Parser struct {
// root is the top-level grammar node; Build obtains it by running
// parseType over the grammar's reflected type.
root node
// lex is the lexer definition used to tokenise input. Build defaults it to
// lexer.TextScannerLexer and wraps it in a mappingLexerDef when mappers are
// registered.
lex lexer.Definition
// typ is the reflected type of the grammar passed to Build; Parse rejects
// targets whose type differs from it.
typ reflect.Type
// useLookahead is handed to newParseContext by Parse; Build defaults it to 1.
useLookahead int
// caseInsensitive is keyed by lexer symbol name. Parse translates the
// entries set to true into a set of token types for the parse context.
caseInsensitive map[string]bool
// mappers holds the token mappers registered on this parser (presumably via
// options — confirm); Build indexes them by token type and applies them to
// every token produced by lex.
mappers []mapperByToken
}
// MustBuild is the panicking counterpart of Build: it forwards grammar and
// options to Build and returns the resulting parser, panicking with Build's
// error if construction fails.
func MustBuild(grammar interface{}, options ...Option) *Parser {
	built, buildErr := Build(grammar, options...)
	if buildErr == nil {
		return built
	}
	panic(buildErr)
}
// Build constructs a parser for the given grammar.
//
// If "Lexer()" is not provided as an option, a default lexer based on text/scanner will be used. This scans typical Go-
// like tokens.
//
// Options are applied in order; the first one that fails aborts the build
// with its error. A nil Option or a nil grammar is reported as an error.
//
// See documentation for details
func Build(grammar interface{}, options ...Option) (parser *Parser, err error) {
	// Configure Parser struct with defaults + options.
	p := &Parser{
		lex:             lexer.TextScannerLexer,
		caseInsensitive: map[string]bool{},
		useLookahead:    1,
	}
	for _, option := range options {
		if option == nil {
			return nil, fmt.Errorf("nil Option passed, signature has changed; " +
				"if you intended to provide a custom Lexer, try participle.Build(grammar, participle.Lexer(lexer))")
		}
		if err = option(p); err != nil {
			return nil, err
		}
	}

	if len(p.mappers) > 0 {
		// Index the mappers by token type. Mappers that name no symbols are
		// filed under lexer.EOF, which doubles as the "applies to every
		// token" bucket below.
		mappers := map[rune][]Mapper{}
		symbols := p.lex.Symbols()
		for _, mapper := range p.mappers {
			if len(mapper.symbols) == 0 {
				mappers[lexer.EOF] = append(mappers[lexer.EOF], mapper.mapper)
				continue
			}
			for _, symbol := range mapper.symbols {
				rn, ok := symbols[symbol]
				if !ok {
					return nil, fmt.Errorf("mapper %#v uses unknown token %q", mapper, symbol)
				}
				mappers[rn] = append(mappers[rn], mapper.mapper)
			}
		}
		// Wrap the lexer so each token runs through the catch-all mappers
		// first and then through those registered for its own type. The
		// type-specific list is chosen from the token's type before any
		// mapper has had a chance to rewrite it.
		p.lex = &mappingLexerDef{p.lex, func(t lexer.Token) (lexer.Token, error) {
			combined := make([]Mapper, 0, len(mappers[t.Type])+len(mappers[lexer.EOF]))
			combined = append(combined, mappers[lexer.EOF]...)
			combined = append(combined, mappers[t.Type]...)

			var err error
			for _, m := range combined {
				t, err = m(t)
				if err != nil {
					return t, err
				}
			}
			return t, nil
		}}
	}

	v := reflect.ValueOf(grammar)
	// reflect.ValueOf(nil) is the zero Value, and calling Type() on it
	// panics; report the misuse as an error instead.
	if !v.IsValid() {
		return nil, fmt.Errorf("grammar must not be nil")
	}
	if v.Kind() == reflect.Interface {
		v = v.Elem()
	}

	context := newGeneratorContext(p.lex)
	p.typ = v.Type()
	p.root, err = context.parseType(p.typ)
	if err != nil {
		return nil, err
	}
	return p, nil
}
// Lex tokenises the contents of r with the parser's lexer definition and
// hands the resulting lexer to lexer.ConsumeAll to collect the tokens.
func (p *Parser) Lex(r io.Reader) ([]lexer.Token, error) {
	tokens, lexErr := p.lex.Lex(r)
	if lexErr == nil {
		return lexer.ConsumeAll(tokens)
	}
	return nil, lexErr
}
// Parse from r into grammar v which must be of the same type as the grammar passed to
// participle.Build().
//
// v may also be a channel whose element type is the grammar type; in that
// case values are parsed repeatedly and sent on the channel (see
// parseStreaming). If v implements Parseable, its own Parse method is used
// instead of the generated grammar.
//
// A nil v, or a v of any other type, is reported as an error.
func (p *Parser) Parse(r io.Reader, v interface{}) (err error) {
	rv := reflect.ValueOf(v)
	// reflect.ValueOf(nil) is the zero Value, and calling Type() on it
	// panics; treat it as a type mismatch instead.
	if !rv.IsValid() {
		return fmt.Errorf("must parse into value of type %s not %T", p.typ, v)
	}
	if rv.Kind() == reflect.Interface {
		rv = rv.Elem()
	}
	var stream reflect.Value
	if rv.Kind() == reflect.Chan {
		// Streaming mode: remember the channel and continue with a zero
		// value of its element type, which is only used for the type checks
		// below.
		stream = rv
		elemType := rv.Type().Elem()
		rv = reflect.New(elemType).Elem()
	}
	rt := rv.Type()
	if rt != p.typ {
		return fmt.Errorf("must parse into value of type %s not %T", p.typ, v)
	}
	baseLexer, err := p.lex.Lex(r)
	if err != nil {
		return err
	}
	lex := lexer.Upgrade(baseLexer)
	// Translate the case-insensitive symbol names into the token types the
	// lexer assigns to them.
	caseInsensitive := map[rune]bool{}
	for sym, rn := range p.lex.Symbols() {
		if p.caseInsensitive[sym] {
			caseInsensitive[rn] = true
		}
	}
	ctx, err := newParseContext(lex, p.useLookahead, caseInsensitive)
	if err != nil {
		return err
	}
	// If the grammar implements Parseable, use it.
	if parseable, ok := v.(Parseable); ok {
		return p.rootParseable(ctx, parseable)
	}
	if rt.Kind() != reflect.Ptr || rt.Elem().Kind() != reflect.Struct {
		return fmt.Errorf("target must be a pointer to a struct, not %s", rt)
	}
	if stream.IsValid() {
		return p.parseStreaming(ctx, stream)
	}
	return p.parseOne(ctx, rv)
}
// parseStreaming repeatedly parses grammar values from ctx and sends each one
// on the channel rv, whose element type is a pointer to the grammar struct.
//
// The channel is closed once the next token is EOF. If parsing a value
// fails, the error is returned and the channel is left open.
func (p *Parser) parseStreaming(ctx *parseContext, rv reflect.Value) error {
	structType := rv.Type().Elem().Elem()
	for {
		// NOTE(review): the Peek error is discarded here; presumably a lexer
		// failure resurfaces from parseInto below — confirm.
		next, _ := ctx.Peek(0)
		if next.EOF() {
			rv.Close()
			return nil
		}
		item := reflect.New(structType)
		err := p.parseInto(ctx, item)
		if err != nil {
			return err
		}
		rv.Send(item)
	}
}
// parseOne parses a single grammar value into rv and then requires the input
// to be exhausted: any token other than EOF left after the value is reported
// as an unexpected trailing token.
func (p *Parser) parseOne(ctx *parseContext, rv reflect.Value) error {
	if err := p.parseInto(ctx, rv); err != nil {
		return err
	}
	trailing, err := ctx.Peek(0)
	switch {
	case err != nil:
		return err
	case !trailing.EOF():
		return lexer.Errorf(trailing.Pos, "unexpected trailing token %q", trailing)
	}
	return nil
}
// parseInto runs the root grammar node against ctx and stores the result in
// the struct that rv points to. rv must be a non-nil pointer.
//
// Whatever the root node returned is copied into the target before the error
// is examined, so the target may hold a partial result when an error is
// returned. A nil result without an error is reported as "invalid syntax" at
// the position of the next token.
func (p *Parser) parseInto(ctx *parseContext, rv reflect.Value) error {
	if rv.IsNil() {
		return fmt.Errorf("target must be a non-nil pointer to a struct, but is a nil %s", rv.Type())
	}
	target := rv.Elem()
	parsed, err := p.root.Parse(ctx, target)
	if len(parsed) > 0 && parsed[0].Type() == target.Type() {
		target.Set(reflect.Indirect(parsed[0]))
	}
	switch {
	case err != nil:
		return err
	case parsed == nil:
		// The Peek error is ignored; only the token position is used.
		next, _ := ctx.Peek(0)
		return lexer.Errorf(next.Pos, "invalid syntax")
	}
	return nil
}
// rootParseable parses the input with the grammar's own Parseable
// implementation instead of the generated grammar.
//
// A NextMatch result from parseable.Parse is reported as "invalid syntax" at
// the position of the first token. After a successful parse the input must
// be exhausted; otherwise the first unconsumed token is reported as
// unexpected.
func (p *Parser) rootParseable(lex lexer.PeekingLexer, parseable Parseable) error {
	// Remember where parsing started so a NextMatch can be reported there.
	start, err := lex.Peek(0)
	if err != nil {
		return err
	}
	err = parseable.Parse(lex)
	if err == NextMatch {
		return lexer.Errorf(start.Pos, "invalid syntax")
	}
	if err != nil {
		return err
	}
	// Peek again: the token captured before Parse ran is the first token of
	// the input, not whatever is left over, so it cannot be used to detect
	// trailing input.
	trailing, err := lex.Peek(0)
	if err != nil {
		return err
	}
	if !trailing.EOF() {
		return lexer.Errorf(trailing.Pos, "unexpected token %q", trailing)
	}
	return nil
}
// ParseString parses the text in s into v; it is shorthand for calling Parse
// with a reader over s.
func (p *Parser) ParseString(s string, v interface{}) error {
	source := strings.NewReader(s)
	return p.Parse(source, v)
}
// ParseBytes parses the contents of b into v; it is shorthand for calling
// Parse with a reader over b.
func (p *Parser) ParseBytes(b []byte, v interface{}) error {
	source := bytes.NewReader(b)
	return p.Parse(source, v)
}
// String renders the parser's grammar, starting from its root node, as text.
func (p *Parser) String() string {
	grammarRoot := p.root
	return stringern(grammarRoot, 128)
}