mirror of
https://github.com/minio/minio.git
synced 2024-12-27 15:45:55 -05:00
2786055df4
- New parser written from scratch, allows easier and complete parsing of the full S3 Select SQL syntax. Parser definition is directly provided by the AST defined for the SQL grammar. - Bring support to parse and interpret SQL involving JSON path expressions; evaluation of JSON path expressions will be subsequently added. - Bring automatic type inference and conversion for untyped values (e.g. CSV data).
230 lines
5.7 KiB
Go
230 lines
5.7 KiB
Go
package participle
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"io"
|
|
"reflect"
|
|
"strings"
|
|
|
|
"github.com/alecthomas/participle/lexer"
|
|
)
|
|
|
|
// A Parser for a particular grammar and lexer.
|
|
type Parser struct {
|
|
root node
|
|
lex lexer.Definition
|
|
typ reflect.Type
|
|
useLookahead int
|
|
caseInsensitive map[string]bool
|
|
mappers []mapperByToken
|
|
}
|
|
|
|
// MustBuild calls Build(grammar, options...) and panics if an error occurs.
|
|
func MustBuild(grammar interface{}, options ...Option) *Parser {
|
|
parser, err := Build(grammar, options...)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
return parser
|
|
}
|
|
|
|
// Build constructs a parser for the given grammar.
|
|
//
|
|
// If "Lexer()" is not provided as an option, a default lexer based on text/scanner will be used. This scans typical Go-
|
|
// like tokens.
|
|
//
|
|
// See documentation for details
|
|
func Build(grammar interface{}, options ...Option) (parser *Parser, err error) {
|
|
// Configure Parser struct with defaults + options.
|
|
p := &Parser{
|
|
lex: lexer.TextScannerLexer,
|
|
caseInsensitive: map[string]bool{},
|
|
useLookahead: 1,
|
|
}
|
|
for _, option := range options {
|
|
if option == nil {
|
|
return nil, fmt.Errorf("nil Option passed, signature has changed; " +
|
|
"if you intended to provide a custom Lexer, try participle.Build(grammar, participle.Lexer(lexer))")
|
|
}
|
|
if err = option(p); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
if len(p.mappers) > 0 {
|
|
mappers := map[rune][]Mapper{}
|
|
symbols := p.lex.Symbols()
|
|
for _, mapper := range p.mappers {
|
|
if len(mapper.symbols) == 0 {
|
|
mappers[lexer.EOF] = append(mappers[lexer.EOF], mapper.mapper)
|
|
} else {
|
|
for _, symbol := range mapper.symbols {
|
|
if rn, ok := symbols[symbol]; !ok {
|
|
return nil, fmt.Errorf("mapper %#v uses unknown token %q", mapper, symbol)
|
|
} else { // nolint: golint
|
|
mappers[rn] = append(mappers[rn], mapper.mapper)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
p.lex = &mappingLexerDef{p.lex, func(t lexer.Token) (lexer.Token, error) {
|
|
combined := make([]Mapper, 0, len(mappers[t.Type])+len(mappers[lexer.EOF]))
|
|
combined = append(combined, mappers[lexer.EOF]...)
|
|
combined = append(combined, mappers[t.Type]...)
|
|
|
|
var err error
|
|
for _, m := range combined {
|
|
t, err = m(t)
|
|
if err != nil {
|
|
return t, err
|
|
}
|
|
}
|
|
return t, nil
|
|
}}
|
|
}
|
|
|
|
context := newGeneratorContext(p.lex)
|
|
v := reflect.ValueOf(grammar)
|
|
if v.Kind() == reflect.Interface {
|
|
v = v.Elem()
|
|
}
|
|
p.typ = v.Type()
|
|
p.root, err = context.parseType(p.typ)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return p, nil
|
|
}
|
|
|
|
// Lex uses the parser's lexer to tokenise input.
|
|
func (p *Parser) Lex(r io.Reader) ([]lexer.Token, error) {
|
|
lex, err := p.lex.Lex(r)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return lexer.ConsumeAll(lex)
|
|
}
|
|
|
|
// Parse from r into grammar v which must be of the same type as the grammar passed to
|
|
// participle.Build().
|
|
func (p *Parser) Parse(r io.Reader, v interface{}) (err error) {
|
|
rv := reflect.ValueOf(v)
|
|
if rv.Kind() == reflect.Interface {
|
|
rv = rv.Elem()
|
|
}
|
|
var stream reflect.Value
|
|
if rv.Kind() == reflect.Chan {
|
|
stream = rv
|
|
rt := rv.Type().Elem()
|
|
rv = reflect.New(rt).Elem()
|
|
}
|
|
rt := rv.Type()
|
|
if rt != p.typ {
|
|
return fmt.Errorf("must parse into value of type %s not %T", p.typ, v)
|
|
}
|
|
baseLexer, err := p.lex.Lex(r)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
lex := lexer.Upgrade(baseLexer)
|
|
caseInsensitive := map[rune]bool{}
|
|
for sym, rn := range p.lex.Symbols() {
|
|
if p.caseInsensitive[sym] {
|
|
caseInsensitive[rn] = true
|
|
}
|
|
}
|
|
ctx, err := newParseContext(lex, p.useLookahead, caseInsensitive)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// If the grammar implements Parseable, use it.
|
|
if parseable, ok := v.(Parseable); ok {
|
|
return p.rootParseable(ctx, parseable)
|
|
}
|
|
if rt.Kind() != reflect.Ptr || rt.Elem().Kind() != reflect.Struct {
|
|
return fmt.Errorf("target must be a pointer to a struct, not %s", rt)
|
|
}
|
|
if stream.IsValid() {
|
|
return p.parseStreaming(ctx, stream)
|
|
}
|
|
return p.parseOne(ctx, rv)
|
|
}
|
|
|
|
func (p *Parser) parseStreaming(ctx *parseContext, rv reflect.Value) error {
|
|
t := rv.Type().Elem().Elem()
|
|
for {
|
|
if token, _ := ctx.Peek(0); token.EOF() {
|
|
rv.Close()
|
|
return nil
|
|
}
|
|
v := reflect.New(t)
|
|
if err := p.parseInto(ctx, v); err != nil {
|
|
return err
|
|
}
|
|
rv.Send(v)
|
|
}
|
|
}
|
|
|
|
func (p *Parser) parseOne(ctx *parseContext, rv reflect.Value) error {
|
|
err := p.parseInto(ctx, rv)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
token, err := ctx.Peek(0)
|
|
if err != nil {
|
|
return err
|
|
} else if !token.EOF() {
|
|
return lexer.Errorf(token.Pos, "unexpected trailing token %q", token)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (p *Parser) parseInto(ctx *parseContext, rv reflect.Value) error {
|
|
if rv.IsNil() {
|
|
return fmt.Errorf("target must be a non-nil pointer to a struct, but is a nil %s", rv.Type())
|
|
}
|
|
pv, err := p.root.Parse(ctx, rv.Elem())
|
|
if len(pv) > 0 && pv[0].Type() == rv.Elem().Type() {
|
|
rv.Elem().Set(reflect.Indirect(pv[0]))
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if pv == nil {
|
|
token, _ := ctx.Peek(0)
|
|
return lexer.Errorf(token.Pos, "invalid syntax")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (p *Parser) rootParseable(lex lexer.PeekingLexer, parseable Parseable) error {
|
|
peek, err := lex.Peek(0)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
err = parseable.Parse(lex)
|
|
if err == NextMatch {
|
|
return lexer.Errorf(peek.Pos, "invalid syntax")
|
|
}
|
|
if err == nil && !peek.EOF() {
|
|
return lexer.Errorf(peek.Pos, "unexpected token %q", peek)
|
|
}
|
|
return err
|
|
}
|
|
|
|
// ParseString is a convenience around Parse().
|
|
func (p *Parser) ParseString(s string, v interface{}) error {
|
|
return p.Parse(strings.NewReader(s), v)
|
|
}
|
|
|
|
// ParseBytes is a convenience around Parse().
|
|
func (p *Parser) ParseBytes(b []byte, v interface{}) error {
|
|
return p.Parse(bytes.NewReader(b), v)
|
|
}
|
|
|
|
// String representation of the grammar.
|
|
func (p *Parser) String() string {
|
|
return stringern(p.root, 128)
|
|
}
|