mirror of
https://github.com/minio/minio.git
synced 2024-12-24 06:05:55 -05:00
Add JSON Path expression evaluation support (#7315)
- Includes support for FROM clause JSON path
This commit is contained in:
parent
b296b3cf8b
commit
e463386921
@ -20,6 +20,7 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"encoding/csv"
|
"encoding/csv"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"github.com/bcicen/jstream"
|
"github.com/bcicen/jstream"
|
||||||
@ -86,6 +87,16 @@ func (r *Record) MarshalJSON() ([]byte, error) {
|
|||||||
return json.Marshal(kvs)
|
return json.Marshal(kvs)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Raw - returns the underlying data with format info.
|
||||||
|
func (r *Record) Raw() (sql.SelectObjectFormat, interface{}) {
|
||||||
|
return sql.SelectFmtCSV, r
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace - is not supported for CSV
|
||||||
|
func (r *Record) Replace(_ jstream.KVS) error {
|
||||||
|
return errors.New("Replace is not supported for CSV")
|
||||||
|
}
|
||||||
|
|
||||||
// NewRecord - creates new CSV record.
|
// NewRecord - creates new CSV record.
|
||||||
func NewRecord() *Record {
|
func NewRecord() *Record {
|
||||||
return &Record{}
|
return &Record{}
|
||||||
|
@ -17,7 +17,6 @@
|
|||||||
package json
|
package json
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
|
||||||
"io"
|
"io"
|
||||||
|
|
||||||
"github.com/minio/minio/pkg/s3select/sql"
|
"github.com/minio/minio/pkg/s3select/sql"
|
||||||
@ -43,24 +42,22 @@ func (r *Reader) Read() (sql.Record, error) {
|
|||||||
return nil, io.EOF
|
return nil, io.EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
var data []byte
|
var kvs jstream.KVS
|
||||||
var err error
|
|
||||||
|
|
||||||
if v.ValueType == jstream.Object {
|
if v.ValueType == jstream.Object {
|
||||||
data, err = json.Marshal(v.Value)
|
// This is a JSON object type (that preserves key
|
||||||
|
// order)
|
||||||
|
kvs = v.Value.(jstream.KVS)
|
||||||
} else {
|
} else {
|
||||||
// To be AWS S3 compatible Select for JSON needs to
|
// To be AWS S3 compatible Select for JSON needs to
|
||||||
// output non-object JSON as single column value
|
// output non-object JSON as single column value
|
||||||
// i.e. a map with `_1` as key and value as the
|
// i.e. a map with `_1` as key and value as the
|
||||||
// non-object.
|
// non-object.
|
||||||
data, err = json.Marshal(jstream.KVS{jstream.KV{Key: "_1", Value: v.Value}})
|
kvs = jstream.KVS{jstream.KV{Key: "_1", Value: v.Value}}
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
return nil, errJSONParsingError(err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return &Record{
|
return &Record{
|
||||||
Data: data,
|
KVS: kvs,
|
||||||
|
SelectFormat: sql.SelectFmtJSON,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -26,7 +26,6 @@ import (
|
|||||||
|
|
||||||
"github.com/bcicen/jstream"
|
"github.com/bcicen/jstream"
|
||||||
"github.com/minio/minio/pkg/s3select/sql"
|
"github.com/minio/minio/pkg/s3select/sql"
|
||||||
"github.com/tidwall/gjson"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// RawJSON is a byte-slice that contains valid JSON
|
// RawJSON is a byte-slice that contains valid JSON
|
||||||
@ -40,34 +39,20 @@ func (b RawJSON) MarshalJSON() ([]byte, error) {
|
|||||||
|
|
||||||
// Record - is JSON record.
|
// Record - is JSON record.
|
||||||
type Record struct {
|
type Record struct {
|
||||||
// Used in Get()
|
|
||||||
Data []byte
|
|
||||||
|
|
||||||
// Used in Set(), Marshal*()
|
// Used in Set(), Marshal*()
|
||||||
kvs jstream.KVS
|
KVS jstream.KVS
|
||||||
|
|
||||||
|
SelectFormat sql.SelectObjectFormat
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get - gets the value for a column name.
|
// Get - gets the value for a column name.
|
||||||
func (r *Record) Get(name string) (*sql.Value, error) {
|
func (r *Record) Get(name string) (*sql.Value, error) {
|
||||||
result := gjson.GetBytes(r.Data, name)
|
// Get is implemented directly in the sql package.
|
||||||
switch result.Type {
|
return nil, errors.New("not implemented here")
|
||||||
case gjson.Null:
|
|
||||||
return sql.FromNull(), nil
|
|
||||||
case gjson.False:
|
|
||||||
return sql.FromBool(false), nil
|
|
||||||
case gjson.Number:
|
|
||||||
return sql.FromFloat(result.Float()), nil
|
|
||||||
case gjson.String:
|
|
||||||
return sql.FromString(result.String()), nil
|
|
||||||
case gjson.True:
|
|
||||||
return sql.FromBool(true), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil, fmt.Errorf("unsupported gjson value %v; %v", result, result.Type)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set - sets the value for a column name.
|
// Set - sets the value for a column name.
|
||||||
func (r *Record) Set(name string, value *sql.Value) (err error) {
|
func (r *Record) Set(name string, value *sql.Value) error {
|
||||||
var v interface{}
|
var v interface{}
|
||||||
if b, ok := value.ToBool(); ok {
|
if b, ok := value.ToBool(); ok {
|
||||||
v = b
|
v = b
|
||||||
@ -88,14 +73,14 @@ func (r *Record) Set(name string, value *sql.Value) (err error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
name = strings.Replace(name, "*", "__ALL__", -1)
|
name = strings.Replace(name, "*", "__ALL__", -1)
|
||||||
r.kvs = append(r.kvs, jstream.KV{Key: name, Value: v})
|
r.KVS = append(r.KVS, jstream.KV{Key: name, Value: v})
|
||||||
return err
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// MarshalCSV - encodes to CSV data.
|
// MarshalCSV - encodes to CSV data.
|
||||||
func (r *Record) MarshalCSV(fieldDelimiter rune) ([]byte, error) {
|
func (r *Record) MarshalCSV(fieldDelimiter rune) ([]byte, error) {
|
||||||
var csvRecord []string
|
var csvRecord []string
|
||||||
for _, kv := range r.kvs {
|
for _, kv := range r.KVS {
|
||||||
var columnValue string
|
var columnValue string
|
||||||
switch val := kv.Value.(type) {
|
switch val := kv.Value.(type) {
|
||||||
case bool, float64, int64, string:
|
case bool, float64, int64, string:
|
||||||
@ -125,14 +110,26 @@ func (r *Record) MarshalCSV(fieldDelimiter rune) ([]byte, error) {
|
|||||||
return data[:len(data)-1], nil
|
return data[:len(data)-1], nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Raw - returns the underlying representation.
|
||||||
|
func (r *Record) Raw() (sql.SelectObjectFormat, interface{}) {
|
||||||
|
return r.SelectFormat, r.KVS
|
||||||
|
}
|
||||||
|
|
||||||
// MarshalJSON - encodes to JSON data.
|
// MarshalJSON - encodes to JSON data.
|
||||||
func (r *Record) MarshalJSON() ([]byte, error) {
|
func (r *Record) MarshalJSON() ([]byte, error) {
|
||||||
return json.Marshal(r.kvs)
|
return json.Marshal(r.KVS)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace the underlying buffer of json data.
|
||||||
|
func (r *Record) Replace(k jstream.KVS) error {
|
||||||
|
r.KVS = k
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewRecord - creates new empty JSON record.
|
// NewRecord - creates new empty JSON record.
|
||||||
func NewRecord() *Record {
|
func NewRecord(f sql.SelectObjectFormat) *Record {
|
||||||
return &Record{
|
return &Record{
|
||||||
Data: []byte("{}"),
|
KVS: jstream.KVS{},
|
||||||
|
SelectFormat: f,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,6 @@
|
|||||||
package parquet
|
package parquet
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
|
||||||
"io"
|
"io"
|
||||||
|
|
||||||
"github.com/bcicen/jstream"
|
"github.com/bcicen/jstream"
|
||||||
@ -75,11 +74,7 @@ func (r *Reader) Read() (rec sql.Record, rerr error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
parquetRecord.Range(f)
|
parquetRecord.Range(f)
|
||||||
data, err := json.Marshal(kvs)
|
return &jsonfmt.Record{KVS: kvs, SelectFormat: sql.SelectFmtParquet}, nil
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return &jsonfmt.Record{Data: data}, rerr
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close - closes underlying readers.
|
// Close - closes underlying readers.
|
||||||
|
@ -246,7 +246,7 @@ func (s3Select *S3Select) outputRecord() sql.Record {
|
|||||||
case csvFormat:
|
case csvFormat:
|
||||||
return csv.NewRecord()
|
return csv.NewRecord()
|
||||||
case jsonFormat:
|
case jsonFormat:
|
||||||
return json.NewRecord()
|
return json.NewRecord(sql.SelectFmtJSON)
|
||||||
}
|
}
|
||||||
|
|
||||||
panic(fmt.Errorf("unknown output format '%v'", s3Select.Output.format))
|
panic(fmt.Errorf("unknown output format '%v'", s3Select.Output.format))
|
||||||
@ -391,6 +391,10 @@ func (s3Select *S3Select) Evaluate(w http.ResponseWriter) {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if inputRecord, err = s3Select.statement.EvalFrom(s3Select.Input.format, inputRecord); err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
if s3Select.statement.IsAggregated() {
|
if s3Select.statement.IsAggregated() {
|
||||||
if err = s3Select.statement.AggregateRow(inputRecord); err != nil {
|
if err = s3Select.statement.AggregateRow(inputRecord); err != nil {
|
||||||
break
|
break
|
||||||
|
@ -16,6 +16,8 @@
|
|||||||
|
|
||||||
package sql
|
package sql
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
type s3Error struct {
|
type s3Error struct {
|
||||||
code string
|
code string
|
||||||
message string
|
message string
|
||||||
@ -91,7 +93,16 @@ func errQueryAnalysisFailure(err error) *s3Error {
|
|||||||
func errBadTableName(err error) *s3Error {
|
func errBadTableName(err error) *s3Error {
|
||||||
return &s3Error{
|
return &s3Error{
|
||||||
code: "BadTableName",
|
code: "BadTableName",
|
||||||
message: "The table name is not supported",
|
message: fmt.Sprintf("The table name is not supported: %v", err),
|
||||||
|
statusCode: 400,
|
||||||
|
cause: err,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func errDataSource(err error) *s3Error {
|
||||||
|
return &s3Error{
|
||||||
|
code: "DataSourcePathUnsupported",
|
||||||
|
message: fmt.Sprintf("Data source: %v", err),
|
||||||
statusCode: 400,
|
statusCode: 400,
|
||||||
cause: err,
|
cause: err,
|
||||||
}
|
}
|
||||||
|
@ -17,8 +17,11 @@
|
|||||||
package sql
|
package sql
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/bcicen/jstream"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -319,7 +322,44 @@ func (e *JSONPath) evalNode(r Record) (*Value, error) {
|
|||||||
if len(ps) == 2 {
|
if len(ps) == 2 {
|
||||||
keypath = ps[1]
|
keypath = ps[1]
|
||||||
}
|
}
|
||||||
return r.Get(keypath)
|
objFmt, rawVal := r.Raw()
|
||||||
|
switch objFmt {
|
||||||
|
case SelectFmtJSON, SelectFmtParquet:
|
||||||
|
rowVal := rawVal.(jstream.KVS)
|
||||||
|
|
||||||
|
pathExpr := e.PathExpr
|
||||||
|
if len(pathExpr) == 0 {
|
||||||
|
pathExpr = []*JSONPathElement{{Key: &ObjectKey{ID: e.BaseKey}}}
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := jsonpathEval(pathExpr, rowVal)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch rval := result.(type) {
|
||||||
|
case string:
|
||||||
|
return FromString(rval), nil
|
||||||
|
case float64:
|
||||||
|
return FromFloat(rval), nil
|
||||||
|
case int64:
|
||||||
|
return FromInt(rval), nil
|
||||||
|
case bool:
|
||||||
|
return FromBool(rval), nil
|
||||||
|
case jstream.KVS, []interface{}:
|
||||||
|
bs, err := json.Marshal(result)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return FromBytes(bs), nil
|
||||||
|
case nil:
|
||||||
|
return FromNull(), nil
|
||||||
|
default:
|
||||||
|
return nil, errors.New("Unhandled value type")
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return r.Get(keypath)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *PrimaryTerm) evalNode(r Record) (res *Value, err error) {
|
func (e *PrimaryTerm) evalNode(r Record) (res *Value, err error) {
|
||||||
|
84
pkg/s3select/sql/jsondata/books.json
Normal file
84
pkg/s3select/sql/jsondata/books.json
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
{
|
||||||
|
"title": "Murder on the Orient Express",
|
||||||
|
"authorInfo": {
|
||||||
|
"name": "Agatha Christie",
|
||||||
|
"yearRange": [1890, 1976],
|
||||||
|
"penName": "Mary Westmacott"
|
||||||
|
},
|
||||||
|
"genre": "Crime novel",
|
||||||
|
"publicationHistory": [
|
||||||
|
{
|
||||||
|
"year": 1934,
|
||||||
|
"publisher": "Collins Crime Club (London)",
|
||||||
|
"type": "Hardcover",
|
||||||
|
"pages": 256
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"year": 1934,
|
||||||
|
"publisher": "Dodd Mead and Company (New York)",
|
||||||
|
"type": "Hardcover",
|
||||||
|
"pages": 302
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"year": 2011,
|
||||||
|
"publisher": "Harper Collins",
|
||||||
|
"type": "Paperback",
|
||||||
|
"pages": 265
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
{
|
||||||
|
"title": "The Robots of Dawn",
|
||||||
|
"authorInfo": {
|
||||||
|
"name": "Isaac Asimov",
|
||||||
|
"yearRange": [1920, 1992],
|
||||||
|
"penName": "Paul French"
|
||||||
|
},
|
||||||
|
"genre": "Science fiction",
|
||||||
|
"publicationHistory": [
|
||||||
|
{
|
||||||
|
"year": 1983,
|
||||||
|
"publisher": "Phantasia Press",
|
||||||
|
"type": "Hardcover",
|
||||||
|
"pages": 336
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"year": 1984,
|
||||||
|
"publisher": "Granada",
|
||||||
|
"type": "Hardcover",
|
||||||
|
"pages": 419
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"year": 2018,
|
||||||
|
"publisher": "Harper Voyager",
|
||||||
|
"type": "Paperback",
|
||||||
|
"pages": 432
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
{
|
||||||
|
"title": "Pigs Have Wings",
|
||||||
|
"authorInfo": {
|
||||||
|
"name": "P. G. Wodehouse",
|
||||||
|
"yearRange": [1881, 1975]
|
||||||
|
},
|
||||||
|
"genre": "Comic novel",
|
||||||
|
"publicationHistory": [
|
||||||
|
{
|
||||||
|
"year": 1952,
|
||||||
|
"publisher": "Doubleday & Company",
|
||||||
|
"type": "Hardcover"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"year": 2000,
|
||||||
|
"publisher": "Harry N. Abrams",
|
||||||
|
"type": "Hardcover"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"year": 2019,
|
||||||
|
"publisher": "Ulverscroft Collections",
|
||||||
|
"type": "Paperback",
|
||||||
|
"pages": 294
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
100
pkg/s3select/sql/jsonpath.go
Normal file
100
pkg/s3select/sql/jsonpath.go
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
/*
|
||||||
|
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package sql
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
|
||||||
|
"github.com/bcicen/jstream"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
errKeyLookup = errors.New("Cannot look up key in non-object value")
|
||||||
|
errIndexLookup = errors.New("Cannot look up array index in non-array value")
|
||||||
|
errWildcardObjectLookup = errors.New("Object wildcard used on non-object value")
|
||||||
|
errWildcardArrayLookup = errors.New("Array wildcard used on non-array value")
|
||||||
|
errWilcardObjectUsageInvalid = errors.New("Invalid usage of object wildcard")
|
||||||
|
)
|
||||||
|
|
||||||
|
func jsonpathEval(p []*JSONPathElement, v interface{}) (r interface{}, err error) {
|
||||||
|
// fmt.Printf("JPATHexpr: %v jsonobj: %v\n\n", p, v)
|
||||||
|
if len(p) == 0 || v == nil {
|
||||||
|
return v, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case p[0].Key != nil:
|
||||||
|
key := p[0].Key.keyString()
|
||||||
|
|
||||||
|
kvs, ok := v.(jstream.KVS)
|
||||||
|
if !ok {
|
||||||
|
return nil, errKeyLookup
|
||||||
|
}
|
||||||
|
for _, kv := range kvs {
|
||||||
|
if kv.Key == key {
|
||||||
|
return jsonpathEval(p[1:], kv.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Key not found - return nil result
|
||||||
|
return nil, nil
|
||||||
|
|
||||||
|
case p[0].Index != nil:
|
||||||
|
idx := *p[0].Index
|
||||||
|
|
||||||
|
arr, ok := v.([]interface{})
|
||||||
|
if !ok {
|
||||||
|
return nil, errIndexLookup
|
||||||
|
}
|
||||||
|
|
||||||
|
if idx >= len(arr) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
return jsonpathEval(p[1:], arr[idx])
|
||||||
|
|
||||||
|
case p[0].ObjectWildcard:
|
||||||
|
kvs, ok := v.(jstream.KVS)
|
||||||
|
if !ok {
|
||||||
|
return nil, errWildcardObjectLookup
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(p[1:]) > 0 {
|
||||||
|
return nil, errWilcardObjectUsageInvalid
|
||||||
|
}
|
||||||
|
|
||||||
|
return kvs, nil
|
||||||
|
|
||||||
|
case p[0].ArrayWildcard:
|
||||||
|
arr, ok := v.([]interface{})
|
||||||
|
if !ok {
|
||||||
|
return nil, errWildcardArrayLookup
|
||||||
|
}
|
||||||
|
|
||||||
|
// Lookup remainder of path in each array element and
|
||||||
|
// make result array.
|
||||||
|
var result []interface{}
|
||||||
|
for _, a := range arr {
|
||||||
|
rval, err := jsonpathEval(p[1:], a)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
result = append(result, rval)
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
panic("cannot reach here")
|
||||||
|
}
|
96
pkg/s3select/sql/jsonpath_test.go
Normal file
96
pkg/s3select/sql/jsonpath_test.go
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
/*
|
||||||
|
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package sql
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/alecthomas/participle"
|
||||||
|
"github.com/bcicen/jstream"
|
||||||
|
)
|
||||||
|
|
||||||
|
func getJSONStructs(b []byte) ([]interface{}, error) {
|
||||||
|
dec := jstream.NewDecoder(bytes.NewBuffer(b), 0).ObjectAsKVS()
|
||||||
|
var result []interface{}
|
||||||
|
for parsedVal := range dec.Stream() {
|
||||||
|
result = append(result, parsedVal.Value)
|
||||||
|
}
|
||||||
|
if err := dec.Err(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestJsonpathEval(t *testing.T) {
|
||||||
|
f, err := os.Open(filepath.Join("jsondata", "books.json"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
b, err := ioutil.ReadAll(f)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
p := participle.MustBuild(
|
||||||
|
&JSONPath{},
|
||||||
|
participle.Lexer(sqlLexer),
|
||||||
|
participle.CaseInsensitive("Keyword"),
|
||||||
|
)
|
||||||
|
cases := []struct {
|
||||||
|
str string
|
||||||
|
res []interface{}
|
||||||
|
}{
|
||||||
|
{"s.title", []interface{}{"Murder on the Orient Express", "The Robots of Dawn", "Pigs Have Wings"}},
|
||||||
|
{"s.authorInfo.yearRange", []interface{}{[]interface{}{1890.0, 1976.0}, []interface{}{1920.0, 1992.0}, []interface{}{1881.0, 1975.0}}},
|
||||||
|
{"s.authorInfo.name", []interface{}{"Agatha Christie", "Isaac Asimov", "P. G. Wodehouse"}},
|
||||||
|
{"s.authorInfo.yearRange[0]", []interface{}{1890.0, 1920.0, 1881.0}},
|
||||||
|
{"s.publicationHistory[0].pages", []interface{}{256.0, 336.0, nil}},
|
||||||
|
}
|
||||||
|
for i, tc := range cases {
|
||||||
|
jp := JSONPath{}
|
||||||
|
err := p.ParseString(tc.str, &jp)
|
||||||
|
// fmt.Println(jp)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("parse failed!: %d %v %s", i, err, tc)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read only the first json object from the file
|
||||||
|
recs, err := getJSONStructs(b)
|
||||||
|
if err != nil || len(recs) != 3 {
|
||||||
|
t.Fatalf("%v or length was not 3", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for j, rec := range recs {
|
||||||
|
// fmt.Println(rec)
|
||||||
|
r, err := jsonpathEval(jp.PathExpr, rec)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Error: %d %d %v", i, j, err)
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(r, tc.res[j]) {
|
||||||
|
fmt.Printf("%#v (%v) != %v (%v)\n", r, reflect.TypeOf(r), tc.res[j], reflect.TypeOf(tc.res[j]))
|
||||||
|
t.Errorf("case: %d %d failed", i, j)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -57,7 +57,7 @@ type ObjectKey struct {
|
|||||||
// quoted.
|
// quoted.
|
||||||
type QuotedIdentifier string
|
type QuotedIdentifier string
|
||||||
|
|
||||||
// Capture inferface used by participle
|
// Capture interface used by participle
|
||||||
func (qi *QuotedIdentifier) Capture(values []string) error {
|
func (qi *QuotedIdentifier) Capture(values []string) error {
|
||||||
// Remove enclosing quotes
|
// Remove enclosing quotes
|
||||||
n := len(values[0])
|
n := len(values[0])
|
||||||
@ -74,8 +74,8 @@ func (qi *QuotedIdentifier) Capture(values []string) error {
|
|||||||
type Select struct {
|
type Select struct {
|
||||||
Expression *SelectExpression `parser:"\"SELECT\" @@"`
|
Expression *SelectExpression `parser:"\"SELECT\" @@"`
|
||||||
From *TableExpression `parser:"\"FROM\" @@"`
|
From *TableExpression `parser:"\"FROM\" @@"`
|
||||||
Where *Expression `parser:"[ \"WHERE\" @@ ]"`
|
Where *Expression `parser:"( \"WHERE\" @@ )?"`
|
||||||
Limit *LitValue `parser:"[ \"LIMIT\" @@ ]"`
|
Limit *LitValue `parser:"( \"LIMIT\" @@ )?"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// SelectExpression represents the items requested in the select
|
// SelectExpression represents the items requested in the select
|
||||||
@ -94,7 +94,7 @@ type TableExpression struct {
|
|||||||
// JSONPathElement represents a keypath component
|
// JSONPathElement represents a keypath component
|
||||||
type JSONPathElement struct {
|
type JSONPathElement struct {
|
||||||
Key *ObjectKey `parser:" @@"` // ['name'] and .name forms
|
Key *ObjectKey `parser:" @@"` // ['name'] and .name forms
|
||||||
Index *uint64 `parser:"| \"[\" @Number \"]\""` // [3] form
|
Index *int `parser:"| \"[\" @Number \"]\""` // [3] form
|
||||||
ObjectWildcard bool `parser:"| @\".*\""` // .* form
|
ObjectWildcard bool `parser:"| @\".*\""` // .* form
|
||||||
ArrayWildcard bool `parser:"| @\"[*]\""` // [*] form
|
ArrayWildcard bool `parser:"| @\"[*]\""` // [*] form
|
||||||
}
|
}
|
||||||
|
@ -29,6 +29,7 @@ func TestJSONPathElement(t *testing.T) {
|
|||||||
&JSONPathElement{},
|
&JSONPathElement{},
|
||||||
participle.Lexer(sqlLexer),
|
participle.Lexer(sqlLexer),
|
||||||
participle.CaseInsensitive("Keyword"),
|
participle.CaseInsensitive("Keyword"),
|
||||||
|
participle.CaseInsensitive("Timeword"),
|
||||||
)
|
)
|
||||||
|
|
||||||
j := JSONPathElement{}
|
j := JSONPathElement{}
|
||||||
@ -59,6 +60,7 @@ func TestJSONPath(t *testing.T) {
|
|||||||
&JSONPath{},
|
&JSONPath{},
|
||||||
participle.Lexer(sqlLexer),
|
participle.Lexer(sqlLexer),
|
||||||
participle.CaseInsensitive("Keyword"),
|
participle.CaseInsensitive("Keyword"),
|
||||||
|
participle.CaseInsensitive("Timeword"),
|
||||||
)
|
)
|
||||||
|
|
||||||
j := JSONPath{}
|
j := JSONPath{}
|
||||||
|
@ -16,10 +16,32 @@
|
|||||||
|
|
||||||
package sql
|
package sql
|
||||||
|
|
||||||
|
import "github.com/bcicen/jstream"
|
||||||
|
|
||||||
|
// SelectObjectFormat specifies the format of the underlying data
|
||||||
|
type SelectObjectFormat int
|
||||||
|
|
||||||
|
const (
|
||||||
|
// SelectFmtUnknown - unknown format (default value)
|
||||||
|
SelectFmtUnknown SelectObjectFormat = iota
|
||||||
|
// SelectFmtCSV - CSV format
|
||||||
|
SelectFmtCSV
|
||||||
|
// SelectFmtJSON - JSON format
|
||||||
|
SelectFmtJSON
|
||||||
|
// SelectFmtParquet - Parquet format
|
||||||
|
SelectFmtParquet
|
||||||
|
)
|
||||||
|
|
||||||
// Record - is a type containing columns and their values.
|
// Record - is a type containing columns and their values.
|
||||||
type Record interface {
|
type Record interface {
|
||||||
Get(name string) (*Value, error)
|
Get(name string) (*Value, error)
|
||||||
Set(name string, value *Value) error
|
Set(name string, value *Value) error
|
||||||
MarshalCSV(fieldDelimiter rune) ([]byte, error)
|
MarshalCSV(fieldDelimiter rune) ([]byte, error)
|
||||||
MarshalJSON() ([]byte, error)
|
MarshalJSON() ([]byte, error)
|
||||||
|
|
||||||
|
// Returns underlying representation
|
||||||
|
Raw() (SelectObjectFormat, interface{})
|
||||||
|
|
||||||
|
// Replaces the underlying data
|
||||||
|
Replace(k jstream.KVS) error
|
||||||
}
|
}
|
||||||
|
@ -20,12 +20,18 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/bcicen/jstream"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
errBadLimitSpecified = errors.New("Limit value must be a positive integer")
|
errBadLimitSpecified = errors.New("Limit value must be a positive integer")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
baseTableName = "s3object"
|
||||||
|
)
|
||||||
|
|
||||||
// SelectStatement is the top level parsed and analyzed structure
|
// SelectStatement is the top level parsed and analyzed structure
|
||||||
type SelectStatement struct {
|
type SelectStatement struct {
|
||||||
selectAST *Select
|
selectAST *Select
|
||||||
@ -74,9 +80,8 @@ func ParseSelectStatement(s string) (stmt SelectStatement, err error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Validate table name
|
// Validate table name
|
||||||
tableString := strings.ToLower(selectAST.From.Table.String())
|
err = validateTableName(selectAST.From)
|
||||||
if !strings.HasPrefix(tableString, "s3object.") && tableString != "s3object" {
|
if err != nil {
|
||||||
err = errBadTableName(errors.New("Table name must be s3object"))
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -89,6 +94,19 @@ func ParseSelectStatement(s string) (stmt SelectStatement, err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func validateTableName(from *TableExpression) error {
|
||||||
|
if strings.ToLower(from.Table.BaseKey.String()) != baseTableName {
|
||||||
|
return errBadTableName(errors.New("table name must be `s3object`"))
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(from.Table.PathExpr) > 0 {
|
||||||
|
if !from.Table.PathExpr[0].ArrayWildcard {
|
||||||
|
return errBadTableName(errors.New("keypath table name is invalid - please check the service documentation"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func parseLimit(v *LitValue) (int64, error) {
|
func parseLimit(v *LitValue) (int64, error) {
|
||||||
switch {
|
switch {
|
||||||
case v == nil:
|
case v == nil:
|
||||||
@ -104,6 +122,41 @@ func parseLimit(v *LitValue) (int64, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EvalFrom evaluates the From clause on the input record. It only
|
||||||
|
// applies to JSON input data format (currently).
|
||||||
|
func (e *SelectStatement) EvalFrom(format string, input Record) (Record, error) {
|
||||||
|
if e.selectAST.From.HasKeypath() {
|
||||||
|
if format == "json" {
|
||||||
|
objFmt, rawVal := input.Raw()
|
||||||
|
if objFmt != SelectFmtJSON {
|
||||||
|
return nil, errDataSource(errors.New("unexpected non JSON input"))
|
||||||
|
}
|
||||||
|
|
||||||
|
jsonRec := rawVal.(jstream.KVS)
|
||||||
|
txedRec, err := jsonpathEval(e.selectAST.From.Table.PathExpr[1:], jsonRec)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var kvs jstream.KVS
|
||||||
|
switch v := txedRec.(type) {
|
||||||
|
case jstream.KVS:
|
||||||
|
kvs = v
|
||||||
|
default:
|
||||||
|
kvs = jstream.KVS{jstream.KV{Key: "_1", Value: v}}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = input.Replace(kvs); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return input, nil
|
||||||
|
}
|
||||||
|
return nil, errDataSource(errors.New("path not supported"))
|
||||||
|
}
|
||||||
|
return input, nil
|
||||||
|
}
|
||||||
|
|
||||||
// IsAggregated returns if the statement involves SQL aggregation
|
// IsAggregated returns if the statement involves SQL aggregation
|
||||||
func (e *SelectStatement) IsAggregated() bool {
|
func (e *SelectStatement) IsAggregated() bool {
|
||||||
return e.selectQProp.isAggregation
|
return e.selectQProp.isAggregation
|
||||||
@ -164,12 +217,10 @@ func (e *SelectStatement) AggregateRow(input Record) error {
|
|||||||
// applies only to non-aggregation queries.
|
// applies only to non-aggregation queries.
|
||||||
func (e *SelectStatement) Eval(input, output Record) (Record, error) {
|
func (e *SelectStatement) Eval(input, output Record) (Record, error) {
|
||||||
ok, err := e.isPassingWhereClause(input)
|
ok, err := e.isPassingWhereClause(input)
|
||||||
if err != nil {
|
if err != nil || !ok {
|
||||||
|
// Either error or row did not pass where clause
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if !ok {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if e.selectAST.Expression.All {
|
if e.selectAST.Expression.All {
|
||||||
// Return the input record for `SELECT * FROM
|
// Return the input record for `SELECT * FROM
|
||||||
|
@ -62,6 +62,13 @@ func (o *ObjectKey) String() string {
|
|||||||
return fmt.Sprintf(".%s", o.ID.String())
|
return fmt.Sprintf(".%s", o.ID.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (o *ObjectKey) keyString() string {
|
||||||
|
if o.Lit != nil {
|
||||||
|
return string(*o.Lit)
|
||||||
|
}
|
||||||
|
return o.ID.String()
|
||||||
|
}
|
||||||
|
|
||||||
// getLastKeypathComponent checks if the given expression is a path
|
// getLastKeypathComponent checks if the given expression is a path
|
||||||
// expression, and if so extracts the last dot separated component of
|
// expression, and if so extracts the last dot separated component of
|
||||||
// the path. Otherwise it returns false.
|
// the path. Otherwise it returns false.
|
||||||
@ -81,7 +88,19 @@ func getLastKeypathComponent(e *Expression) (string, bool) {
|
|||||||
return "", false
|
return "", false
|
||||||
}
|
}
|
||||||
|
|
||||||
keypath := operand.Left.Left.Primary.JPathExpr.String()
|
// Check if path expression ends in a key
|
||||||
ps := strings.Split(keypath, ".")
|
jpath := operand.Left.Left.Primary.JPathExpr
|
||||||
|
n := len(jpath.PathExpr)
|
||||||
|
if n > 0 && jpath.PathExpr[n-1].Key == nil {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
|
||||||
|
ps := strings.Split(jpath.String(), ".")
|
||||||
return ps[len(ps)-1], true
|
return ps[len(ps)-1], true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HasKeypath returns if the from clause has a key path -
|
||||||
|
// e.g. S3object[*].id
|
||||||
|
func (from *TableExpression) HasKeypath() bool {
|
||||||
|
return len(from.Table.PathExpr) > 1
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user