mirror of https://github.com/minio/minio.git
SIMDJSON S3 select input (#8401)
This commit is contained in:
parent
d1144c2c7e
commit
e4020fb41f
5
go.mod
5
go.mod
|
@ -50,8 +50,8 @@ require (
|
||||||
github.com/inconshreveable/go-update v0.0.0-20160112193335-8152e7eb6ccf
|
github.com/inconshreveable/go-update v0.0.0-20160112193335-8152e7eb6ccf
|
||||||
github.com/jonboulle/clockwork v0.1.0 // indirect
|
github.com/jonboulle/clockwork v0.1.0 // indirect
|
||||||
github.com/json-iterator/go v1.1.7
|
github.com/json-iterator/go v1.1.7
|
||||||
github.com/klauspost/compress v1.9.7
|
github.com/klauspost/compress v1.9.8
|
||||||
github.com/klauspost/cpuid v1.2.2 // indirect
|
github.com/klauspost/cpuid v1.2.2
|
||||||
github.com/klauspost/pgzip v1.2.1
|
github.com/klauspost/pgzip v1.2.1
|
||||||
github.com/klauspost/readahead v1.3.1
|
github.com/klauspost/readahead v1.3.1
|
||||||
github.com/klauspost/reedsolomon v1.9.3
|
github.com/klauspost/reedsolomon v1.9.3
|
||||||
|
@ -70,6 +70,7 @@ require (
|
||||||
github.com/minio/minio-go/v6 v6.0.45
|
github.com/minio/minio-go/v6 v6.0.45
|
||||||
github.com/minio/parquet-go v0.0.0-20200125064549-a1e49702e174
|
github.com/minio/parquet-go v0.0.0-20200125064549-a1e49702e174
|
||||||
github.com/minio/sha256-simd v0.1.1
|
github.com/minio/sha256-simd v0.1.1
|
||||||
|
github.com/minio/simdjson-go v0.1.2
|
||||||
github.com/minio/sio v0.2.0
|
github.com/minio/sio v0.2.0
|
||||||
github.com/mitchellh/go-homedir v1.1.0
|
github.com/mitchellh/go-homedir v1.1.0
|
||||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||||
|
|
8
go.sum
8
go.sum
|
@ -224,6 +224,8 @@ github.com/klauspost/compress v1.8.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0
|
||||||
github.com/klauspost/compress v1.9.4/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
github.com/klauspost/compress v1.9.4/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||||
github.com/klauspost/compress v1.9.7 h1:hYW1gP94JUmAhBtJ+LNz5My+gBobDxPR1iVuKug26aA=
|
github.com/klauspost/compress v1.9.7 h1:hYW1gP94JUmAhBtJ+LNz5My+gBobDxPR1iVuKug26aA=
|
||||||
github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
github.com/klauspost/compress v1.9.7/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||||
|
github.com/klauspost/compress v1.9.8 h1:VMAMUUOh+gaxKTMk+zqbjsSjsIcUcL/LF4o63i82QyA=
|
||||||
|
github.com/klauspost/compress v1.9.8/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||||
github.com/klauspost/cpuid v1.2.2 h1:1xAgYebNnsb9LKCdLOvFWtAxGU/33mjJtyOVbmUa0Us=
|
github.com/klauspost/cpuid v1.2.2 h1:1xAgYebNnsb9LKCdLOvFWtAxGU/33mjJtyOVbmUa0Us=
|
||||||
github.com/klauspost/cpuid v1.2.2/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
github.com/klauspost/cpuid v1.2.2/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
||||||
github.com/klauspost/pgzip v1.2.1 h1:oIPZROsWuPHpOdMVWLuJZXwgjhrW8r1yEX8UqMyeNHM=
|
github.com/klauspost/pgzip v1.2.1 h1:oIPZROsWuPHpOdMVWLuJZXwgjhrW8r1yEX8UqMyeNHM=
|
||||||
|
@ -278,6 +280,12 @@ github.com/minio/parquet-go v0.0.0-20200125064549-a1e49702e174 h1:WYFHZIJ5LTWd4C
|
||||||
github.com/minio/parquet-go v0.0.0-20200125064549-a1e49702e174/go.mod h1:PXYM9yI2l0YPmxHUXe6mFTmkQcyaVasDshAPTbGpDoo=
|
github.com/minio/parquet-go v0.0.0-20200125064549-a1e49702e174/go.mod h1:PXYM9yI2l0YPmxHUXe6mFTmkQcyaVasDshAPTbGpDoo=
|
||||||
github.com/minio/sha256-simd v0.1.1 h1:5QHSlgo3nt5yKOJrC7W8w7X+NFl8cMPZm96iu8kKUJU=
|
github.com/minio/sha256-simd v0.1.1 h1:5QHSlgo3nt5yKOJrC7W8w7X+NFl8cMPZm96iu8kKUJU=
|
||||||
github.com/minio/sha256-simd v0.1.1/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM=
|
github.com/minio/sha256-simd v0.1.1/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM=
|
||||||
|
github.com/minio/simdjson-go v0.1.0 h1:4QbJlXkE6lIzwzFP63e1aExJxDI6certBM0YKNPVm3g=
|
||||||
|
github.com/minio/simdjson-go v0.1.0/go.mod h1:ZAVJ8HB/LLrnk+nt4+nYd9kTAdRKGZdvW/x5Jelgevk=
|
||||||
|
github.com/minio/simdjson-go v0.1.1 h1:m1e/lgS21pUtACcNn6j6juiZetOBaM/FjrwW/pehg14=
|
||||||
|
github.com/minio/simdjson-go v0.1.1/go.mod h1:ZAVJ8HB/LLrnk+nt4+nYd9kTAdRKGZdvW/x5Jelgevk=
|
||||||
|
github.com/minio/simdjson-go v0.1.2 h1:ZKMpcX+qscSGcy3ZG9uBeJoNbYNZkwCpVyj7pWJLXQQ=
|
||||||
|
github.com/minio/simdjson-go v0.1.2/go.mod h1:ZAVJ8HB/LLrnk+nt4+nYd9kTAdRKGZdvW/x5Jelgevk=
|
||||||
github.com/minio/sio v0.2.0 h1:NCRCFLx0r5pRbXf65LVNjxbCGZgNQvNFQkgX3XF4BoA=
|
github.com/minio/sio v0.2.0 h1:NCRCFLx0r5pRbXf65LVNjxbCGZgNQvNFQkgX3XF4BoA=
|
||||||
github.com/minio/sio v0.2.0/go.mod h1:nKM5GIWSrqbOZp0uhyj6M1iA0X6xQzSGtYSaTKSCut0=
|
github.com/minio/sio v0.2.0/go.mod h1:nKM5GIWSrqbOZp0uhyj6M1iA0X6xQzSGtYSaTKSCut0=
|
||||||
github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc=
|
github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc=
|
||||||
|
|
|
@ -55,10 +55,10 @@ func (r *Record) Get(name string) (*sql.Value, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set - sets the value for a column name.
|
// Set - sets the value for a column name.
|
||||||
func (r *Record) Set(name string, value *sql.Value) error {
|
func (r *Record) Set(name string, value *sql.Value) (sql.Record, error) {
|
||||||
r.columnNames = append(r.columnNames, name)
|
r.columnNames = append(r.columnNames, name)
|
||||||
r.csvRecord = append(r.csvRecord, value.CSVString())
|
r.csvRecord = append(r.csvRecord, value.CSVString())
|
||||||
return nil
|
return r, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset data in record.
|
// Reset data in record.
|
||||||
|
@ -121,7 +121,7 @@ func (r *Record) Raw() (sql.SelectObjectFormat, interface{}) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Replace - is not supported for CSV
|
// Replace - is not supported for CSV
|
||||||
func (r *Record) Replace(_ jstream.KVS) error {
|
func (r *Record) Replace(_ interface{}) error {
|
||||||
return errors.New("Replace is not supported for CSV")
|
return errors.New("Replace is not supported for CSV")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,8 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"math"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/bcicen/jstream"
|
"github.com/bcicen/jstream"
|
||||||
|
@ -72,7 +74,7 @@ func (r *Record) Clone(dst sql.Record) sql.Record {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set - sets the value for a column name.
|
// Set - sets the value for a column name.
|
||||||
func (r *Record) Set(name string, value *sql.Value) error {
|
func (r *Record) Set(name string, value *sql.Value) (sql.Record, error) {
|
||||||
var v interface{}
|
var v interface{}
|
||||||
if b, ok := value.ToBool(); ok {
|
if b, ok := value.ToBool(); ok {
|
||||||
v = b
|
v = b
|
||||||
|
@ -97,12 +99,12 @@ func (r *Record) Set(name string, value *sql.Value) error {
|
||||||
} else if arr, ok := value.ToArray(); ok {
|
} else if arr, ok := value.ToArray(); ok {
|
||||||
v = arr
|
v = arr
|
||||||
} else {
|
} else {
|
||||||
return fmt.Errorf("unsupported sql value %v and type %v", value, value.GetTypeString())
|
return nil, fmt.Errorf("unsupported sql value %v and type %v", value, value.GetTypeString())
|
||||||
}
|
}
|
||||||
|
|
||||||
name = strings.Replace(name, "*", "__ALL__", -1)
|
name = strings.Replace(name, "*", "__ALL__", -1)
|
||||||
r.KVS = append(r.KVS, jstream.KV{Key: name, Value: v})
|
r.KVS = append(r.KVS, jstream.KV{Key: name, Value: v})
|
||||||
return nil
|
return r, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// WriteCSV - encodes to CSV data.
|
// WriteCSV - encodes to CSV data.
|
||||||
|
@ -111,7 +113,11 @@ func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter rune) error {
|
||||||
for _, kv := range r.KVS {
|
for _, kv := range r.KVS {
|
||||||
var columnValue string
|
var columnValue string
|
||||||
switch val := kv.Value.(type) {
|
switch val := kv.Value.(type) {
|
||||||
case bool, float64, int64, string:
|
case float64:
|
||||||
|
columnValue = jsonFloat(val)
|
||||||
|
case string:
|
||||||
|
columnValue = val
|
||||||
|
case bool, int64:
|
||||||
columnValue = fmt.Sprintf("%v", val)
|
columnValue = fmt.Sprintf("%v", val)
|
||||||
case nil:
|
case nil:
|
||||||
columnValue = ""
|
columnValue = ""
|
||||||
|
@ -153,8 +159,12 @@ func (r *Record) WriteJSON(writer io.Writer) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Replace the underlying buffer of json data.
|
// Replace the underlying buffer of json data.
|
||||||
func (r *Record) Replace(k jstream.KVS) error {
|
func (r *Record) Replace(k interface{}) error {
|
||||||
r.KVS = k
|
v, ok := k.(jstream.KVS)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("cannot replace internal data in json record with type %T", k)
|
||||||
|
}
|
||||||
|
r.KVS = v
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -165,3 +175,32 @@ func NewRecord(f sql.SelectObjectFormat) *Record {
|
||||||
SelectFormat: f,
|
SelectFormat: f,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// jsonFloat converts a float to string similar to Go stdlib formats json floats.
|
||||||
|
func jsonFloat(f float64) string {
|
||||||
|
var tmp [32]byte
|
||||||
|
dst := tmp[:0]
|
||||||
|
|
||||||
|
// Convert as if by ES6 number to string conversion.
|
||||||
|
// This matches most other JSON generators.
|
||||||
|
// See golang.org/issue/6384 and golang.org/issue/14135.
|
||||||
|
// Like fmt %g, but the exponent cutoffs are different
|
||||||
|
// and exponents themselves are not padded to two digits.
|
||||||
|
abs := math.Abs(f)
|
||||||
|
fmt := byte('f')
|
||||||
|
if abs != 0 {
|
||||||
|
if abs < 1e-6 || abs >= 1e21 {
|
||||||
|
fmt = 'e'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dst = strconv.AppendFloat(dst, f, fmt, -1, 64)
|
||||||
|
if fmt == 'e' {
|
||||||
|
// clean up e-09 to e-9
|
||||||
|
n := len(dst)
|
||||||
|
if n >= 4 && dst[n-4] == 'e' && dst[n-3] == '-' && dst[n-2] == '0' {
|
||||||
|
dst[n-2] = dst[n-1]
|
||||||
|
dst = dst[:n-1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return string(dst)
|
||||||
|
}
|
||||||
|
|
|
@ -39,6 +39,9 @@ func (r *countUpReader) Read(p []byte) (n int, err error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *countUpReader) BytesRead() int64 {
|
func (r *countUpReader) BytesRead() int64 {
|
||||||
|
if r == nil {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
return atomic.LoadInt64(&r.bytesRead)
|
return atomic.LoadInt64(&r.bytesRead)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -69,6 +72,9 @@ func (pr *progressReader) Read(p []byte) (n int, err error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pr *progressReader) Close() error {
|
func (pr *progressReader) Close() error {
|
||||||
|
if pr.rc == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
pr.closedMu.Lock()
|
pr.closedMu.Lock()
|
||||||
defer pr.closedMu.Unlock()
|
defer pr.closedMu.Unlock()
|
||||||
if pr.closed {
|
if pr.closed {
|
||||||
|
@ -79,6 +85,9 @@ func (pr *progressReader) Close() error {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (pr *progressReader) Stats() (bytesScanned, bytesProcessed int64) {
|
func (pr *progressReader) Stats() (bytesScanned, bytesProcessed int64) {
|
||||||
|
if pr == nil {
|
||||||
|
return 0, 0
|
||||||
|
}
|
||||||
return pr.scannedReader.BytesRead(), pr.processedReader.BytesRead()
|
return pr.scannedReader.BytesRead(), pr.processedReader.BytesRead()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,7 +30,9 @@ import (
|
||||||
"github.com/minio/minio/pkg/s3select/csv"
|
"github.com/minio/minio/pkg/s3select/csv"
|
||||||
"github.com/minio/minio/pkg/s3select/json"
|
"github.com/minio/minio/pkg/s3select/json"
|
||||||
"github.com/minio/minio/pkg/s3select/parquet"
|
"github.com/minio/minio/pkg/s3select/parquet"
|
||||||
|
"github.com/minio/minio/pkg/s3select/simdj"
|
||||||
"github.com/minio/minio/pkg/s3select/sql"
|
"github.com/minio/minio/pkg/s3select/sql"
|
||||||
|
"github.com/minio/simdjson-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
type recordReader interface {
|
type recordReader interface {
|
||||||
|
@ -317,7 +319,11 @@ func (s3Select *S3Select) Open(getReader func(offset, length int64) (io.ReadClos
|
||||||
}
|
}
|
||||||
|
|
||||||
if strings.EqualFold(s3Select.Input.JSONArgs.ContentType, "lines") {
|
if strings.EqualFold(s3Select.Input.JSONArgs.ContentType, "lines") {
|
||||||
s3Select.recordReader = json.NewPReader(s3Select.progressReader, &s3Select.Input.JSONArgs)
|
if simdjson.SupportedCPU() {
|
||||||
|
s3Select.recordReader = simdj.NewReader(s3Select.progressReader, &s3Select.Input.JSONArgs)
|
||||||
|
} else {
|
||||||
|
s3Select.recordReader = json.NewPReader(s3Select.progressReader, &s3Select.Input.JSONArgs)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
s3Select.recordReader = json.NewReader(s3Select.progressReader, &s3Select.Input.JSONArgs)
|
s3Select.recordReader = json.NewReader(s3Select.progressReader, &s3Select.Input.JSONArgs)
|
||||||
}
|
}
|
||||||
|
@ -350,7 +356,9 @@ func (s3Select *S3Select) marshal(buf *bytes.Buffer, record sql.Record) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
buf.Truncate(buf.Len() - 1)
|
if buf.Bytes()[buf.Len()-1] == '\n' {
|
||||||
|
buf.Truncate(buf.Len() - 1)
|
||||||
|
}
|
||||||
buf.WriteString(s3Select.Output.CSVArgs.RecordDelimiter)
|
buf.WriteString(s3Select.Output.CSVArgs.RecordDelimiter)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
@ -359,8 +367,10 @@ func (s3Select *S3Select) marshal(buf *bytes.Buffer, record sql.Record) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
// Trim trailing newline from non-simd output
|
||||||
buf.Truncate(buf.Len() - 1)
|
if buf.Bytes()[buf.Len()-1] == '\n' {
|
||||||
|
buf.Truncate(buf.Len() - 1)
|
||||||
|
}
|
||||||
buf.WriteString(s3Select.Output.JSONArgs.RecordDelimiter)
|
buf.WriteString(s3Select.Output.JSONArgs.RecordDelimiter)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|
|
@ -21,13 +21,16 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"math"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"reflect"
|
"reflect"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/klauspost/cpuid"
|
||||||
"github.com/minio/minio-go/v6"
|
"github.com/minio/minio-go/v6"
|
||||||
|
"github.com/minio/simdjson-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
type testResponseWriter struct {
|
type testResponseWriter struct {
|
||||||
|
@ -56,6 +59,7 @@ func TestJSONQueries(t *testing.T) {
|
||||||
{"id": 1,"title": "Second Record","desc": "another text","synonyms": ["some", "synonym", "value"]}
|
{"id": 1,"title": "Second Record","desc": "another text","synonyms": ["some", "synonym", "value"]}
|
||||||
{"id": 2,"title": "Second Record","desc": "another text","numbers": [2, 3.0, 4]}
|
{"id": 2,"title": "Second Record","desc": "another text","numbers": [2, 3.0, 4]}
|
||||||
{"id": 3,"title": "Second Record","desc": "another text","nested": [[2, 3.0, 4], [7, 8.5, 9]]}`
|
{"id": 3,"title": "Second Record","desc": "another text","nested": [[2, 3.0, 4], [7, 8.5, 9]]}`
|
||||||
|
|
||||||
var testTable = []struct {
|
var testTable = []struct {
|
||||||
name string
|
name string
|
||||||
query string
|
query string
|
||||||
|
@ -229,6 +233,11 @@ func TestJSONQueries(t *testing.T) {
|
||||||
{"id":3, "value": "true"}
|
{"id":3, "value": "true"}
|
||||||
`,
|
`,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "index-wildcard-in",
|
||||||
|
query: `SELECT * from s3object s WHERE title = 'Test Record'`,
|
||||||
|
wantResult: `{"id":0,"title":"Test Record","desc":"Some text","synonyms":["foo","bar","whatever"]}`,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "select-output-field-as-csv",
|
name: "select-output-field-as-csv",
|
||||||
requestXML: []byte(`<?xml version="1.0" encoding="UTF-8"?>
|
requestXML: []byte(`<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
@ -260,7 +269,7 @@ func TestJSONQueries(t *testing.T) {
|
||||||
<InputSerialization>
|
<InputSerialization>
|
||||||
<CompressionType>NONE</CompressionType>
|
<CompressionType>NONE</CompressionType>
|
||||||
<JSON>
|
<JSON>
|
||||||
<Type>DOCUMENT</Type>
|
<Type>LINES</Type>
|
||||||
</JSON>
|
</JSON>
|
||||||
</InputSerialization>
|
</InputSerialization>
|
||||||
<OutputSerialization>
|
<OutputSerialization>
|
||||||
|
@ -274,6 +283,59 @@ func TestJSONQueries(t *testing.T) {
|
||||||
|
|
||||||
for _, testCase := range testTable {
|
for _, testCase := range testTable {
|
||||||
t.Run(testCase.name, func(t *testing.T) {
|
t.Run(testCase.name, func(t *testing.T) {
|
||||||
|
// Hack cpuid to the CPU doesn't appear to support AVX2.
|
||||||
|
// Restore whatever happens.
|
||||||
|
defer func(f cpuid.Flags) {
|
||||||
|
cpuid.CPU.Features = f
|
||||||
|
}(cpuid.CPU.Features)
|
||||||
|
cpuid.CPU.Features &= math.MaxUint64 - cpuid.AVX2
|
||||||
|
|
||||||
|
testReq := testCase.requestXML
|
||||||
|
if len(testReq) == 0 {
|
||||||
|
testReq = []byte(fmt.Sprintf(defRequest, testCase.query))
|
||||||
|
}
|
||||||
|
s3Select, err := NewS3Select(bytes.NewReader(testReq))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) {
|
||||||
|
in := input
|
||||||
|
if len(testCase.withJSON) > 0 {
|
||||||
|
in = testCase.withJSON
|
||||||
|
}
|
||||||
|
return ioutil.NopCloser(bytes.NewBufferString(in)), nil
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
w := &testResponseWriter{}
|
||||||
|
s3Select.Evaluate(w)
|
||||||
|
s3Select.Close()
|
||||||
|
resp := http.Response{
|
||||||
|
StatusCode: http.StatusOK,
|
||||||
|
Body: ioutil.NopCloser(bytes.NewReader(w.response)),
|
||||||
|
ContentLength: int64(len(w.response)),
|
||||||
|
}
|
||||||
|
res, err := minio.NewSelectResults(&resp, "testbucket")
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
got, err := ioutil.ReadAll(res)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
gotS := strings.TrimSpace(string(got))
|
||||||
|
if !reflect.DeepEqual(gotS, testCase.wantResult) {
|
||||||
|
t.Errorf("received response does not match with expected reply. Query: %s\ngot: %s\nwant:%s", testCase.query, gotS, testCase.wantResult)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
t.Run("simd-"+testCase.name, func(t *testing.T) {
|
||||||
|
if !simdjson.SupportedCPU() {
|
||||||
|
t.Skip("No CPU support")
|
||||||
|
}
|
||||||
testReq := testCase.requestXML
|
testReq := testCase.requestXML
|
||||||
if len(testReq) == 0 {
|
if len(testReq) == 0 {
|
||||||
testReq = []byte(fmt.Sprintf(defRequest, testCase.query))
|
testReq = []byte(fmt.Sprintf(defRequest, testCase.query))
|
||||||
|
@ -320,6 +382,89 @@ func TestJSONQueries(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCSVQueries(t *testing.T) {
|
func TestCSVQueries(t *testing.T) {
|
||||||
|
input := `index,ID,CaseNumber,Date,Day,Month,Year,Block,IUCR,PrimaryType,Description,LocationDescription,Arrest,Domestic,Beat,District,Ward,CommunityArea,FBI Code,XCoordinate,YCoordinate,UpdatedOn,Latitude,Longitude,Location
|
||||||
|
2700763,7732229,,2010-05-26 00:00:00,26,May,2010,113XX S HALSTED ST,1150,,CREDIT CARD FRAUD,,False,False,2233,22.0,34.0,,11,,,,41.688043288,-87.6422444,"(41.688043288, -87.6422444)"`
|
||||||
|
|
||||||
|
var testTable = []struct {
|
||||||
|
name string
|
||||||
|
query string
|
||||||
|
requestXML []byte
|
||||||
|
wantResult string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "select-in-text-simple",
|
||||||
|
query: `SELECT index FROM s3Object s WHERE "Month"='May'`,
|
||||||
|
wantResult: `2700763`,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
defRequest := `<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<SelectObjectContentRequest>
|
||||||
|
<Expression>%s</Expression>
|
||||||
|
<ExpressionType>SQL</ExpressionType>
|
||||||
|
<InputSerialization>
|
||||||
|
<CompressionType>NONE</CompressionType>
|
||||||
|
<CSV>
|
||||||
|
<FieldDelimiter>,</FieldDelimiter>
|
||||||
|
<FileHeaderInfo>USE</FileHeaderInfo>
|
||||||
|
<QuoteCharacter>"</QuoteCharacter>
|
||||||
|
<QuoteEscapeCharacter>"</QuoteEscapeCharacter>
|
||||||
|
<RecordDelimiter>\n</RecordDelimiter>
|
||||||
|
</CSV>
|
||||||
|
</InputSerialization>
|
||||||
|
<OutputSerialization>
|
||||||
|
<CSV>
|
||||||
|
</CSV>
|
||||||
|
</OutputSerialization>
|
||||||
|
<RequestProgress>
|
||||||
|
<Enabled>FALSE</Enabled>
|
||||||
|
</RequestProgress>
|
||||||
|
</SelectObjectContentRequest>`
|
||||||
|
|
||||||
|
for _, testCase := range testTable {
|
||||||
|
t.Run(testCase.name, func(t *testing.T) {
|
||||||
|
testReq := testCase.requestXML
|
||||||
|
if len(testReq) == 0 {
|
||||||
|
testReq = []byte(fmt.Sprintf(defRequest, testCase.query))
|
||||||
|
}
|
||||||
|
s3Select, err := NewS3Select(bytes.NewReader(testReq))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) {
|
||||||
|
return ioutil.NopCloser(bytes.NewBufferString(input)), nil
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
w := &testResponseWriter{}
|
||||||
|
s3Select.Evaluate(w)
|
||||||
|
s3Select.Close()
|
||||||
|
resp := http.Response{
|
||||||
|
StatusCode: http.StatusOK,
|
||||||
|
Body: ioutil.NopCloser(bytes.NewReader(w.response)),
|
||||||
|
ContentLength: int64(len(w.response)),
|
||||||
|
}
|
||||||
|
res, err := minio.NewSelectResults(&resp, "testbucket")
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
got, err := ioutil.ReadAll(res)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
gotS := strings.TrimSpace(string(got))
|
||||||
|
if !reflect.DeepEqual(gotS, testCase.wantResult) {
|
||||||
|
t.Errorf("received response does not match with expected reply. Query: %s\ngot: %s\nwant:%s", testCase.query, gotS, testCase.wantResult)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCSVQueries2(t *testing.T) {
|
||||||
input := `id,time,num,num2,text
|
input := `id,time,num,num2,text
|
||||||
1,2010-01-01T,7867786,4565.908123,"a text, with comma"
|
1,2010-01-01T,7867786,4565.908123,"a text, with comma"
|
||||||
2,2017-01-02T03:04Z,-5, 0.765111,
|
2,2017-01-02T03:04Z,-5, 0.765111,
|
||||||
|
@ -559,7 +704,23 @@ func TestCSVInput(t *testing.T) {
|
||||||
s3Select.Close()
|
s3Select.Close()
|
||||||
|
|
||||||
if !reflect.DeepEqual(w.response, testCase.expectedResult) {
|
if !reflect.DeepEqual(w.response, testCase.expectedResult) {
|
||||||
t.Errorf("received response does not match with expected reply\ngot: %#v\nwant:%#v", w.response, testCase.expectedResult)
|
resp := http.Response{
|
||||||
|
StatusCode: http.StatusOK,
|
||||||
|
Body: ioutil.NopCloser(bytes.NewReader(w.response)),
|
||||||
|
ContentLength: int64(len(w.response)),
|
||||||
|
}
|
||||||
|
res, err := minio.NewSelectResults(&resp, "testbucket")
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
got, err := ioutil.ReadAll(res)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Errorf("received response does not match with expected reply\ngot: %#v\nwant:%#v\ndecoded:%s", w.response, testCase.expectedResult, string(got))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -667,7 +828,23 @@ func TestJSONInput(t *testing.T) {
|
||||||
s3Select.Close()
|
s3Select.Close()
|
||||||
|
|
||||||
if !reflect.DeepEqual(w.response, testCase.expectedResult) {
|
if !reflect.DeepEqual(w.response, testCase.expectedResult) {
|
||||||
t.Errorf("received response does not match with expected reply\ngot: %#v\nwant:%#v", w.response, testCase.expectedResult)
|
resp := http.Response{
|
||||||
|
StatusCode: http.StatusOK,
|
||||||
|
Body: ioutil.NopCloser(bytes.NewReader(w.response)),
|
||||||
|
ContentLength: int64(len(w.response)),
|
||||||
|
}
|
||||||
|
res, err := minio.NewSelectResults(&resp, "testbucket")
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
got, err := ioutil.ReadAll(res)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Errorf("received response does not match with expected reply\ngot: %#v\nwant:%#v\ndecoded:%s", w.response, testCase.expectedResult, string(got))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -766,7 +943,23 @@ func TestParquetInput(t *testing.T) {
|
||||||
s3Select.Close()
|
s3Select.Close()
|
||||||
|
|
||||||
if !reflect.DeepEqual(w.response, testCase.expectedResult) {
|
if !reflect.DeepEqual(w.response, testCase.expectedResult) {
|
||||||
t.Errorf("received response does not match with expected reply\ngot: %#v\nwant:%#v", w.response, testCase.expectedResult)
|
resp := http.Response{
|
||||||
|
StatusCode: http.StatusOK,
|
||||||
|
Body: ioutil.NopCloser(bytes.NewReader(w.response)),
|
||||||
|
ContentLength: int64(len(w.response)),
|
||||||
|
}
|
||||||
|
res, err := minio.NewSelectResults(&resp, "testbucket")
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
got, err := ioutil.ReadAll(res)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Errorf("received response does not match with expected reply\ngot: %#v\nwant:%#v\ndecoded:%s", w.response, testCase.expectedResult, string(got))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,64 @@
|
||||||
|
/*
|
||||||
|
* MinIO Cloud Storage, (C) 2019 MinIO, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package simdj
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
type s3Error struct {
|
||||||
|
code string
|
||||||
|
message string
|
||||||
|
statusCode int
|
||||||
|
cause error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (err *s3Error) Cause() error {
|
||||||
|
return err.cause
|
||||||
|
}
|
||||||
|
|
||||||
|
func (err *s3Error) ErrorCode() string {
|
||||||
|
return err.code
|
||||||
|
}
|
||||||
|
|
||||||
|
func (err *s3Error) ErrorMessage() string {
|
||||||
|
return err.message
|
||||||
|
}
|
||||||
|
|
||||||
|
func (err *s3Error) HTTPStatusCode() int {
|
||||||
|
return err.statusCode
|
||||||
|
}
|
||||||
|
|
||||||
|
func (err *s3Error) Error() string {
|
||||||
|
return err.message
|
||||||
|
}
|
||||||
|
|
||||||
|
func errInvalidJSONType(err error) *s3Error {
|
||||||
|
return &s3Error{
|
||||||
|
code: "InvalidJsonType",
|
||||||
|
message: "The JsonType is invalid. Only DOCUMENT and LINES are supported.",
|
||||||
|
statusCode: 400,
|
||||||
|
cause: err,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func errJSONParsingError(err error) *s3Error {
|
||||||
|
return &s3Error{
|
||||||
|
code: "JSONParsingError",
|
||||||
|
message: fmt.Sprintf("Encountered an error parsing the JSON file: %v. Check the file and try again.", err),
|
||||||
|
statusCode: 400,
|
||||||
|
cause: err,
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,187 @@
|
||||||
|
/*
|
||||||
|
* MinIO Cloud Storage, (C) 2019 MinIO, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package simdj
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/minio/minio/pkg/s3select/json"
|
||||||
|
"github.com/minio/minio/pkg/s3select/sql"
|
||||||
|
"github.com/minio/simdjson-go"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Reader - JSON record reader for S3Select.
|
||||||
|
type Reader struct {
|
||||||
|
args *json.ReaderArgs
|
||||||
|
input chan simdjson.Stream
|
||||||
|
decoded chan simdjson.Object
|
||||||
|
|
||||||
|
// err will only be returned after decoded has been closed.
|
||||||
|
err *error
|
||||||
|
readCloser io.ReadCloser
|
||||||
|
|
||||||
|
exitReader chan struct{}
|
||||||
|
readerWg sync.WaitGroup
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read - reads single record.
|
||||||
|
func (r *Reader) Read(dst sql.Record) (sql.Record, error) {
|
||||||
|
v, ok := <-r.decoded
|
||||||
|
if !ok {
|
||||||
|
if r.err != nil && *r.err != nil {
|
||||||
|
return nil, errJSONParsingError(*r.err)
|
||||||
|
}
|
||||||
|
return nil, io.EOF
|
||||||
|
}
|
||||||
|
dstRec, ok := dst.(*Record)
|
||||||
|
if !ok {
|
||||||
|
dstRec = &Record{}
|
||||||
|
}
|
||||||
|
dstRec.object = v
|
||||||
|
return dstRec, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close - closes underlying reader.
|
||||||
|
func (r *Reader) Close() error {
|
||||||
|
// Close the input.
|
||||||
|
// Potentially racy if the stream decoder is still reading.
|
||||||
|
if r.readCloser != nil {
|
||||||
|
r.readCloser.Close()
|
||||||
|
}
|
||||||
|
if r.exitReader != nil {
|
||||||
|
close(r.exitReader)
|
||||||
|
r.readerWg.Wait()
|
||||||
|
r.exitReader = nil
|
||||||
|
r.input = nil
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// startReader will start a reader that accepts input from r.input.
|
||||||
|
// Input should be root -> object input. Each root indicates a record.
|
||||||
|
// If r.input is closed, it is assumed that no more input will come.
|
||||||
|
// When this function returns r.readerWg will be decremented and r.decoded will be closed.
|
||||||
|
// On errors, r.err will be set. This should only be accessed after r.decoded has been closed.
|
||||||
|
func (r *Reader) startReader() {
|
||||||
|
defer r.readerWg.Done()
|
||||||
|
defer close(r.decoded)
|
||||||
|
var tmpObj simdjson.Object
|
||||||
|
for {
|
||||||
|
var in simdjson.Stream
|
||||||
|
select {
|
||||||
|
case in = <-r.input:
|
||||||
|
case <-r.exitReader:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if in.Error != nil && in.Error != io.EOF {
|
||||||
|
r.err = &in.Error
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if in.Value == nil {
|
||||||
|
if in.Error == io.EOF {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
i := in.Value.Iter()
|
||||||
|
readloop:
|
||||||
|
for {
|
||||||
|
var next simdjson.Iter
|
||||||
|
typ, err := i.AdvanceIter(&next)
|
||||||
|
if err != nil {
|
||||||
|
r.err = &err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
switch typ {
|
||||||
|
case simdjson.TypeNone:
|
||||||
|
break readloop
|
||||||
|
case simdjson.TypeRoot:
|
||||||
|
typ, obj, err := next.Root(nil)
|
||||||
|
if err != nil {
|
||||||
|
r.err = &err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if typ != simdjson.TypeObject {
|
||||||
|
if typ == simdjson.TypeNone {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
err = fmt.Errorf("unexpected json type below root :%v", typ)
|
||||||
|
r.err = &err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
o, err := obj.Object(&tmpObj)
|
||||||
|
if err != nil {
|
||||||
|
r.err = &err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-r.exitReader:
|
||||||
|
return
|
||||||
|
case r.decoded <- *o:
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
err = fmt.Errorf("unexpected root json type:%v", typ)
|
||||||
|
r.err = &err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if in.Error == io.EOF {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewReader - creates new JSON reader using readCloser.
|
||||||
|
func NewReader(readCloser io.ReadCloser, args *json.ReaderArgs) *Reader {
|
||||||
|
r := Reader{
|
||||||
|
args: args,
|
||||||
|
readCloser: readCloser,
|
||||||
|
decoded: make(chan simdjson.Object, 1000),
|
||||||
|
input: make(chan simdjson.Stream, 2),
|
||||||
|
exitReader: make(chan struct{}),
|
||||||
|
}
|
||||||
|
simdjson.ParseNDStream(readCloser, r.input, nil)
|
||||||
|
r.readerWg.Add(1)
|
||||||
|
go r.startReader()
|
||||||
|
return &r
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewElementReader - creates new JSON reader using readCloser.
|
||||||
|
func NewElementReader(ch chan simdjson.Object, err *error, args *json.ReaderArgs) *Reader {
|
||||||
|
return &Reader{
|
||||||
|
args: args,
|
||||||
|
decoded: ch,
|
||||||
|
err: err,
|
||||||
|
readCloser: nil,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewTapeReaderChan will start a reader that will read input from the provided channel.
|
||||||
|
func NewTapeReaderChan(pj chan simdjson.Stream, args *json.ReaderArgs) *Reader {
|
||||||
|
r := Reader{
|
||||||
|
args: args,
|
||||||
|
decoded: make(chan simdjson.Object, 1000),
|
||||||
|
input: pj,
|
||||||
|
exitReader: make(chan struct{}),
|
||||||
|
}
|
||||||
|
r.readerWg.Add(1)
|
||||||
|
go r.startReader()
|
||||||
|
return &r
|
||||||
|
}
|
|
@ -0,0 +1,165 @@
|
||||||
|
/*
|
||||||
|
* MinIO Cloud Storage, (C) 2019 MinIO, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package simdj
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"io"
|
||||||
|
"io/ioutil"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/klauspost/compress/zstd"
|
||||||
|
"github.com/minio/minio/pkg/s3select/json"
|
||||||
|
"github.com/minio/simdjson-go"
|
||||||
|
)
|
||||||
|
|
||||||
|
type tester interface {
|
||||||
|
Fatal(args ...interface{})
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadCompressed(t tester, file string) (js []byte) {
|
||||||
|
dec, err := zstd.NewReader(nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer dec.Close()
|
||||||
|
js, err = ioutil.ReadFile(filepath.Join("testdata", file+".json.zst"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
js, err = dec.DecodeAll(js, nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return js
|
||||||
|
}
|
||||||
|
|
||||||
|
var testCases = []struct {
|
||||||
|
name string
|
||||||
|
array bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "parking-citations-10",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNDJSON(t *testing.T) {
|
||||||
|
for _, tt := range testCases {
|
||||||
|
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
ref := loadCompressed(t, tt.name)
|
||||||
|
|
||||||
|
var err error
|
||||||
|
dst := make(chan simdjson.Object, 100)
|
||||||
|
dec := NewElementReader(dst, &err, &json.ReaderArgs{ContentType: "json"})
|
||||||
|
pj, err := simdjson.ParseND(ref, nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
i := pj.Iter()
|
||||||
|
cpy := i
|
||||||
|
b, err := cpy.MarshalJSON()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if false {
|
||||||
|
t.Log(string(b))
|
||||||
|
}
|
||||||
|
//_ = ioutil.WriteFile(filepath.Join("testdata", tt.name+".json"), b, os.ModePerm)
|
||||||
|
|
||||||
|
parser:
|
||||||
|
for {
|
||||||
|
var next simdjson.Iter
|
||||||
|
typ, err := i.AdvanceIter(&next)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
switch typ {
|
||||||
|
case simdjson.TypeNone:
|
||||||
|
close(dst)
|
||||||
|
break parser
|
||||||
|
case simdjson.TypeRoot:
|
||||||
|
typ, obj, err := next.Root(nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if typ != simdjson.TypeObject {
|
||||||
|
if typ == simdjson.TypeNone {
|
||||||
|
close(dst)
|
||||||
|
break parser
|
||||||
|
}
|
||||||
|
t.Fatal("Unexpected type:", typ.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
o, err := obj.Object(nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
dst <- *o
|
||||||
|
default:
|
||||||
|
t.Fatal("unexpected type:", typ.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
refDec := json.NewReader(ioutil.NopCloser(bytes.NewBuffer(ref)), &json.ReaderArgs{ContentType: "json"})
|
||||||
|
|
||||||
|
for {
|
||||||
|
rec, err := dec.Read(nil)
|
||||||
|
if err == io.EOF {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
want, err := refDec.Read(nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
var gotB, wantB bytes.Buffer
|
||||||
|
err = rec.WriteCSV(&gotB, ',')
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
err = want.WriteCSV(&wantB, ',')
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !bytes.Equal(gotB.Bytes(), wantB.Bytes()) {
|
||||||
|
t.Errorf("CSV output mismatch.\nwant: %s(%x)\ngot: %s(%x)", wantB.String(), wantB.Bytes(), gotB.String(), gotB.Bytes())
|
||||||
|
}
|
||||||
|
gotB.Reset()
|
||||||
|
wantB.Reset()
|
||||||
|
|
||||||
|
err = rec.WriteJSON(&gotB)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
err = want.WriteJSON(&wantB)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
// truncate newline from 'want'
|
||||||
|
wantB.Truncate(wantB.Len() - 1)
|
||||||
|
if !bytes.Equal(gotB.Bytes(), wantB.Bytes()) {
|
||||||
|
t.Errorf("JSON output mismatch.\nwant: %s\ngot: %s", wantB.String(), gotB.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,228 @@
|
||||||
|
/*
|
||||||
|
* MinIO Cloud Storage, (C) 2019 MinIO, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package simdj
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/csv"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
|
||||||
|
"github.com/bcicen/jstream"
|
||||||
|
"github.com/minio/minio/pkg/s3select/json"
|
||||||
|
"github.com/minio/minio/pkg/s3select/sql"
|
||||||
|
"github.com/minio/simdjson-go"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Record - is JSON record.
|
||||||
|
type Record struct {
|
||||||
|
// object
|
||||||
|
object simdjson.Object
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get - gets the value for a column name.
|
||||||
|
func (r *Record) Get(name string) (*sql.Value, error) {
|
||||||
|
elem := r.object.FindKey(name, nil)
|
||||||
|
if elem == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
return iterToValue(elem.Iter)
|
||||||
|
}
|
||||||
|
|
||||||
|
func iterToValue(iter simdjson.Iter) (*sql.Value, error) {
|
||||||
|
switch iter.Type() {
|
||||||
|
case simdjson.TypeString:
|
||||||
|
v, err := iter.String()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return sql.FromString(v), nil
|
||||||
|
case simdjson.TypeFloat:
|
||||||
|
v, err := iter.Float()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return sql.FromFloat(v), nil
|
||||||
|
case simdjson.TypeInt:
|
||||||
|
v, err := iter.Int()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return sql.FromInt(v), nil
|
||||||
|
case simdjson.TypeUint:
|
||||||
|
v, err := iter.Int()
|
||||||
|
if err != nil {
|
||||||
|
// Can't fit into int, convert to float.
|
||||||
|
v, err := iter.Float()
|
||||||
|
return sql.FromFloat(v), err
|
||||||
|
}
|
||||||
|
return sql.FromInt(v), nil
|
||||||
|
case simdjson.TypeBool:
|
||||||
|
v, err := iter.Bool()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return sql.FromBool(v), nil
|
||||||
|
case simdjson.TypeNull:
|
||||||
|
return sql.FromNull(), nil
|
||||||
|
case simdjson.TypeObject, simdjson.TypeArray:
|
||||||
|
b, err := iter.MarshalJSON()
|
||||||
|
return sql.FromBytes(b), err
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("iterToValue: unknown JSON type: %s", iter.Type().String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset the record.
|
||||||
|
func (r *Record) Reset() {
|
||||||
|
r.object = simdjson.Object{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clone the record and if possible use the destination provided.
|
||||||
|
func (r *Record) Clone(dst sql.Record) sql.Record {
|
||||||
|
other, ok := dst.(*Record)
|
||||||
|
if !ok {
|
||||||
|
other = &Record{}
|
||||||
|
}
|
||||||
|
other.object = r.object
|
||||||
|
return other
|
||||||
|
}
|
||||||
|
|
||||||
|
// CloneTo clones the record to a json Record.
|
||||||
|
// Values are only unmashaled on object level.
|
||||||
|
func (r *Record) CloneTo(dst *json.Record) (sql.Record, error) {
|
||||||
|
if dst == nil {
|
||||||
|
dst = &json.Record{SelectFormat: sql.SelectFmtJSON}
|
||||||
|
}
|
||||||
|
dst.Reset()
|
||||||
|
elems, err := r.object.Parse(nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if cap(dst.KVS) < len(elems.Elements) {
|
||||||
|
dst.KVS = make(jstream.KVS, 0, len(elems.Elements))
|
||||||
|
}
|
||||||
|
for _, elem := range elems.Elements {
|
||||||
|
v, err := sql.IterToValue(elem.Iter)
|
||||||
|
if err != nil {
|
||||||
|
v, err = elem.Iter.Interface()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dst.KVS = append(dst.KVS, jstream.KV{
|
||||||
|
Key: elem.Name,
|
||||||
|
Value: v,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return dst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set - sets the value for a column name.
|
||||||
|
func (r *Record) Set(name string, value *sql.Value) (sql.Record, error) {
|
||||||
|
dst, err := r.CloneTo(nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return dst.Set(name, value)
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteCSV - encodes to CSV data.
|
||||||
|
func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter rune) error {
|
||||||
|
csvRecord := make([]string, 0, 10)
|
||||||
|
var tmp simdjson.Iter
|
||||||
|
obj := r.object
|
||||||
|
allElems:
|
||||||
|
for {
|
||||||
|
_, typ, err := obj.NextElement(&tmp)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
var columnValue string
|
||||||
|
switch typ {
|
||||||
|
case simdjson.TypeNull, simdjson.TypeFloat, simdjson.TypeUint, simdjson.TypeInt, simdjson.TypeBool, simdjson.TypeString:
|
||||||
|
val, err := tmp.StringCvt()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
columnValue = val
|
||||||
|
case simdjson.TypeObject, simdjson.TypeArray:
|
||||||
|
b, err := tmp.MarshalJSON()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
columnValue = string(b)
|
||||||
|
case simdjson.TypeNone:
|
||||||
|
break allElems
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("cannot marshal unhandled type: %s", typ.String())
|
||||||
|
}
|
||||||
|
csvRecord = append(csvRecord, columnValue)
|
||||||
|
}
|
||||||
|
w := csv.NewWriter(writer)
|
||||||
|
w.Comma = fieldDelimiter
|
||||||
|
if err := w.Write(csvRecord); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
w.Flush()
|
||||||
|
if err := w.Error(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Raw - returns the underlying representation.
|
||||||
|
func (r *Record) Raw() (sql.SelectObjectFormat, interface{}) {
|
||||||
|
return sql.SelectFmtSIMDJSON, r.object
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteJSON - encodes to JSON data.
|
||||||
|
func (r *Record) WriteJSON(writer io.Writer) error {
|
||||||
|
o := r.object
|
||||||
|
elems, err := o.Parse(nil)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
b, err := elems.MarshalJSON()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
n, err := writer.Write(b)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if n != len(b) {
|
||||||
|
return io.ErrShortWrite
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace the underlying buffer of json data.
|
||||||
|
func (r *Record) Replace(k interface{}) error {
|
||||||
|
v, ok := k.(simdjson.Object)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("cannot replace internal data in simd json record with type %T", k)
|
||||||
|
}
|
||||||
|
r.object = v
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewRecord - creates new empty JSON record.
|
||||||
|
func NewRecord(f sql.SelectObjectFormat, obj simdjson.Object) *Record {
|
||||||
|
return &Record{
|
||||||
|
object: obj,
|
||||||
|
}
|
||||||
|
}
|
Binary file not shown.
|
@ -20,9 +20,11 @@ import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/bcicen/jstream"
|
"github.com/bcicen/jstream"
|
||||||
|
"github.com/minio/simdjson-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -370,11 +372,9 @@ func (e *JSONPath) evalNode(r Record) (*Value, error) {
|
||||||
keypath = ps[1]
|
keypath = ps[1]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
objFmt, rawVal := r.Raw()
|
_, rawVal := r.Raw()
|
||||||
switch objFmt {
|
switch rowVal := rawVal.(type) {
|
||||||
case SelectFmtJSON, SelectFmtParquet:
|
case jstream.KVS, simdjson.Object:
|
||||||
rowVal := rawVal.(jstream.KVS)
|
|
||||||
|
|
||||||
pathExpr := e.PathExpr
|
pathExpr := e.PathExpr
|
||||||
if len(pathExpr) == 0 {
|
if len(pathExpr) == 0 {
|
||||||
pathExpr = []*JSONPathElement{{Key: &ObjectKey{ID: e.BaseKey}}}
|
pathExpr = []*JSONPathElement{{Key: &ObjectKey{ID: e.BaseKey}}}
|
||||||
|
@ -400,6 +400,11 @@ func jsonToValue(result interface{}) (*Value, error) {
|
||||||
return FromFloat(rval), nil
|
return FromFloat(rval), nil
|
||||||
case int64:
|
case int64:
|
||||||
return FromInt(rval), nil
|
return FromInt(rval), nil
|
||||||
|
case uint64:
|
||||||
|
if rval <= math.MaxInt64 {
|
||||||
|
return FromInt(int64(rval)), nil
|
||||||
|
}
|
||||||
|
return FromFloat(float64(rval)), nil
|
||||||
case bool:
|
case bool:
|
||||||
return FromBool(rval), nil
|
return FromBool(rval), nil
|
||||||
case jstream.KVS:
|
case jstream.KVS:
|
||||||
|
@ -418,6 +423,17 @@ func jsonToValue(result interface{}) (*Value, error) {
|
||||||
dst[i] = *v
|
dst[i] = *v
|
||||||
}
|
}
|
||||||
return FromArray(dst), nil
|
return FromArray(dst), nil
|
||||||
|
case simdjson.Object:
|
||||||
|
o := rval
|
||||||
|
elems, err := o.Parse(nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
bs, err := elems.MarshalJSON()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return FromBytes(bs), nil
|
||||||
case []Value:
|
case []Value:
|
||||||
return FromArray(rval), nil
|
return FromArray(rval), nil
|
||||||
case nil:
|
case nil:
|
||||||
|
|
|
@ -20,6 +20,7 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
|
|
||||||
"github.com/bcicen/jstream"
|
"github.com/bcicen/jstream"
|
||||||
|
"github.com/minio/simdjson-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -42,17 +43,29 @@ func jsonpathEval(p []*JSONPathElement, v interface{}) (r interface{}, flat bool
|
||||||
case p[0].Key != nil:
|
case p[0].Key != nil:
|
||||||
key := p[0].Key.keyString()
|
key := p[0].Key.keyString()
|
||||||
|
|
||||||
kvs, ok := v.(jstream.KVS)
|
switch kvs := v.(type) {
|
||||||
if !ok {
|
case jstream.KVS:
|
||||||
|
for _, kv := range kvs {
|
||||||
|
if kv.Key == key {
|
||||||
|
return jsonpathEval(p[1:], kv.Value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Key not found - return nil result
|
||||||
|
return nil, false, nil
|
||||||
|
case simdjson.Object:
|
||||||
|
elem := kvs.FindKey(key, nil)
|
||||||
|
if elem == nil {
|
||||||
|
// Key not found - return nil result
|
||||||
|
return nil, false, nil
|
||||||
|
}
|
||||||
|
val, err := IterToValue(elem.Iter)
|
||||||
|
if err != nil {
|
||||||
|
return nil, false, err
|
||||||
|
}
|
||||||
|
return jsonpathEval(p[1:], val)
|
||||||
|
default:
|
||||||
return nil, false, errKeyLookup
|
return nil, false, errKeyLookup
|
||||||
}
|
}
|
||||||
for _, kv := range kvs {
|
|
||||||
if kv.Key == key {
|
|
||||||
return jsonpathEval(p[1:], kv.Value)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Key not found - return nil result
|
|
||||||
return nil, false, nil
|
|
||||||
|
|
||||||
case p[0].Index != nil:
|
case p[0].Index != nil:
|
||||||
idx := *p[0].Index
|
idx := *p[0].Index
|
||||||
|
@ -68,17 +81,23 @@ func jsonpathEval(p []*JSONPathElement, v interface{}) (r interface{}, flat bool
|
||||||
return jsonpathEval(p[1:], arr[idx])
|
return jsonpathEval(p[1:], arr[idx])
|
||||||
|
|
||||||
case p[0].ObjectWildcard:
|
case p[0].ObjectWildcard:
|
||||||
kvs, ok := v.(jstream.KVS)
|
switch kvs := v.(type) {
|
||||||
if !ok {
|
case jstream.KVS:
|
||||||
|
if len(p[1:]) > 0 {
|
||||||
|
return nil, false, errWilcardObjectUsageInvalid
|
||||||
|
}
|
||||||
|
|
||||||
|
return kvs, false, nil
|
||||||
|
case simdjson.Object:
|
||||||
|
if len(p[1:]) > 0 {
|
||||||
|
return nil, false, errWilcardObjectUsageInvalid
|
||||||
|
}
|
||||||
|
|
||||||
|
return kvs, false, nil
|
||||||
|
default:
|
||||||
return nil, false, errWildcardObjectLookup
|
return nil, false, errWildcardObjectLookup
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(p[1:]) > 0 {
|
|
||||||
return nil, false, errWilcardObjectUsageInvalid
|
|
||||||
}
|
|
||||||
|
|
||||||
return kvs, false, nil
|
|
||||||
|
|
||||||
case p[0].ArrayWildcard:
|
case p[0].ArrayWildcard:
|
||||||
arr, ok := v.([]interface{})
|
arr, ok := v.([]interface{})
|
||||||
if !ok {
|
if !ok {
|
||||||
|
|
|
@ -17,9 +17,10 @@
|
||||||
package sql
|
package sql
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
|
||||||
"github.com/bcicen/jstream"
|
"github.com/minio/simdjson-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
// SelectObjectFormat specifies the format of the underlying data
|
// SelectObjectFormat specifies the format of the underlying data
|
||||||
|
@ -32,6 +33,8 @@ const (
|
||||||
SelectFmtCSV
|
SelectFmtCSV
|
||||||
// SelectFmtJSON - JSON format
|
// SelectFmtJSON - JSON format
|
||||||
SelectFmtJSON
|
SelectFmtJSON
|
||||||
|
// SelectFmtSIMDJSON - SIMD JSON format
|
||||||
|
SelectFmtSIMDJSON
|
||||||
// SelectFmtParquet - Parquet format
|
// SelectFmtParquet - Parquet format
|
||||||
SelectFmtParquet
|
SelectFmtParquet
|
||||||
)
|
)
|
||||||
|
@ -39,7 +42,10 @@ const (
|
||||||
// Record - is a type containing columns and their values.
|
// Record - is a type containing columns and their values.
|
||||||
type Record interface {
|
type Record interface {
|
||||||
Get(name string) (*Value, error)
|
Get(name string) (*Value, error)
|
||||||
Set(name string, value *Value) error
|
|
||||||
|
// Set a value.
|
||||||
|
// Can return a different record type.
|
||||||
|
Set(name string, value *Value) (Record, error)
|
||||||
WriteCSV(writer io.Writer, fieldDelimiter rune) error
|
WriteCSV(writer io.Writer, fieldDelimiter rune) error
|
||||||
WriteJSON(writer io.Writer) error
|
WriteJSON(writer io.Writer) error
|
||||||
|
|
||||||
|
@ -51,5 +57,77 @@ type Record interface {
|
||||||
Raw() (SelectObjectFormat, interface{})
|
Raw() (SelectObjectFormat, interface{})
|
||||||
|
|
||||||
// Replaces the underlying data
|
// Replaces the underlying data
|
||||||
Replace(k jstream.KVS) error
|
Replace(k interface{}) error
|
||||||
|
}
|
||||||
|
|
||||||
|
// IterToValue converts a simdjson Iter to its underlying value.
|
||||||
|
// Objects are returned as simdjson.Object
|
||||||
|
// Arrays are returned as []interface{} with parsed values.
|
||||||
|
func IterToValue(iter simdjson.Iter) (interface{}, error) {
|
||||||
|
switch iter.Type() {
|
||||||
|
case simdjson.TypeString:
|
||||||
|
v, err := iter.String()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return v, nil
|
||||||
|
case simdjson.TypeFloat:
|
||||||
|
v, err := iter.Float()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return v, nil
|
||||||
|
case simdjson.TypeInt:
|
||||||
|
v, err := iter.Int()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return v, nil
|
||||||
|
case simdjson.TypeUint:
|
||||||
|
v, err := iter.Int()
|
||||||
|
if err != nil {
|
||||||
|
// Can't fit into int, convert to float.
|
||||||
|
v, err := iter.Float()
|
||||||
|
return v, err
|
||||||
|
}
|
||||||
|
return v, nil
|
||||||
|
case simdjson.TypeBool:
|
||||||
|
v, err := iter.Bool()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return v, nil
|
||||||
|
case simdjson.TypeObject:
|
||||||
|
obj, err := iter.Object(nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return *obj, err
|
||||||
|
case simdjson.TypeArray:
|
||||||
|
arr, err := iter.Array(nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
iter := arr.Iter()
|
||||||
|
var dst []interface{}
|
||||||
|
var next simdjson.Iter
|
||||||
|
for {
|
||||||
|
typ, err := iter.AdvanceIter(&next)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if typ == simdjson.TypeNone {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
v, err := IterToValue(next)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
dst = append(dst, v)
|
||||||
|
}
|
||||||
|
return dst, err
|
||||||
|
case simdjson.TypeNull:
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("IterToValue: unknown JSON type: %s", iter.Type().String())
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/bcicen/jstream"
|
"github.com/bcicen/jstream"
|
||||||
|
"github.com/minio/simdjson-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -140,36 +141,56 @@ func parseLimit(v *LitValue) (int64, error) {
|
||||||
// EvalFrom evaluates the From clause on the input record. It only
|
// EvalFrom evaluates the From clause on the input record. It only
|
||||||
// applies to JSON input data format (currently).
|
// applies to JSON input data format (currently).
|
||||||
func (e *SelectStatement) EvalFrom(format string, input Record) (Record, error) {
|
func (e *SelectStatement) EvalFrom(format string, input Record) (Record, error) {
|
||||||
if e.selectAST.From.HasKeypath() {
|
if !e.selectAST.From.HasKeypath() {
|
||||||
if format == "json" {
|
return input, nil
|
||||||
objFmt, rawVal := input.Raw()
|
}
|
||||||
if objFmt != SelectFmtJSON {
|
_, rawVal := input.Raw()
|
||||||
return nil, errDataSource(errors.New("unexpected non JSON input"))
|
|
||||||
}
|
|
||||||
|
|
||||||
jsonRec := rawVal.(jstream.KVS)
|
if format != "json" {
|
||||||
txedRec, _, err := jsonpathEval(e.selectAST.From.Table.PathExpr[1:], jsonRec)
|
return nil, errDataSource(errors.New("path not supported"))
|
||||||
|
}
|
||||||
|
switch rec := rawVal.(type) {
|
||||||
|
case jstream.KVS:
|
||||||
|
txedRec, _, err := jsonpathEval(e.selectAST.From.Table.PathExpr[1:], rec)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var kvs jstream.KVS
|
||||||
|
switch v := txedRec.(type) {
|
||||||
|
case jstream.KVS:
|
||||||
|
kvs = v
|
||||||
|
default:
|
||||||
|
kvs = jstream.KVS{jstream.KV{Key: "_1", Value: v}}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = input.Replace(kvs); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return input, nil
|
||||||
|
case simdjson.Object:
|
||||||
|
txedRec, _, err := jsonpathEval(e.selectAST.From.Table.PathExpr[1:], rec)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch v := txedRec.(type) {
|
||||||
|
case simdjson.Object:
|
||||||
|
err := input.Replace(v)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
default:
|
||||||
var kvs jstream.KVS
|
input.Reset()
|
||||||
switch v := txedRec.(type) {
|
input, err = input.Set("_1", &Value{value: v})
|
||||||
case jstream.KVS:
|
if err != nil {
|
||||||
kvs = v
|
|
||||||
default:
|
|
||||||
kvs = jstream.KVS{jstream.KV{Key: "_1", Value: v}}
|
|
||||||
}
|
|
||||||
|
|
||||||
if err = input.Replace(kvs); err != nil {
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return input, nil
|
|
||||||
}
|
}
|
||||||
return nil, errDataSource(errors.New("path not supported"))
|
return input, nil
|
||||||
}
|
}
|
||||||
return input, nil
|
return nil, errDataSource(errors.New("unexpected non JSON input"))
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsAggregated returns if the statement involves SQL aggregation
|
// IsAggregated returns if the statement involves SQL aggregation
|
||||||
|
@ -186,9 +207,12 @@ func (e *SelectStatement) AggregateResult(output Record) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if expr.As != "" {
|
if expr.As != "" {
|
||||||
output.Set(expr.As, v)
|
output, err = output.Set(expr.As, v)
|
||||||
} else {
|
} else {
|
||||||
output.Set(fmt.Sprintf("_%d", i+1), v)
|
output, err = output.Set(fmt.Sprintf("_%d", i+1), v)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
@ -250,8 +274,7 @@ func (e *SelectStatement) Eval(input, output Record) (Record, error) {
|
||||||
if e.limitValue > -1 {
|
if e.limitValue > -1 {
|
||||||
e.outputCount++
|
e.outputCount++
|
||||||
}
|
}
|
||||||
output = input.Clone(output)
|
return input.Clone(output), nil
|
||||||
return output, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for i, expr := range e.selectAST.Expression.Expressions {
|
for i, expr := range e.selectAST.Expression.Expressions {
|
||||||
|
@ -262,11 +285,14 @@ func (e *SelectStatement) Eval(input, output Record) (Record, error) {
|
||||||
|
|
||||||
// Pick output column names
|
// Pick output column names
|
||||||
if expr.As != "" {
|
if expr.As != "" {
|
||||||
output.Set(expr.As, v)
|
output, err = output.Set(expr.As, v)
|
||||||
} else if comp, ok := getLastKeypathComponent(expr.Expression); ok {
|
} else if comp, ok := getLastKeypathComponent(expr.Expression); ok {
|
||||||
output.Set(comp, v)
|
output, err = output.Set(comp, v)
|
||||||
} else {
|
} else {
|
||||||
output.Set(fmt.Sprintf("_%d", i+1), v)
|
output, err = output.Set(fmt.Sprintf("_%d", i+1), v)
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue