Fix JSON parser handling for certain objects (#7162)
This PR also adds some comments and simplifies the code. The primary change ensures that the cached buffer is honored. Unit tests are added as well. Fixes #7141
committed by Nitish Tiwari
parent d203e7e1cc
commit 85e939636f
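The new reader delegates record extraction to github.com/bcicen/jstream, which streams each top-level JSON value out of the input. A minimal sketch of that streaming pattern, using only the jstream calls that appear in the diff below (NewDecoder, Stream, Err, ValueType); the program and its sample input are invented for illustration:

package main

import (
	"fmt"
	"strings"

	"github.com/bcicen/jstream"
)

func main() {
	// Two concatenated top-level JSON documents, similar to data/12.json (invented sample).
	input := strings.NewReader(`{"a": 1}{"b": 2}`)

	// Emit depth 0 streams every top-level value as it is decoded.
	d := jstream.NewDecoder(input, 0)
	for mv := range d.Stream() {
		if mv.ValueType == jstream.Object {
			fmt.Println("object:", mv.Value)
		} else {
			fmt.Println("non-object:", mv.Value)
		}
	}
	if err := d.Err(); err != nil {
		fmt.Println("decode error:", err)
	}
}

With emit depth 0, concatenated documents arrive as separate values on the channel, which is what lets the reader below return one record per Read call.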
12 pkg/s3select/json/data/10.json Normal file
@@ -0,0 +1,12 @@
[
{
"key_1": "value",
"key_2": "value"
}
]
[
{
"key_1": "value2",
"key_2": "value3"
}
]
8 pkg/s3select/json/data/11.json Normal file
@@ -0,0 +1,8 @@
"a"
1
3.145
["a"]
{}
{
"a": 1
}
5 pkg/s3select/json/data/12.json Normal file
@@ -0,0 +1,5 @@
{
"a": 1
}{
"b": 2
}
1 pkg/s3select/json/data/2.json Normal file
@@ -0,0 +1 @@
{"text": "hello world\\n2nd line"}
1 pkg/s3select/json/data/3.json Normal file
@@ -0,0 +1 @@
{"hello":"wor{l}d"}
26 pkg/s3select/json/data/4.json Normal file
@@ -0,0 +1,26 @@
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
5 pkg/s3select/json/data/5.json Normal file
@@ -0,0 +1,5 @@
{
"foo": {
"bar": "baz"
}
}
1 pkg/s3select/json/data/6.json Normal file
@@ -0,0 +1 @@
{ "name": "John", "age":28, "hobby": { "name": "chess", "type": "boardgame" }}
3 pkg/s3select/json/data/7.json Normal file
@@ -0,0 +1,3 @@
{"name":"Michael", "age": 31}
{"name":"Andy", "age": 30}
{"name":"Justin", "age": 19}
2 pkg/s3select/json/data/8.json Normal file
@@ -0,0 +1,2 @@
{"a":"}"
}
6 pkg/s3select/json/data/9.json Normal file
@@ -0,0 +1,6 @@
[
{
"key_1": "value",
"key_2": "value"
}
]
pkg/s3select/json/reader.go
@@ -17,184 +17,46 @@
package json

import (
	"bytes"
	"encoding/json"
	"io"
	"io/ioutil"
	"strconv"

	"github.com/minio/minio/pkg/s3select/sql"
	"github.com/tidwall/gjson"

	"github.com/bcicen/jstream"
	"github.com/tidwall/sjson"
)

func toSingleLineJSON(input string, currentKey string, result gjson.Result) (output string, err error) {
	switch {
	case result.IsObject():
		result.ForEach(func(key, value gjson.Result) bool {
			jsonKey := key.String()
			if currentKey != "" {
				jsonKey = currentKey + "." + key.String()
			}
			output, err = toSingleLineJSON(input, jsonKey, value)
			input = output
			return err == nil
		})
	case result.IsArray():
		i := 0
		result.ForEach(func(key, value gjson.Result) bool {
			if currentKey == "" {
				panic("currentKey is empty")
			}

			indexKey := currentKey + "." + strconv.Itoa(i)
			output, err = toSingleLineJSON(input, indexKey, value)
			input = output
			i++
			return err == nil
		})
	default:
		output, err = sjson.Set(input, currentKey, result.Value())
	}

	return output, err
}

type objectReader struct {
	reader io.Reader
	err error

	p []byte
	start int
	end int

	escaped bool
	quoteOpened bool
	curlyCount uint64
	endOfObject bool
}

func (or *objectReader) objectEndIndex(p []byte, length int) int {
	for i := 0; i < length; i++ {
		if p[i] == '\\' {
			or.escaped = !or.escaped
			continue
		}

		if p[i] == '"' && !or.escaped {
			or.quoteOpened = !or.quoteOpened
		}

		or.escaped = false

		switch p[i] {
		case '{':
			if !or.quoteOpened {
				or.curlyCount++
			}
		case '}':
			if or.quoteOpened || or.curlyCount == 0 {
				break
			}

			if or.curlyCount--; or.curlyCount == 0 {
				return i + 1
			}
		}
	}

	return -1
}

func (or *objectReader) Read(p []byte) (n int, err error) {
	if or.endOfObject {
		return 0, io.EOF
	}

	if or.p != nil {
		n = copy(p, or.p[or.start:or.end])
		or.start += n
		if or.start == or.end {
			// made full copy.
			or.p = nil
			or.start = 0
			or.end = 0
		}
	} else {
		if or.err != nil {
			return 0, or.err
		}

		n, err = or.reader.Read(p)
		or.err = err
		switch err {
		case nil:
		case io.EOF, io.ErrUnexpectedEOF, io.ErrClosedPipe:
			or.err = io.EOF
		default:
			return 0, err
		}
	}

	index := or.objectEndIndex(p, n)
	if index == -1 || index == n {
		return n, nil
	}

	or.endOfObject = true
	if or.p == nil {
		or.p = p
		or.start = index
		or.end = n
	} else {
		or.start -= index
	}

	return index, nil
}

func (or *objectReader) Reset() error {
	or.endOfObject = false

	if or.p != nil {
		return nil
	}

	return or.err
}

// Reader - JSON record reader for S3Select.
type Reader struct {
	args *ReaderArgs
	objectReader *objectReader
	readCloser io.ReadCloser
	args *ReaderArgs
	decoder *jstream.Decoder
	valueCh chan *jstream.MetaValue
	readCloser io.ReadCloser
}

// Read - reads single record.
func (r *Reader) Read() (sql.Record, error) {
	if err := r.objectReader.Reset(); err != nil {
		return nil, err
	}

	data, err := ioutil.ReadAll(r.objectReader)
	if err != nil {
		return nil, errJSONParsingError(err)
	}

	data = bytes.TrimSpace(data)
	if len(data) == 0 {
	v, ok := <-r.valueCh
	if !ok {
		if err := r.decoder.Err(); err != nil {
			return nil, errJSONParsingError(err)
		}
		return nil, io.EOF
	}

	if !gjson.ValidBytes(data) {
		return nil, errJSONParsingError(err)
	}
	var data []byte
	var err error

	if bytes.Count(data, []byte("\n")) > 0 {
		var s string
		if s, err = toSingleLineJSON("", "", gjson.ParseBytes(data)); err != nil {
			return nil, errJSONParsingError(err)
		}
		data = []byte(s)
	if v.ValueType == jstream.Object {
		data, err = json.Marshal(v.Value)
	} else {
		// To be AWS S3 compatible
		// Select for JSON needs to output non-object JSON as single column value
		// i.e. a map with `_1` as key and value as the non-object.
		data, err = sjson.SetBytes(data, "_1", v.Value)
	}
	if err != nil {
		return nil, errJSONParsingError(err)
	}

	return &Record{
@@ -209,9 +71,11 @@ func (r *Reader) Close() error {

// NewReader - creates new JSON reader using readCloser.
func NewReader(readCloser io.ReadCloser, args *ReaderArgs) *Reader {
	d := jstream.NewDecoder(readCloser, 0)
	return &Reader{
		args: args,
		objectReader: &objectReader{reader: readCloser},
		readCloser: readCloser,
		args: args,
		decoder: d,
		valueCh: d.Stream(),
		readCloser: readCloser,
	}
}

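The comment in the non-object branch above describes wrapping non-object JSON under a `_1` key so it comes back as a single-column record. A small sketch of that sjson.SetBytes call in isolation; the sample value is invented, and the output shown in the comment is what sjson produces for it:

package main

import (
	"fmt"

	"github.com/tidwall/sjson"
)

func main() {
	// Start from an empty document and set "_1", as the reader does for
	// non-object values; sjson creates the enclosing object.
	data, err := sjson.SetBytes(nil, "_1", 3.145)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(data)) // {"_1":3.145}
}
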
49 pkg/s3select/json/reader_test.go Normal file
@@ -0,0 +1,49 @@
/*
 * Minio Cloud Storage, (C) 2019 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package json

import (
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"testing"
)

func TestNewReader(t *testing.T) {
	files, err := ioutil.ReadDir("data")
	if err != nil {
		t.Fatal(err)
	}
	for _, file := range files {
		f, err := os.Open(filepath.Join("data", file.Name()))
		if err != nil {
			t.Fatal(err)
		}
		r := NewReader(f, &ReaderArgs{})
		for {
			_, err = r.Read()
			if err != nil {
				break
			}
		}
		r.Close()
		if err != io.EOF {
			t.Fatalf("Reading failed with %s, %s", err, file.Name())
		}
	}
}