Fix JSON parser handling for certain objects (#7162)

This PR also adds some comments and simplifies
the code. The primary change ensures that the
cached buffer is honored.

Added unit tests as well

Fixes #7141
This commit is contained in:
Harshavardhana
2019-02-06 18:34:42 -08:00
committed by Nitish Tiwari
parent d203e7e1cc
commit 85e939636f
22 changed files with 1016 additions and 166 deletions

View File

@@ -0,0 +1,12 @@
[
{
"key_1": "value",
"key_2": "value"
}
]
[
{
"key_1": "value2",
"key_2": "value3"
}
]

View File

@@ -0,0 +1,8 @@
"a"
1
3.145
["a"]
{}
{
"a": 1
}

View File

@@ -0,0 +1,5 @@
{
"a": 1
}{
"b": 2
}

View File

@@ -0,0 +1 @@
{"text": "hello world\\n2nd line"}

View File

@@ -0,0 +1 @@
{"hello":"wor{l}d"}

View File

@@ -0,0 +1,26 @@
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}

View File

@@ -0,0 +1,5 @@
{
"foo": {
"bar": "baz"
}
}

View File

@@ -0,0 +1 @@
{ "name": "John", "age":28, "hobby": { "name": "chess", "type": "boardgame" }}

View File

@@ -0,0 +1,3 @@
{"name":"Michael", "age": 31}
{"name":"Andy", "age": 30}
{"name":"Justin", "age": 19}

View File

@@ -0,0 +1,2 @@
{"a":"}"
}

View File

@@ -0,0 +1,6 @@
[
{
"key_1": "value",
"key_2": "value"
}
]

View File

@@ -17,184 +17,46 @@
package json
import (
"bytes"
"encoding/json"
"io"
"io/ioutil"
"strconv"
"github.com/minio/minio/pkg/s3select/sql"
"github.com/tidwall/gjson"
"github.com/bcicen/jstream"
"github.com/tidwall/sjson"
)
// toSingleLineJSON flattens result into input one leaf at a time and returns
// the resulting JSON text.
//
// input is the JSON document accumulated so far ("" when starting out),
// currentKey is the dotted sjson path of result inside that document ("" for
// the top-level value), and result is the parsed value to merge in.
//
// Objects and arrays are walked recursively; every scalar leaf is written with
// sjson.Set under its dotted path (array elements use their index as the path
// component). A container without members is written as a literal "{}" or
// "[]" so that it neither disappears nor discards what earlier siblings have
// already written. A top-level empty container leaves input unchanged.
//
// The function panics when a non-empty array is encountered with an empty
// currentKey (i.e. a top-level array), because there is no key under which
// its elements could be stored.
func toSingleLineJSON(input string, currentKey string, result gjson.Result) (output string, err error) {
	// Start from the accumulated document: if nothing below writes to it, the
	// caller must get back what it passed in, not an empty string.
	output = input

	switch {
	case result.IsObject():
		visited := false
		result.ForEach(func(key, value gjson.Result) bool {
			visited = true
			jsonKey := key.String()
			if currentKey != "" {
				jsonKey = currentKey + "." + jsonKey
			}
			output, err = toSingleLineJSON(output, jsonKey, value)
			return err == nil
		})
		if !visited && currentKey != "" {
			// Keep the empty object in the output instead of dropping the key.
			output, err = sjson.SetRaw(input, currentKey, "{}")
		}
	case result.IsArray():
		i := 0
		result.ForEach(func(_, value gjson.Result) bool {
			if currentKey == "" {
				panic("currentKey is empty")
			}
			output, err = toSingleLineJSON(output, currentKey+"."+strconv.Itoa(i), value)
			i++
			return err == nil
		})
		if i == 0 && currentKey != "" {
			// Keep the empty array in the output instead of dropping the key.
			output, err = sjson.SetRaw(input, currentKey, "[]")
		}
	default:
		output, err = sjson.Set(input, currentKey, result.Value())
	}

	return output, err
}
// objectReader wraps an io.Reader and tracks the state needed to hand out the
// stream one top-level JSON object at a time.
type objectReader struct {
	// reader is the underlying source of JSON bytes.
	reader io.Reader
	// err remembers the last error reported by reader.
	err error

	// p[start:end] holds bytes that were already obtained from reader but
	// have not been returned to the caller yet.
	p          []byte
	start, end int

	// Scanner state carried across Read calls.
	escaped     bool   // previous byte was an unpaired backslash
	quoteOpened bool   // currently inside a JSON string
	curlyCount  uint64 // number of '{' still waiting for their '}'

	// endOfObject is set once the end of the current object was returned.
	endOfObject bool
}
// objectEndIndex scans the first length bytes of p for the end of the current
// top-level JSON object. It returns the offset just past the closing '}' that
// brings the brace depth back to zero, or -1 when no object ends in this
// chunk. Escape, quote and depth state is kept on the receiver so that a scan
// can continue across successive chunks.
func (or *objectReader) objectEndIndex(p []byte, length int) int {
	for i := 0; i < length; i++ {
		c := p[i]
		if c == '\\' {
			// A backslash flips the escape state and is otherwise ignored.
			or.escaped = !or.escaped
			continue
		}

		// Any other byte consumes a pending escape.
		wasEscaped := or.escaped
		or.escaped = false

		switch {
		case c == '"':
			if !wasEscaped {
				or.quoteOpened = !or.quoteOpened
			}
		case or.quoteOpened:
			// Braces inside a string literal do not count.
		case c == '{':
			or.curlyCount++
		case c == '}' && or.curlyCount > 0:
			or.curlyCount--
			if or.curlyCount == 0 {
				return i + 1
			}
		}
	}

	return -1
}
// Read implements io.Reader and yields the bytes of at most one top-level
// JSON object per Reset cycle.
//
// Bytes left over from an earlier call are served first; only when none are
// pending is the underlying reader consulted. As soon as the end of the
// current object is seen, only the bytes up to and including its closing
// brace are returned, the remainder is kept for the next object, and every
// further call returns io.EOF until Reset is called.
//
// io.EOF, io.ErrUnexpectedEOF and io.ErrClosedPipe from the underlying reader
// are all remembered as io.EOF and reported once the pending bytes are used
// up; any other error is returned immediately.
func (or *objectReader) Read(p []byte) (n int, err error) {
	if or.endOfObject {
		return 0, io.EOF
	}

	if or.p != nil {
		// Drain the leftover bytes before touching the underlying reader.
		n = copy(p, or.p[or.start:or.end])
		or.start += n
		if or.start == or.end {
			// made full copy.
			or.p = nil
			or.start = 0
			or.end = 0
		}
	} else {
		if or.err != nil {
			return 0, or.err
		}

		n, err = or.reader.Read(p)
		or.err = err
		switch err {
		case nil:
		case io.EOF, io.ErrUnexpectedEOF, io.ErrClosedPipe:
			or.err = io.EOF
		default:
			return 0, err
		}
	}

	index := or.objectEndIndex(p, n)
	if index == -1 {
		return n, nil
	}

	// The object ends inside (or exactly at the end of) this chunk.
	or.endOfObject = true

	// p[index:n] belongs to the next object and must be replayed later.
	if leftover := n - index; leftover > 0 {
		if or.p != nil {
			// Those bytes were just copied out of the cache; step the cursor
			// back over exactly that many bytes.
			or.start -= leftover
		} else {
			// Keep a private copy: an io.Reader must not retain the caller's
			// buffer, which may be reused or overwritten after we return.
			or.p = append([]byte(nil), p[index:n]...)
			or.start = 0
			or.end = leftover
		}
	}

	return index, nil
}
// Reset clears the end-of-object marker so the next object can be read.
// When no buffered bytes are pending it reports the error remembered from the
// underlying reader (nil if there was none); otherwise it returns nil.
func (or *objectReader) Reset() error {
	or.endOfObject = false

	if or.p == nil {
		return or.err
	}

	return nil
}
// Reader - JSON record reader for S3Select.
//
// NOTE(review): args and readCloser are each listed twice below; this looks
// like the pre-change and post-change field lists of a diff shown together.
// Confirm against the actual file before relying on this field set.
type Reader struct {
// args is the *ReaderArgs value handed to NewReader.
args *ReaderArgs
// objectReader is reset and then read to the end by Read to obtain the raw
// bytes of one record.
objectReader *objectReader
// readCloser is the input stream handed to NewReader.
readCloser io.ReadCloser
args *ReaderArgs
// decoder is the jstream decoder NewReader creates over readCloser; Read
// consults its Err method once valueCh has been closed.
decoder *jstream.Decoder
// valueCh is the channel returned by decoder.Stream(); Read receives one
// value from it per call.
valueCh chan *jstream.MetaValue
readCloser io.ReadCloser
}
// Read - reads single record.
func (r *Reader) Read() (sql.Record, error) {
if err := r.objectReader.Reset(); err != nil {
return nil, err
}
data, err := ioutil.ReadAll(r.objectReader)
if err != nil {
return nil, errJSONParsingError(err)
}
data = bytes.TrimSpace(data)
if len(data) == 0 {
v, ok := <-r.valueCh
if !ok {
if err := r.decoder.Err(); err != nil {
return nil, errJSONParsingError(err)
}
return nil, io.EOF
}
if !gjson.ValidBytes(data) {
return nil, errJSONParsingError(err)
}
var data []byte
var err error
if bytes.Count(data, []byte("\n")) > 0 {
var s string
if s, err = toSingleLineJSON("", "", gjson.ParseBytes(data)); err != nil {
return nil, errJSONParsingError(err)
}
data = []byte(s)
if v.ValueType == jstream.Object {
data, err = json.Marshal(v.Value)
} else {
// To be AWS S3 compatible
// Select for JSON needs to output non-object JSON as single column value
// i.e. a map with `_1` as key and value as the non-object.
data, err = sjson.SetBytes(data, "_1", v.Value)
}
if err != nil {
return nil, errJSONParsingError(err)
}
return &Record{
@@ -209,9 +71,11 @@ func (r *Reader) Close() error {
// NewReader - creates new JSON reader using readCloser.
//
// NOTE(review): the composite literal below sets args and readCloser twice;
// this looks like the pre-change and post-change lines of a diff shown
// together. Confirm against the actual file.
func NewReader(readCloser io.ReadCloser, args *ReaderArgs) *Reader {
// Decoder over the input stream; the second argument (0) is presumably the
// depth at which jstream emits values — TODO confirm against jstream docs.
d := jstream.NewDecoder(readCloser, 0)
return &Reader{
args: args,
objectReader: &objectReader{reader: readCloser},
readCloser: readCloser,
args: args,
decoder: d,
// Stream is called here, at construction time; Read receives from the
// returned channel.
valueCh: d.Stream(),
readCloser: readCloser,
}
}

View File

@@ -0,0 +1,49 @@
/*
* Minio Cloud Storage, (C) 2019 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package json
import (
"io"
"io/ioutil"
"os"
"path/filepath"
"testing"
)
// TestNewReader reads every fixture file in the "data" directory with a fresh
// Reader and expects each one to be consumed record by record until io.EOF.
// Every file runs as its own subtest so that one bad fixture does not hide the
// results of the others.
func TestNewReader(t *testing.T) {
	files, err := ioutil.ReadDir("data")
	if err != nil {
		t.Fatal(err)
	}

	for _, file := range files {
		if file.IsDir() {
			// Only regular fixture files can be opened and parsed.
			continue
		}

		name := file.Name()
		t.Run(name, func(t *testing.T) {
			f, err := os.Open(filepath.Join("data", name))
			if err != nil {
				t.Fatal(err)
			}

			r := NewReader(f, &ReaderArgs{})
			for {
				if _, err = r.Read(); err != nil {
					break
				}
			}

			// Always close before judging the outcome so the file handle is
			// released on every path.
			closeErr := r.Close()

			if err != io.EOF {
				t.Fatalf("Reading failed with %s, %s", err, name)
			}
			if closeErr != nil {
				t.Fatalf("Closing failed with %s, %s", closeErr, name)
			}
		})
	}
}