mirror of
https://github.com/minio/minio.git
synced 2024-12-26 23:25:54 -05:00
85e939636f
This PR also adds some comments and simplifies the code. Primary handling is done to ensure that we make sure to honor cached buffer. Added unit tests as well Fixes #7141
555 lines
11 KiB
Go
555 lines
11 KiB
Go
package jstream
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"io"
|
|
"strconv"
|
|
"sync/atomic"
|
|
"unicode/utf16"
|
|
)
|
|
|
|
// ValueType - defines the type of each JSON value
|
|
type ValueType int
|
|
|
|
// Different types of JSON value
|
|
const (
|
|
Unknown ValueType = iota
|
|
Null
|
|
String
|
|
Number
|
|
Boolean
|
|
Array
|
|
Object
|
|
)
|
|
|
|
// MetaValue wraps a decoded interface value with the document
|
|
// position and depth at which the value was parsed
|
|
type MetaValue struct {
|
|
Offset int
|
|
Length int
|
|
Depth int
|
|
Value interface{}
|
|
ValueType ValueType
|
|
}
|
|
|
|
// KV contains a key and value pair parsed from a decoded object
|
|
type KV struct {
|
|
Key string `json:"key"`
|
|
Value interface{} `json:"value"`
|
|
}
|
|
|
|
// KVS - represents key values in an JSON object
|
|
type KVS []KV
|
|
|
|
// MarshalJSON - implements converting slice of KVS into a JSON object
|
|
// with multiple keys and values.
|
|
func (kvs KVS) MarshalJSON() ([]byte, error) {
|
|
b := new(bytes.Buffer)
|
|
b.Write([]byte("{"))
|
|
for i, kv := range kvs {
|
|
b.Write([]byte("\"" + kv.Key + "\"" + ":"))
|
|
valBuf, err := json.Marshal(kv.Value)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
b.Write(valBuf)
|
|
if i < len(kvs)-1 {
|
|
b.Write([]byte(","))
|
|
}
|
|
}
|
|
b.Write([]byte("}"))
|
|
return b.Bytes(), nil
|
|
}
|
|
|
|
// Decoder wraps an io.Reader to provide incremental decoding of
|
|
// JSON values
|
|
type Decoder struct {
|
|
*scanner
|
|
emitDepth int
|
|
emitKV bool
|
|
emitRecursive bool
|
|
|
|
depth int
|
|
scratch *scratch
|
|
metaCh chan *MetaValue
|
|
err error
|
|
|
|
// follow line position to add context to errors
|
|
lineNo int
|
|
lineStart int64
|
|
}
|
|
|
|
// NewDecoder creates new Decoder to read JSON values at the provided
|
|
// emitDepth from the provider io.Reader.
|
|
// If emitDepth is < 0, values at every depth will be emitted.
|
|
func NewDecoder(r io.Reader, emitDepth int) *Decoder {
|
|
d := &Decoder{
|
|
scanner: newScanner(r),
|
|
emitDepth: emitDepth,
|
|
scratch: &scratch{data: make([]byte, 1024)},
|
|
metaCh: make(chan *MetaValue, 128),
|
|
}
|
|
if emitDepth < 0 {
|
|
d.emitDepth = 0
|
|
d.emitRecursive = true
|
|
}
|
|
return d
|
|
}
|
|
|
|
// EmitKV enables emitting a jstream.KV struct when the items(s) parsed
|
|
// at configured emit depth are within a JSON object. By default, only
|
|
// the object values are emitted.
|
|
func (d *Decoder) EmitKV() *Decoder {
|
|
d.emitKV = true
|
|
return d
|
|
}
|
|
|
|
// Recursive enables emitting all values at a depth higher than the
|
|
// configured emit depth; e.g. if an array is found at emit depth, all
|
|
// values within the array are emitted to the stream, then the array
|
|
// containing those values is emitted.
|
|
func (d *Decoder) Recursive() *Decoder {
|
|
d.emitRecursive = true
|
|
return d
|
|
}
|
|
|
|
// Stream begins decoding from the underlying reader and returns a
|
|
// streaming MetaValue channel for JSON values at the configured emitDepth.
|
|
func (d *Decoder) Stream() chan *MetaValue {
|
|
go d.decode()
|
|
return d.metaCh
|
|
}
|
|
|
|
// Pos returns the number of bytes consumed from the underlying reader
|
|
func (d *Decoder) Pos() int { return int(d.pos) }
|
|
|
|
// Err returns the most recent decoder error if any, or nil
|
|
func (d *Decoder) Err() error { return d.err }
|
|
|
|
// Decode parses the JSON-encoded data and returns an interface value
|
|
func (d *Decoder) decode() {
|
|
defer close(d.metaCh)
|
|
d.skipSpaces()
|
|
for d.pos < atomic.LoadInt64(&d.end) {
|
|
_, err := d.emitAny()
|
|
if err != nil {
|
|
d.err = err
|
|
break
|
|
}
|
|
d.skipSpaces()
|
|
}
|
|
}
|
|
|
|
func (d *Decoder) emitAny() (interface{}, error) {
|
|
if d.pos >= atomic.LoadInt64(&d.end) {
|
|
return nil, d.mkError(ErrUnexpectedEOF)
|
|
}
|
|
offset := d.pos - 1
|
|
i, t, err := d.any()
|
|
if d.willEmit() {
|
|
d.metaCh <- &MetaValue{
|
|
Offset: int(offset),
|
|
Length: int(d.pos - offset),
|
|
Depth: d.depth,
|
|
Value: i,
|
|
ValueType: t,
|
|
}
|
|
}
|
|
return i, err
|
|
}
|
|
|
|
// return whether, at the current depth, the value being decoded will
|
|
// be emitted to stream
|
|
func (d *Decoder) willEmit() bool {
|
|
if d.emitRecursive {
|
|
return d.depth >= d.emitDepth
|
|
}
|
|
return d.depth == d.emitDepth
|
|
}
|
|
|
|
// any used to decode any valid JSON value, and returns an
|
|
// interface{} that holds the actual data
|
|
func (d *Decoder) any() (interface{}, ValueType, error) {
|
|
c := d.cur()
|
|
|
|
switch c {
|
|
case '"':
|
|
i, err := d.string()
|
|
return i, String, err
|
|
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
|
i, err := d.number()
|
|
return i, Number, err
|
|
case '-':
|
|
if c = d.next(); c < '0' && c > '9' {
|
|
return nil, Unknown, d.mkError(ErrSyntax, "in negative numeric literal")
|
|
}
|
|
n, err := d.number()
|
|
if err != nil {
|
|
return nil, Unknown, err
|
|
}
|
|
return -n, Number, nil
|
|
case 'f':
|
|
if d.remaining() < 4 {
|
|
return nil, Unknown, d.mkError(ErrUnexpectedEOF)
|
|
}
|
|
if d.next() == 'a' && d.next() == 'l' && d.next() == 's' && d.next() == 'e' {
|
|
return false, Boolean, nil
|
|
}
|
|
return nil, Unknown, d.mkError(ErrSyntax, "in literal false")
|
|
case 't':
|
|
if d.remaining() < 3 {
|
|
return nil, Unknown, d.mkError(ErrUnexpectedEOF)
|
|
}
|
|
if d.next() == 'r' && d.next() == 'u' && d.next() == 'e' {
|
|
return true, Boolean, nil
|
|
}
|
|
return nil, Unknown, d.mkError(ErrSyntax, "in literal true")
|
|
case 'n':
|
|
if d.remaining() < 3 {
|
|
return nil, Unknown, d.mkError(ErrUnexpectedEOF)
|
|
}
|
|
if d.next() == 'u' && d.next() == 'l' && d.next() == 'l' {
|
|
return nil, Null, nil
|
|
}
|
|
return nil, Unknown, d.mkError(ErrSyntax, "in literal null")
|
|
case '[':
|
|
i, err := d.array()
|
|
return i, Array, err
|
|
case '{':
|
|
i, err := d.object()
|
|
return i, Object, err
|
|
default:
|
|
return nil, Unknown, d.mkError(ErrSyntax, "looking for beginning of value")
|
|
}
|
|
}
|
|
|
|
// string called by `any` or `object`(for map keys) after reading `"`
|
|
func (d *Decoder) string() (string, error) {
|
|
d.scratch.reset()
|
|
|
|
var (
|
|
c = d.next()
|
|
)
|
|
|
|
scan:
|
|
for {
|
|
switch {
|
|
case c == '"':
|
|
return string(d.scratch.bytes()), nil
|
|
case c == '\\':
|
|
c = d.next()
|
|
goto scan_esc
|
|
case c < 0x20:
|
|
return "", d.mkError(ErrSyntax, "in string literal")
|
|
// Coerce to well-formed UTF-8.
|
|
default:
|
|
d.scratch.add(c)
|
|
if d.remaining() == 0 {
|
|
return "", d.mkError(ErrSyntax, "in string literal")
|
|
}
|
|
c = d.next()
|
|
}
|
|
}
|
|
|
|
scan_esc:
|
|
switch c {
|
|
case '"', '\\', '/', '\'':
|
|
d.scratch.add(c)
|
|
case 'u':
|
|
goto scan_u
|
|
case 'b':
|
|
d.scratch.add('\b')
|
|
case 'f':
|
|
d.scratch.add('\f')
|
|
case 'n':
|
|
d.scratch.add('\n')
|
|
case 'r':
|
|
d.scratch.add('\r')
|
|
case 't':
|
|
d.scratch.add('\t')
|
|
default:
|
|
return "", d.mkError(ErrSyntax, "in string escape code")
|
|
}
|
|
c = d.next()
|
|
goto scan
|
|
|
|
scan_u:
|
|
r := d.u4()
|
|
if r < 0 {
|
|
return "", d.mkError(ErrSyntax, "in unicode escape sequence")
|
|
}
|
|
|
|
// check for proceeding surrogate pair
|
|
c = d.next()
|
|
if !utf16.IsSurrogate(r) || c != '\\' {
|
|
d.scratch.addRune(r)
|
|
goto scan
|
|
}
|
|
if c = d.next(); c != 'u' {
|
|
d.scratch.addRune(r)
|
|
goto scan_esc
|
|
}
|
|
|
|
r2 := d.u4()
|
|
if r2 < 0 {
|
|
return "", d.mkError(ErrSyntax, "in unicode escape sequence")
|
|
}
|
|
|
|
// write surrogate pair
|
|
d.scratch.addRune(utf16.DecodeRune(r, r2))
|
|
c = d.next()
|
|
goto scan
|
|
}
|
|
|
|
// u4 reads four bytes following a \u escape
|
|
func (d *Decoder) u4() rune {
|
|
// logic taken from:
|
|
// github.com/buger/jsonparser/blob/master/escape.go#L20
|
|
var h [4]int
|
|
for i := 0; i < 4; i++ {
|
|
c := d.next()
|
|
switch {
|
|
case c >= '0' && c <= '9':
|
|
h[i] = int(c - '0')
|
|
case c >= 'A' && c <= 'F':
|
|
h[i] = int(c - 'A' + 10)
|
|
case c >= 'a' && c <= 'f':
|
|
h[i] = int(c - 'a' + 10)
|
|
default:
|
|
return -1
|
|
}
|
|
}
|
|
return rune(h[0]<<12 + h[1]<<8 + h[2]<<4 + h[3])
|
|
}
|
|
|
|
// number called by `any` after reading number between 0 to 9
|
|
func (d *Decoder) number() (float64, error) {
|
|
d.scratch.reset()
|
|
|
|
var (
|
|
c = d.cur()
|
|
n float64
|
|
isFloat bool
|
|
)
|
|
|
|
// digits first
|
|
switch {
|
|
case c == '0':
|
|
d.scratch.add(c)
|
|
c = d.next()
|
|
case '1' <= c && c <= '9':
|
|
for ; c >= '0' && c <= '9'; c = d.next() {
|
|
n = 10*n + float64(c-'0')
|
|
d.scratch.add(c)
|
|
}
|
|
}
|
|
|
|
// . followed by 1 or more digits
|
|
if c == '.' {
|
|
isFloat = true
|
|
d.scratch.add(c)
|
|
|
|
// first char following must be digit
|
|
if c = d.next(); c < '0' && c > '9' {
|
|
return 0, d.mkError(ErrSyntax, "after decimal point in numeric literal")
|
|
}
|
|
d.scratch.add(c)
|
|
|
|
for {
|
|
if d.remaining() == 0 {
|
|
return 0, d.mkError(ErrUnexpectedEOF)
|
|
}
|
|
if c = d.next(); c < '0' || c > '9' {
|
|
break
|
|
}
|
|
d.scratch.add(c)
|
|
}
|
|
}
|
|
|
|
// e or E followed by an optional - or + and
|
|
// 1 or more digits.
|
|
if c == 'e' || c == 'E' {
|
|
isFloat = true
|
|
d.scratch.add(c)
|
|
|
|
if c = d.next(); c == '+' || c == '-' {
|
|
d.scratch.add(c)
|
|
if c = d.next(); c < '0' || c > '9' {
|
|
return 0, d.mkError(ErrSyntax, "in exponent of numeric literal")
|
|
}
|
|
d.scratch.add(c)
|
|
}
|
|
for ; c >= '0' && c <= '9'; c = d.next() {
|
|
d.scratch.add(c)
|
|
}
|
|
}
|
|
|
|
if isFloat {
|
|
var (
|
|
err error
|
|
sn string
|
|
)
|
|
sn = string(d.scratch.bytes())
|
|
if n, err = strconv.ParseFloat(sn, 64); err != nil {
|
|
return 0, err
|
|
}
|
|
}
|
|
|
|
d.back()
|
|
return n, nil
|
|
}
|
|
|
|
// array accept valid JSON array value
|
|
func (d *Decoder) array() ([]interface{}, error) {
|
|
d.depth++
|
|
|
|
var (
|
|
c byte
|
|
v interface{}
|
|
err error
|
|
array = make([]interface{}, 0)
|
|
)
|
|
|
|
// look ahead for ] - if the array is empty.
|
|
if c = d.skipSpaces(); c == ']' {
|
|
goto out
|
|
}
|
|
|
|
scan:
|
|
if v, err = d.emitAny(); err != nil {
|
|
goto out
|
|
}
|
|
|
|
if d.depth > d.emitDepth { // skip alloc for array if it won't be emitted
|
|
array = append(array, v)
|
|
}
|
|
|
|
// next token must be ',' or ']'
|
|
switch c = d.skipSpaces(); c {
|
|
case ',':
|
|
d.skipSpaces()
|
|
goto scan
|
|
case ']':
|
|
goto out
|
|
default:
|
|
err = d.mkError(ErrSyntax, "after array element")
|
|
}
|
|
|
|
out:
|
|
d.depth--
|
|
return array, err
|
|
}
|
|
|
|
// object accept valid JSON array value
|
|
func (d *Decoder) object() (KVS, error) {
|
|
d.depth++
|
|
|
|
var (
|
|
c byte
|
|
k string
|
|
v interface{}
|
|
t ValueType
|
|
err error
|
|
obj KVS
|
|
)
|
|
|
|
// skip allocating map if it will not be emitted
|
|
if d.depth > d.emitDepth {
|
|
obj = make(KVS, 0)
|
|
}
|
|
|
|
// if the object has no keys
|
|
if c = d.skipSpaces(); c == '}' {
|
|
goto out
|
|
}
|
|
|
|
scan:
|
|
for {
|
|
offset := d.pos - 1
|
|
|
|
// read string key
|
|
if c != '"' {
|
|
err = d.mkError(ErrSyntax, "looking for beginning of object key string")
|
|
break
|
|
}
|
|
if k, err = d.string(); err != nil {
|
|
break
|
|
}
|
|
|
|
// read colon before value
|
|
if c = d.skipSpaces(); c != ':' {
|
|
err = d.mkError(ErrSyntax, "after object key")
|
|
break
|
|
}
|
|
|
|
// read value
|
|
d.skipSpaces()
|
|
if d.emitKV {
|
|
if v, t, err = d.any(); err != nil {
|
|
break
|
|
}
|
|
if d.willEmit() {
|
|
d.metaCh <- &MetaValue{
|
|
Offset: int(offset),
|
|
Length: int(d.pos - offset),
|
|
Depth: d.depth,
|
|
Value: KV{k, v},
|
|
ValueType: t,
|
|
}
|
|
}
|
|
} else {
|
|
if v, err = d.emitAny(); err != nil {
|
|
break
|
|
}
|
|
}
|
|
|
|
if obj != nil {
|
|
obj = append(obj, KV{k, v})
|
|
}
|
|
|
|
// next token must be ',' or '}'
|
|
switch c = d.skipSpaces(); c {
|
|
case '}':
|
|
goto out
|
|
case ',':
|
|
c = d.skipSpaces()
|
|
goto scan
|
|
default:
|
|
err = d.mkError(ErrSyntax, "after object key:value pair")
|
|
}
|
|
}
|
|
|
|
out:
|
|
d.depth--
|
|
return obj, err
|
|
}
|
|
|
|
// returns the next char after white spaces
|
|
func (d *Decoder) skipSpaces() byte {
|
|
for d.pos < atomic.LoadInt64(&d.end) {
|
|
switch c := d.next(); c {
|
|
case '\n':
|
|
d.lineStart = d.pos
|
|
d.lineNo++
|
|
continue
|
|
case ' ', '\t', '\r':
|
|
continue
|
|
default:
|
|
return c
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// create syntax errors at current position, with optional context
|
|
func (d *Decoder) mkError(err SyntaxError, context ...string) error {
|
|
if len(context) > 0 {
|
|
err.context = context[0]
|
|
}
|
|
err.atChar = d.cur()
|
|
err.pos[0] = d.lineNo + 1
|
|
err.pos[1] = int(d.pos - d.lineStart)
|
|
return err
|
|
}
|