2021-04-18 15:41:13 -04:00
|
|
|
// Copyright (c) 2015-2021 MinIO, Inc.
|
|
|
|
//
|
|
|
|
// This file is part of MinIO Object Storage stack
|
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2019-01-08 19:53:04 -05:00
|
|
|
|
|
|
|
package json
|
|
|
|
|
|
|
|
import (
|
|
|
|
"io"
|
2019-11-05 17:20:37 -05:00
|
|
|
"sync"
|
2019-01-08 19:53:04 -05:00
|
|
|
|
2024-09-23 15:35:41 -04:00
|
|
|
"github.com/minio/minio/internal/s3select/jstream"
|
2021-06-01 17:59:40 -04:00
|
|
|
"github.com/minio/minio/internal/s3select/sql"
|
2019-01-08 19:53:04 -05:00
|
|
|
)
|
|
|
|
|
2024-09-16 12:59:03 -04:00
|
|
|
// maxDocumentSize is the byte limit applied to JSON input: 10 MiB, which is
// 10x the AWS limit for a single document. See:
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/selecting-content-from-objects.html
const maxDocumentSize = 10 * (1 << 20)
|
|
|
|
|
2019-01-08 19:53:04 -05:00
|
|
|
// Reader - JSON record reader for S3Select.
|
|
|
|
type Reader struct {
|
2019-02-06 21:34:42 -05:00
|
|
|
args *ReaderArgs
|
|
|
|
decoder *jstream.Decoder
|
|
|
|
valueCh chan *jstream.MetaValue
|
|
|
|
readCloser io.ReadCloser
|
2019-01-08 19:53:04 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// Read - reads single record.
|
2019-09-13 17:18:35 -04:00
|
|
|
func (r *Reader) Read(dst sql.Record) (sql.Record, error) {
|
2019-02-06 21:34:42 -05:00
|
|
|
v, ok := <-r.valueCh
|
|
|
|
if !ok {
|
|
|
|
if err := r.decoder.Err(); err != nil {
|
|
|
|
return nil, errJSONParsingError(err)
|
|
|
|
}
|
|
|
|
return nil, io.EOF
|
2019-01-08 19:53:04 -05:00
|
|
|
}
|
|
|
|
|
2019-03-09 11:13:37 -05:00
|
|
|
var kvs jstream.KVS
|
2019-02-06 21:34:42 -05:00
|
|
|
if v.ValueType == jstream.Object {
|
2019-03-09 11:13:37 -05:00
|
|
|
// This is a JSON object type (that preserves key
|
|
|
|
// order)
|
|
|
|
kvs = v.Value.(jstream.KVS)
|
2019-02-06 21:34:42 -05:00
|
|
|
} else {
|
2019-03-07 03:20:10 -05:00
|
|
|
// To be AWS S3 compatible Select for JSON needs to
|
|
|
|
// output non-object JSON as single column value
|
|
|
|
// i.e. a map with `_1` as key and value as the
|
|
|
|
// non-object.
|
2019-03-09 11:13:37 -05:00
|
|
|
kvs = jstream.KVS{jstream.KV{Key: "_1", Value: v.Value}}
|
2019-01-08 19:53:04 -05:00
|
|
|
}
|
|
|
|
|
2019-09-13 17:18:35 -04:00
|
|
|
dstRec, ok := dst.(*Record)
|
|
|
|
if !ok {
|
|
|
|
dstRec = &Record{}
|
|
|
|
}
|
|
|
|
dstRec.KVS = kvs
|
|
|
|
dstRec.SelectFormat = sql.SelectFmtJSON
|
|
|
|
return dstRec, nil
|
2019-01-08 19:53:04 -05:00
|
|
|
}
|
|
|
|
|
2019-09-13 17:18:35 -04:00
|
|
|
// Close - closes underlying reader.
|
2019-01-08 19:53:04 -05:00
|
|
|
func (r *Reader) Close() error {
|
2019-09-13 17:18:35 -04:00
|
|
|
// Close the input.
|
|
|
|
err := r.readCloser.Close()
|
|
|
|
for range r.valueCh {
|
|
|
|
// Drain values so we don't leak a goroutine.
|
|
|
|
// Since we have closed the input, it should fail rather quickly.
|
|
|
|
}
|
|
|
|
return err
|
2019-01-08 19:53:04 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewReader - creates new JSON reader using readCloser.
|
|
|
|
func NewReader(readCloser io.ReadCloser, args *ReaderArgs) *Reader {
|
2019-11-05 17:20:37 -05:00
|
|
|
readCloser = &syncReadCloser{rc: readCloser}
|
2024-09-23 15:35:41 -04:00
|
|
|
d := jstream.NewDecoder(io.LimitReader(readCloser, maxDocumentSize), 0).ObjectAsKVS().MaxDepth(100)
|
2019-01-08 19:53:04 -05:00
|
|
|
return &Reader{
|
2019-02-06 21:34:42 -05:00
|
|
|
args: args,
|
|
|
|
decoder: d,
|
|
|
|
valueCh: d.Stream(),
|
|
|
|
readCloser: readCloser,
|
2019-01-08 19:53:04 -05:00
|
|
|
}
|
|
|
|
}
|
2019-11-05 17:20:37 -05:00
|
|
|
|
2022-03-23 23:58:53 -04:00
|
|
|
// syncReadCloser wraps an io.ReadCloser and makes it safe to call Close while
// reads are running.
type syncReadCloser struct {
	rc io.ReadCloser // wrapped reader; set to nil once Close has run
	mu sync.Mutex    // held for the duration of every Read and Close
}

// Read reads from the wrapped reader while holding the mutex. This ensures
// that Close blocks until Read has completed, which allows another goroutine
// to close the reader. After Close, Read returns 0 and io.EOF.
func (pr *syncReadCloser) Read(p []byte) (n int, err error) {
	pr.mu.Lock()
	defer pr.mu.Unlock()

	if src := pr.rc; src != nil {
		return src.Read(p)
	}
	return 0, io.EOF
}

// Close closes the wrapped reader and returns its error. Subsequent calls do
// nothing and return nil.
func (pr *syncReadCloser) Close() error {
	pr.mu.Lock()
	defer pr.mu.Unlock()

	if pr.rc == nil {
		// Already closed.
		return nil
	}
	closeErr := pr.rc.Close()
	pr.rc = nil
	return closeErr
}
|