mirror of
https://github.com/minio/minio.git
synced 2025-11-10 05:59:43 -05:00
Performance improvements to SELECT API on certain query operations (#6752)
This improves the performance of certain queries dramatically, such as 'count(*)' etc. Without this PR ``` ~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz 2173762 real 0m42.464s user 0m0.071s sys 0m0.010s ``` With this PR ``` ~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz 2173762 real 0m17.603s user 0m0.093s sys 0m0.008s ``` That is roughly a 2.4x speedup (wall-clock time drops by about 59%). This PR avoids a lot of type conversions and instead relies on raw sequences of data and interprets them lazily. ``` benchcmp old new benchmark old ns/op new ns/op delta BenchmarkSQLAggregate_100K-4 551213 259782 -52.87% BenchmarkSQLAggregate_1M-4 6981901985 2432413729 -65.16% BenchmarkSQLAggregate_2M-4 13511978488 4536903552 -66.42% BenchmarkSQLAggregate_10M-4 68427084908 23266283336 -66.00% benchmark old allocs new allocs delta BenchmarkSQLAggregate_100K-4 2366 485 -79.50% BenchmarkSQLAggregate_1M-4 47455492 21462860 -54.77% BenchmarkSQLAggregate_2M-4 95163637 43110771 -54.70% BenchmarkSQLAggregate_10M-4 476959550 216906510 -54.52% benchmark old bytes new bytes delta BenchmarkSQLAggregate_100K-4 1233079 1086024 -11.93% BenchmarkSQLAggregate_1M-4 2607984120 557038536 -78.64% BenchmarkSQLAggregate_2M-4 5254103616 1128149168 -78.53% BenchmarkSQLAggregate_10M-4 26443524872 5722715992 -78.36% ```
This commit is contained in:
committed by
kannappanr
parent
f9779b24ad
commit
7e1661f4fa
@@ -17,11 +17,10 @@
|
||||
package json
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"bufio"
|
||||
"encoding/xml"
|
||||
"io"
|
||||
|
||||
jsoniter "github.com/json-iterator/go"
|
||||
"github.com/minio/minio/pkg/s3select/format"
|
||||
)
|
||||
|
||||
@@ -57,7 +56,7 @@ type Options struct {
|
||||
// jinput represents a record producing input from a formatted file or pipe.
|
||||
type jinput struct {
|
||||
options *Options
|
||||
reader *jsoniter.Decoder
|
||||
reader *bufio.Reader
|
||||
firstRow []string
|
||||
header []string
|
||||
minOutputLength int
|
||||
@@ -75,7 +74,7 @@ type jinput struct {
|
||||
func New(opts *Options) (format.Select, error) {
|
||||
reader := &jinput{
|
||||
options: opts,
|
||||
reader: jsoniter.NewDecoder(opts.ReadFrom),
|
||||
reader: bufio.NewReader(opts.ReadFrom),
|
||||
}
|
||||
reader.stats.BytesScanned = opts.StreamSize
|
||||
reader.stats.BytesProcessed = 0
|
||||
@@ -90,26 +89,21 @@ func (reader *jinput) Progress() bool {
|
||||
}
|
||||
|
||||
// UpdateBytesProcessed - increments the count of bytes processed
|
||||
func (reader *jinput) UpdateBytesProcessed(record map[string]interface{}) {
|
||||
out, _ := json.Marshal(record)
|
||||
reader.stats.BytesProcessed += int64(len(out))
|
||||
func (reader *jinput) UpdateBytesProcessed(size int64) {
|
||||
reader.stats.BytesProcessed += size
|
||||
}
|
||||
|
||||
// Read the file and returns map[string]interface{}
|
||||
func (reader *jinput) Read() (map[string]interface{}, error) {
|
||||
dec := reader.reader
|
||||
var record interface{}
|
||||
for {
|
||||
err := dec.Decode(&record)
|
||||
// Read returns the next newline-delimited record from the input as raw bytes
|
||||
func (reader *jinput) Read() ([]byte, error) {
|
||||
data, err := reader.reader.ReadBytes('\n')
|
||||
if err != nil {
|
||||
if err == io.EOF || err == io.ErrClosedPipe {
|
||||
break
|
||||
err = nil
|
||||
} else {
|
||||
err = format.ErrJSONParsingError
|
||||
}
|
||||
if err != nil {
|
||||
return nil, format.ErrJSONParsingError
|
||||
}
|
||||
return record.(map[string]interface{}), nil
|
||||
}
|
||||
return nil, nil
|
||||
return data, err
|
||||
}
|
||||
|
||||
// OutputFieldDelimiter - returns the delimiter specified in input request
|
||||
|
||||
Reference in New Issue
Block a user