mirror of
https://github.com/minio/minio.git
synced 2025-11-23 19:17:43 -05:00
Performance improvements to SELECT API on certain query operations (#6752)
This improves the performance of certain queries dramatically, such as 'count(*)'.

Without this PR
```
~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz
2173762
real 0m42.464s
user 0m0.071s
sys 0m0.010s
```

With this PR
```
~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz
2173762
real 0m17.603s
user 0m0.093s
sys 0m0.008s
```

Roughly a 2.4x speedup in wall-clock time (42.464s down to 17.603s).

This PR avoids a lot of type conversions and instead relies on raw sequences of data and interprets them lazily.

```
benchcmp old new
benchmark old ns/op new ns/op delta
BenchmarkSQLAggregate_100K-4 551213 259782 -52.87%
BenchmarkSQLAggregate_1M-4 6981901985 2432413729 -65.16%
BenchmarkSQLAggregate_2M-4 13511978488 4536903552 -66.42%
BenchmarkSQLAggregate_10M-4 68427084908 23266283336 -66.00%

benchmark old allocs new allocs delta
BenchmarkSQLAggregate_100K-4 2366 485 -79.50%
BenchmarkSQLAggregate_1M-4 47455492 21462860 -54.77%
BenchmarkSQLAggregate_2M-4 95163637 43110771 -54.70%
BenchmarkSQLAggregate_10M-4 476959550 216906510 -54.52%

benchmark old bytes new bytes delta
BenchmarkSQLAggregate_100K-4 1233079 1086024 -11.93%
BenchmarkSQLAggregate_1M-4 2607984120 557038536 -78.64%
BenchmarkSQLAggregate_2M-4 5254103616 1128149168 -78.53%
BenchmarkSQLAggregate_10M-4 26443524872 5722715992 -78.36%
```
This commit is contained in:
committed by
kannappanr
parent
f9779b24ad
commit
7e1661f4fa
@@ -19,17 +19,17 @@ package s3select
|
||||
import (
|
||||
"bytes"
|
||||
"compress/bzip2"
|
||||
"compress/gzip"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
humanize "github.com/dustin/go-humanize"
|
||||
"github.com/klauspost/pgzip"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/format"
|
||||
"github.com/minio/minio/pkg/s3select/format/csv"
|
||||
"github.com/minio/minio/pkg/s3select/format/json"
|
||||
|
||||
humanize "github.com/dustin/go-humanize"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -40,18 +40,6 @@ const (
|
||||
continuationTime time.Duration = 5 * time.Second
|
||||
)
|
||||
|
||||
// ParseSelectTokens tokenizes the select query into required Columns, Alias, limit value
|
||||
// where clause, aggregate functions, myFunctions, error.
|
||||
type ParseSelectTokens struct {
|
||||
reqCols []string
|
||||
alias string
|
||||
myLimit int64
|
||||
whereClause interface{}
|
||||
aggFunctionNames []string
|
||||
myFuncs *SelectFuncs
|
||||
myErr error
|
||||
}
|
||||
|
||||
// Row is a Struct for keeping track of key aspects of a row.
|
||||
type Row struct {
|
||||
record string
|
||||
@@ -60,7 +48,7 @@ type Row struct {
|
||||
|
||||
// This function replaces "",'' with `` for the select parser
|
||||
func cleanExpr(expr string) string {
|
||||
r := strings.NewReplacer("\"", "`", "'", "`")
|
||||
r := strings.NewReplacer("\"", "`")
|
||||
return r.Replace(expr)
|
||||
}
|
||||
|
||||
@@ -68,7 +56,7 @@ func cleanExpr(expr string) string {
|
||||
func New(reader io.Reader, size int64, req ObjectSelectRequest) (s3s format.Select, err error) {
|
||||
switch req.InputSerialization.CompressionType {
|
||||
case SelectCompressionGZIP:
|
||||
if reader, err = gzip.NewReader(reader); err != nil {
|
||||
if reader, err = pgzip.NewReader(reader); err != nil {
|
||||
return nil, format.ErrTruncatedInput
|
||||
}
|
||||
case SelectCompressionBZIP:
|
||||
@@ -119,7 +107,7 @@ func New(reader io.Reader, size int64, req ObjectSelectRequest) (s3s format.Sele
|
||||
// response writer in a streaming fashion so that the client can actively use
|
||||
// the results before the query is finally finished executing. The
|
||||
func Execute(writer io.Writer, f format.Select) error {
|
||||
myRow := make(chan Row, 1000)
|
||||
rowCh := make(chan Row)
|
||||
curBuf := bytes.NewBuffer(make([]byte, humanize.MiByte))
|
||||
curBuf.Reset()
|
||||
progressTicker := time.NewTicker(progressTime)
|
||||
@@ -127,10 +115,10 @@ func Execute(writer io.Writer, f format.Select) error {
|
||||
defer progressTicker.Stop()
|
||||
defer continuationTimer.Stop()
|
||||
|
||||
go runSelectParser(f, myRow)
|
||||
go runSelectParser(f, rowCh)
|
||||
for {
|
||||
select {
|
||||
case row, ok := <-myRow:
|
||||
case row, ok := <-rowCh:
|
||||
if ok && row.err != nil {
|
||||
_, err := writeErrorMessage(row.err, curBuf).WriteTo(writer)
|
||||
flusher, okFlush := writer.(http.Flusher)
|
||||
@@ -141,7 +129,7 @@ func Execute(writer io.Writer, f format.Select) error {
|
||||
return err
|
||||
}
|
||||
curBuf.Reset()
|
||||
close(myRow)
|
||||
close(rowCh)
|
||||
return nil
|
||||
} else if ok {
|
||||
_, err := writeRecordMessage(row.record, curBuf).WriteTo(writer)
|
||||
|
||||
Reference in New Issue
Block a user