mirror of
https://github.com/minio/minio.git
synced 2025-11-07 21:02:58 -05:00
Performance improvements to SELECT API on certain query operations (#6752)
This improves the performance of certain queries, such as `count(*)`, dramatically.

Without this PR

```
~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz
2173762

real    0m42.464s
user    0m0.071s
sys     0m0.010s
```

With this PR

```
~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz
2173762

real    0m17.603s
user    0m0.093s
sys     0m0.008s
```

That is roughly a 2.4x speedup (about 59% less wall-clock time).

This PR avoids a lot of type conversions and instead relies on raw sequences of data, interpreting them lazily.

```
benchcmp old new
benchmark                        old ns/op       new ns/op       delta
BenchmarkSQLAggregate_100K-4     551213          259782          -52.87%
BenchmarkSQLAggregate_1M-4       6981901985      2432413729      -65.16%
BenchmarkSQLAggregate_2M-4       13511978488     4536903552      -66.42%
BenchmarkSQLAggregate_10M-4      68427084908     23266283336     -66.00%

benchmark                        old allocs     new allocs     delta
BenchmarkSQLAggregate_100K-4     2366           485            -79.50%
BenchmarkSQLAggregate_1M-4       47455492       21462860       -54.77%
BenchmarkSQLAggregate_2M-4       95163637       43110771       -54.70%
BenchmarkSQLAggregate_10M-4      476959550      216906510      -54.52%

benchmark                        old bytes       new bytes      delta
BenchmarkSQLAggregate_100K-4     1233079         1086024        -11.93%
BenchmarkSQLAggregate_1M-4       2607984120      557038536      -78.64%
BenchmarkSQLAggregate_2M-4       5254103616      1128149168     -78.53%
BenchmarkSQLAggregate_10M-4      26443524872     5722715992     -78.36%
```
This commit is contained in:
committed by
kannappanr
parent
f9779b24ad
commit
7e1661f4fa
@@ -29,6 +29,7 @@ import (
|
||||
"github.com/minio/minio/pkg/event"
|
||||
"github.com/minio/minio/pkg/hash"
|
||||
"github.com/minio/minio/pkg/s3select"
|
||||
"github.com/minio/minio/pkg/s3select/format"
|
||||
)
|
||||
|
||||
// APIError structure
|
||||
@@ -1655,7 +1656,8 @@ func toAPIErrorCode(ctx context.Context, err error) (apiErr APIErrorCode) {
|
||||
apiErr = ErrEvaluatorBindingDoesNotExist
|
||||
case s3select.ErrMissingHeaders:
|
||||
apiErr = ErrMissingHeaders
|
||||
|
||||
case format.ErrParseInvalidPathComponent:
|
||||
apiErr = ErrMissingHeaders
|
||||
}
|
||||
|
||||
// Compression errors
|
||||
|
||||
@@ -230,9 +230,7 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
|
||||
|
||||
}
|
||||
if selectReq.InputSerialization.JSON != nil {
|
||||
if selectReq.InputSerialization.JSON.Type != s3select.JSONTypeDocument &&
|
||||
selectReq.InputSerialization.JSON.Type != s3select.JSONLinesType &&
|
||||
selectReq.InputSerialization.JSON.Type != "" {
|
||||
if selectReq.InputSerialization.JSON.Type != s3select.JSONLinesType {
|
||||
writeErrorResponse(w, ErrInvalidJSONType, r.URL)
|
||||
return
|
||||
}
|
||||
@@ -255,7 +253,16 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
|
||||
reader := readahead.NewReader(gr)
|
||||
defer reader.Close()
|
||||
|
||||
s3s, err := s3select.New(reader, objInfo.GetActualSize(), selectReq)
|
||||
size := objInfo.Size
|
||||
if objInfo.IsCompressed() {
|
||||
size = objInfo.GetActualSize()
|
||||
if size < 0 {
|
||||
writeErrorResponse(w, toAPIErrorCode(ctx, errInvalidDecompressedSize), r.URL)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
s3s, err := s3select.New(reader, size, selectReq)
|
||||
if err != nil {
|
||||
writeErrorResponse(w, toAPIErrorCode(ctx, err), r.URL)
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user