mirror of
https://github.com/minio/minio.git
synced 2025-01-14 16:25:01 -05:00
7e1661f4fa
This improves the performance of certain queries dramatically, such as
'count(*)' etc.

Without this PR
```
~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz
2173762

real    0m42.464s
user    0m0.071s
sys     0m0.010s
```

With this PR
```
~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz
2173762

real    0m17.603s
user    0m0.093s
sys     0m0.008s
```

Almost a 2.5x improvement in performance (42.5s down to 17.6s).

This PR avoids a lot of type conversions and instead relies on raw sequences
of data and interprets them lazily.

```
benchcmp old new
benchmark                        old ns/op       new ns/op       delta
BenchmarkSQLAggregate_100K-4     551213          259782          -52.87%
BenchmarkSQLAggregate_1M-4       6981901985      2432413729      -65.16%
BenchmarkSQLAggregate_2M-4       13511978488     4536903552      -66.42%
BenchmarkSQLAggregate_10M-4      68427084908     23266283336     -66.00%

benchmark                        old allocs      new allocs      delta
BenchmarkSQLAggregate_100K-4     2366            485             -79.50%
BenchmarkSQLAggregate_1M-4       47455492        21462860        -54.77%
BenchmarkSQLAggregate_2M-4       95163637        43110771        -54.70%
BenchmarkSQLAggregate_10M-4      476959550       216906510       -54.52%

benchmark                        old bytes       new bytes       delta
BenchmarkSQLAggregate_100K-4     1233079         1086024         -11.93%
BenchmarkSQLAggregate_1M-4       2607984120      557038536       -78.64%
BenchmarkSQLAggregate_2M-4       5254103616      1128149168      -78.53%
BenchmarkSQLAggregate_10M-4      26443524872     5722715992      -78.36%
```
64 lines
1.9 KiB
Go
64 lines
1.9 KiB
Go
/*
|
|
* Minio Cloud Storage, (C) 2018 Minio, Inc.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package format
|
|
|
|
import "encoding/xml"
|
|
|
|
// Select Interface helper methods, implementing features needed for
|
|
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
|
|
type Select interface {
|
|
Type() Type
|
|
Read() ([]byte, error)
|
|
Header() []string
|
|
HasHeader() bool
|
|
OutputFieldDelimiter() string
|
|
UpdateBytesProcessed(int64)
|
|
Expression() string
|
|
UpdateBytesReturned(int64)
|
|
CreateStatXML() (string, error)
|
|
CreateProgressXML() (string, error)
|
|
ColNameErrs(columnNames []string) error
|
|
Progress() bool
|
|
}
|
|
|
|
// Progress is the payload of a progress message. It encodes to XML as a
// <Progress> element containing the three byte counters in declaration order.
type Progress struct {
	// XMLName pins the XML element name to "Progress"; the json:"-" tag
	// keeps the field out of JSON encodings.
	XMLName xml.Name `xml:"Progress" json:"-"`

	// Counters, each emitted as a child element carrying the field's name.
	BytesScanned   int64 `xml:"BytesScanned"`
	BytesProcessed int64 `xml:"BytesProcessed"`
	BytesReturned  int64 `xml:"BytesReturned"`
}
|
|
|
|
// Stats is the payload of a stat message. It encodes to XML as a <Stats>
// element containing the three byte counters in declaration order.
type Stats struct {
	// XMLName pins the XML element name to "Stats"; the json:"-" tag keeps
	// the field out of JSON encodings.
	XMLName xml.Name `xml:"Stats" json:"-"`

	// Counters, each emitted as a child element carrying the field's name.
	BytesScanned   int64 `xml:"BytesScanned"`
	BytesProcessed int64 `xml:"BytesProcessed"`
	BytesReturned  int64 `xml:"BytesReturned"`
}
|
|
|
|
// Type names a supported data format. Its underlying type is string, so a
// value converts directly to and from its textual form.
type Type string
|
|
|
|
// Different data format types.
|
|
const (
|
|
JSON Type = "json"
|
|
CSV Type = "csv"
|
|
)
|