mirror of
https://github.com/minio/minio.git
synced 2025-01-11 23:13:23 -05:00
7e1661f4fa
This improves the performance of certain queries dramatically, such as 'count(*)' etc.

Without this PR
```
~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz
2173762

real 0m42.464s
user 0m0.071s
sys 0m0.010s
```

With this PR
```
~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz
2173762

real 0m17.603s
user 0m0.093s
sys 0m0.008s
```

Almost a 2.5x improvement in performance. This PR avoids a lot of type conversions and instead relies on raw sequences of data and interprets them lazily.

```
benchcmp old new
benchmark                        old ns/op       new ns/op       delta
BenchmarkSQLAggregate_100K-4     551213          259782          -52.87%
BenchmarkSQLAggregate_1M-4       6981901985      2432413729      -65.16%
BenchmarkSQLAggregate_2M-4       13511978488     4536903552      -66.42%
BenchmarkSQLAggregate_10M-4      68427084908     23266283336     -66.00%

benchmark                        old allocs      new allocs      delta
BenchmarkSQLAggregate_100K-4     2366            485             -79.50%
BenchmarkSQLAggregate_1M-4       47455492        21462860        -54.77%
BenchmarkSQLAggregate_2M-4       95163637        43110771        -54.70%
BenchmarkSQLAggregate_10M-4      476959550       216906510       -54.52%

benchmark                        old bytes       new bytes       delta
BenchmarkSQLAggregate_100K-4     1233079         1086024         -11.93%
BenchmarkSQLAggregate_1M-4       2607984120      557038536       -78.64%
BenchmarkSQLAggregate_2M-4       5254103616      1128149168      -78.53%
BenchmarkSQLAggregate_10M-4      26443524872     5722715992      -78.36%
```
224 lines
6.8 KiB
Go
224 lines
6.8 KiB
Go
/*
|
|
* Minio Cloud Storage, (C) 2018 Minio, Inc.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package s3select
|
|
|
|
import (
|
|
"strings"
|
|
|
|
"github.com/tidwall/gjson"
|
|
"github.com/xwb1989/sqlparser"
|
|
|
|
"github.com/minio/minio/pkg/s3select/format"
|
|
)
|
|
|
|
// stringOps is a function which handles the case in a clause
|
|
// if there is a need to perform a string function
|
|
func stringOps(myFunc *sqlparser.FuncExpr, record []byte, myReturnVal string) string {
|
|
var value string
|
|
funcName := myFunc.Name.CompliantName()
|
|
switch tempArg := myFunc.Exprs[0].(type) {
|
|
case *sqlparser.AliasedExpr:
|
|
switch col := tempArg.Expr.(type) {
|
|
case *sqlparser.FuncExpr:
|
|
// myReturnVal is actually the tail recursive value being used in the eval func.
|
|
return applyStrFunc(gjson.Parse(myReturnVal), funcName)
|
|
case *sqlparser.ColName:
|
|
value = applyStrFunc(gjson.GetBytes(record, col.Name.CompliantName()), funcName)
|
|
case *sqlparser.SQLVal:
|
|
value = applyStrFunc(gjson.ParseBytes(col.Val), funcName)
|
|
}
|
|
}
|
|
return value
|
|
}
|
|
|
|
// coalOps is a function which decomposes a COALESCE func expr into its struct.
|
|
func coalOps(myFunc *sqlparser.FuncExpr, record []byte, myReturnVal string) string {
|
|
myArgs := make([]string, len(myFunc.Exprs))
|
|
|
|
for i, expr := range myFunc.Exprs {
|
|
switch tempArg := expr.(type) {
|
|
case *sqlparser.AliasedExpr:
|
|
switch col := tempArg.Expr.(type) {
|
|
case *sqlparser.FuncExpr:
|
|
// myReturnVal is actually the tail recursive value being used in the eval func.
|
|
return myReturnVal
|
|
case *sqlparser.ColName:
|
|
myArgs[i] = gjson.GetBytes(record, col.Name.CompliantName()).String()
|
|
case *sqlparser.SQLVal:
|
|
myArgs[i] = string(col.Val)
|
|
}
|
|
}
|
|
}
|
|
return processCoalNoIndex(myArgs)
|
|
}
|
|
|
|
// nullOps is a function which decomposes a NullIf func expr into its struct.
|
|
func nullOps(myFunc *sqlparser.FuncExpr, record []byte, myReturnVal string) string {
|
|
myArgs := make([]string, 2)
|
|
|
|
for i, expr := range myFunc.Exprs {
|
|
switch tempArg := expr.(type) {
|
|
case *sqlparser.AliasedExpr:
|
|
switch col := tempArg.Expr.(type) {
|
|
case *sqlparser.FuncExpr:
|
|
return myReturnVal
|
|
case *sqlparser.ColName:
|
|
myArgs[i] = gjson.GetBytes(record, col.Name.CompliantName()).String()
|
|
case *sqlparser.SQLVal:
|
|
myArgs[i] = string(col.Val)
|
|
}
|
|
}
|
|
}
|
|
if myArgs[0] == myArgs[1] {
|
|
return ""
|
|
}
|
|
return myArgs[0]
|
|
}
|
|
|
|
// isValidFunc reports whether index occurs in myList. A nil or empty list
// never contains the index.
// NOTE(review): callers presumably pass the positions that hold function
// expressions; confirm against the call sites.
func isValidFunc(myList []int, index int) bool {
	for pos := 0; pos < len(myList); pos++ {
		if myList[pos] == index {
			return true
		}
	}
	return false
}
|
|
|
|
// processCoalNoIndex returns the first entry of coalStore that is not one of
// the placeholder values "null", "missing" or the empty string. When every
// entry is a placeholder, or the slice is empty, it returns "null".
func processCoalNoIndex(coalStore []string) string {
	for idx := 0; idx < len(coalStore); idx++ {
		switch candidate := coalStore[idx]; candidate {
		case "null", "missing", "":
			// Placeholder for an absent value: keep looking.
			continue
		default:
			return candidate
		}
	}
	return "null"
}
|
|
|
|
// evaluateFuncExpr is a function that allows for tail recursive evaluation of
|
|
// nested function expressions
|
|
func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, record []byte) string {
|
|
if myVal == nil {
|
|
return myReturnVal
|
|
}
|
|
// retrieve all the relevant arguments of the function
|
|
var mySubFunc []*sqlparser.FuncExpr
|
|
mySubFunc = make([]*sqlparser.FuncExpr, len(myVal.Exprs))
|
|
for i, expr := range myVal.Exprs {
|
|
switch col := expr.(type) {
|
|
case *sqlparser.AliasedExpr:
|
|
switch temp := col.Expr.(type) {
|
|
case *sqlparser.FuncExpr:
|
|
mySubFunc[i] = temp
|
|
}
|
|
}
|
|
}
|
|
// Need to do tree recursion so as to explore all possible directions of the
|
|
// nested function recursion
|
|
for i := 0; i < len(mySubFunc); i++ {
|
|
if supportedString(myVal.Name.CompliantName()) {
|
|
if mySubFunc != nil {
|
|
return stringOps(myVal, record, evaluateFuncExpr(mySubFunc[i], myReturnVal, record))
|
|
}
|
|
return stringOps(myVal, record, myReturnVal)
|
|
} else if strings.ToUpper(myVal.Name.CompliantName()) == "NULLIF" {
|
|
if mySubFunc != nil {
|
|
return nullOps(myVal, record, evaluateFuncExpr(mySubFunc[i], myReturnVal, record))
|
|
}
|
|
return nullOps(myVal, record, myReturnVal)
|
|
} else if strings.ToUpper(myVal.Name.CompliantName()) == "COALESCE" {
|
|
if mySubFunc != nil {
|
|
return coalOps(myVal, record, evaluateFuncExpr(mySubFunc[i], myReturnVal, record))
|
|
}
|
|
return coalOps(myVal, record, myReturnVal)
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// evaluateFuncErr is a function that flags errors in nested functions.
|
|
func evaluateFuncErr(myVal *sqlparser.FuncExpr, reader format.Select) error {
|
|
if myVal == nil {
|
|
return nil
|
|
}
|
|
if !supportedFunc(myVal.Name.CompliantName()) {
|
|
return ErrUnsupportedSQLOperation
|
|
}
|
|
for _, expr := range myVal.Exprs {
|
|
switch tempArg := expr.(type) {
|
|
case *sqlparser.StarExpr:
|
|
return ErrParseUnsupportedCallWithStar
|
|
case *sqlparser.AliasedExpr:
|
|
switch col := tempArg.Expr.(type) {
|
|
case *sqlparser.FuncExpr:
|
|
if err := evaluateFuncErr(col, reader); err != nil {
|
|
return err
|
|
}
|
|
case *sqlparser.ColName:
|
|
if err := reader.ColNameErrs([]string{col.Name.CompliantName()}); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// evaluateIsExpr is a function for evaluating expressions of the form "column is ...."
|
|
func evaluateIsExpr(myFunc *sqlparser.IsExpr, row []byte, alias string) (bool, error) {
|
|
getMyVal := func() (myVal string) {
|
|
switch myIs := myFunc.Expr.(type) {
|
|
// case for literal val
|
|
case *sqlparser.SQLVal:
|
|
myVal = string(myIs.Val)
|
|
// case for nested func val
|
|
case *sqlparser.FuncExpr:
|
|
myVal = evaluateFuncExpr(myIs, "", row)
|
|
// case for col val
|
|
case *sqlparser.ColName:
|
|
myVal = gjson.GetBytes(row, myIs.Name.CompliantName()).String()
|
|
}
|
|
return myVal
|
|
}
|
|
|
|
operator := strings.ToLower(myFunc.Operator)
|
|
switch operator {
|
|
case "is null":
|
|
return getMyVal() == "", nil
|
|
case "is not null":
|
|
return getMyVal() != "", nil
|
|
default:
|
|
return false, ErrUnsupportedSQLOperation
|
|
}
|
|
}
|
|
|
|
// supportedString is a function that checks whether the function is a supported
|
|
// string one
|
|
func supportedString(strFunc string) bool {
|
|
return format.StringInSlice(strings.ToUpper(strFunc), []string{"TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER"})
|
|
}
|
|
|
|
// supportedFunc is a function that checks whether the function is a supported
|
|
// S3 one.
|
|
func supportedFunc(strFunc string) bool {
|
|
return format.StringInSlice(strings.ToUpper(strFunc), []string{"TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER", "COALESCE", "NULLIF"})
|
|
}
|