mirror of
https://github.com/minio/minio.git
synced 2025-11-20 18:06:10 -05:00
Performance improvements to SELECT API on certain query operations (#6752)
This improves the performance of certain queries dramatically, such as 'count(*)' etc.

Without this PR
```
~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz
2173762

real 0m42.464s
user 0m0.071s
sys 0m0.010s
```

With this PR
```
~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz
2173762

real 0m17.603s
user 0m0.093s
sys 0m0.008s
```

Almost a 2.5x speedup (wall-clock time drops from about 42.5s to about 17.6s).

This PR avoids a lot of type conversions and instead relies on raw sequences of data and interprets them lazily.

```
benchcmp old new
benchmark                        old ns/op       new ns/op       delta
BenchmarkSQLAggregate_100K-4     551213          259782          -52.87%
BenchmarkSQLAggregate_1M-4       6981901985      2432413729      -65.16%
BenchmarkSQLAggregate_2M-4       13511978488     4536903552      -66.42%
BenchmarkSQLAggregate_10M-4      68427084908     23266283336     -66.00%

benchmark                        old allocs      new allocs      delta
BenchmarkSQLAggregate_100K-4     2366            485             -79.50%
BenchmarkSQLAggregate_1M-4       47455492        21462860        -54.77%
BenchmarkSQLAggregate_2M-4       95163637        43110771        -54.70%
BenchmarkSQLAggregate_10M-4      476959550       216906510       -54.52%

benchmark                        old bytes       new bytes       delta
BenchmarkSQLAggregate_100K-4     1233079         1086024         -11.93%
BenchmarkSQLAggregate_1M-4       2607984120      557038536       -78.64%
BenchmarkSQLAggregate_2M-4       5254103616      1128149168      -78.53%
BenchmarkSQLAggregate_10M-4      26443524872     5722715992      -78.36%
```
This commit is contained in:
committed by
kannappanr
parent
f9779b24ad
commit
7e1661f4fa
@@ -19,13 +19,15 @@ package s3select
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/format"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/xwb1989/sqlparser"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/format"
|
||||
)
|
||||
|
||||
// stringOps is a function which handles the case in a clause if there is a need
|
||||
// to perform a string function
|
||||
func stringOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) string {
|
||||
// stringOps is a function which handles the case in a clause
|
||||
// if there is a need to perform a string function
|
||||
func stringOps(myFunc *sqlparser.FuncExpr, record []byte, myReturnVal string) string {
|
||||
var value string
|
||||
funcName := myFunc.Name.CompliantName()
|
||||
switch tempArg := myFunc.Exprs[0].(type) {
|
||||
@@ -33,29 +35,29 @@ func stringOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) st
|
||||
switch col := tempArg.Expr.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
// myReturnVal is actually the tail recursive value being used in the eval func.
|
||||
return applyStrFunc(myReturnVal, funcName)
|
||||
return applyStrFunc(gjson.Parse(myReturnVal), funcName)
|
||||
case *sqlparser.ColName:
|
||||
value = applyStrFunc(jsonValue(col.Name.CompliantName(), record), funcName)
|
||||
value = applyStrFunc(gjson.GetBytes(record, col.Name.CompliantName()), funcName)
|
||||
case *sqlparser.SQLVal:
|
||||
value = applyStrFunc(string(col.Val), funcName)
|
||||
value = applyStrFunc(gjson.ParseBytes(col.Val), funcName)
|
||||
}
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
// coalOps is a function which decomposes a COALESCE func expr into its struct.
|
||||
func coalOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) string {
|
||||
func coalOps(myFunc *sqlparser.FuncExpr, record []byte, myReturnVal string) string {
|
||||
myArgs := make([]string, len(myFunc.Exprs))
|
||||
|
||||
for i := 0; i < len(myFunc.Exprs); i++ {
|
||||
switch tempArg := myFunc.Exprs[i].(type) {
|
||||
for i, expr := range myFunc.Exprs {
|
||||
switch tempArg := expr.(type) {
|
||||
case *sqlparser.AliasedExpr:
|
||||
switch col := tempArg.Expr.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
// myReturnVal is actually the tail recursive value being used in the eval func.
|
||||
return myReturnVal
|
||||
case *sqlparser.ColName:
|
||||
myArgs[i] = jsonValue(col.Name.CompliantName(), record)
|
||||
myArgs[i] = gjson.GetBytes(record, col.Name.CompliantName()).String()
|
||||
case *sqlparser.SQLVal:
|
||||
myArgs[i] = string(col.Val)
|
||||
}
|
||||
@@ -65,54 +67,47 @@ func coalOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) stri
|
||||
}
|
||||
|
||||
// nullOps is a function which decomposes a NullIf func expr into its struct.
|
||||
func nullOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) string {
|
||||
func nullOps(myFunc *sqlparser.FuncExpr, record []byte, myReturnVal string) string {
|
||||
myArgs := make([]string, 2)
|
||||
|
||||
for i := 0; i < len(myFunc.Exprs); i++ {
|
||||
switch tempArg := myFunc.Exprs[i].(type) {
|
||||
for i, expr := range myFunc.Exprs {
|
||||
switch tempArg := expr.(type) {
|
||||
case *sqlparser.AliasedExpr:
|
||||
switch col := tempArg.Expr.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
return myReturnVal
|
||||
case *sqlparser.ColName:
|
||||
myArgs[i] = jsonValue(col.Name.CompliantName(), record)
|
||||
myArgs[i] = gjson.GetBytes(record, col.Name.CompliantName()).String()
|
||||
case *sqlparser.SQLVal:
|
||||
myArgs[i] = string(col.Val)
|
||||
}
|
||||
}
|
||||
}
|
||||
return processNullIf(myArgs)
|
||||
if myArgs[0] == myArgs[1] {
|
||||
return ""
|
||||
}
|
||||
return myArgs[0]
|
||||
}
|
||||
|
||||
// isValidString is a function that ensures the current index is one with a
|
||||
// StrFunc
|
||||
// isValidFunc is a function that ensures the
|
||||
// current index is one with a StrFunc
|
||||
func isValidFunc(myList []int, index int) bool {
|
||||
if myList == nil {
|
||||
return false
|
||||
}
|
||||
for i := 0; i < len(myList); i++ {
|
||||
if myList[i] == index {
|
||||
for _, i := range myList {
|
||||
if i == index {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// processNullIf is a function that evaluates a given NULLIF clause.
|
||||
func processNullIf(nullStore []string) string {
|
||||
nullValOne := nullStore[0]
|
||||
nullValTwo := nullStore[1]
|
||||
if nullValOne == nullValTwo {
|
||||
return ""
|
||||
}
|
||||
return nullValOne
|
||||
}
|
||||
|
||||
// processCoalNoIndex is a function which evaluates a given COALESCE clause.
|
||||
func processCoalNoIndex(coalStore []string) string {
|
||||
for i := 0; i < len(coalStore); i++ {
|
||||
if coalStore[i] != "null" && coalStore[i] != "missing" && coalStore[i] != "" {
|
||||
return coalStore[i]
|
||||
for _, coal := range coalStore {
|
||||
if coal != "null" && coal != "missing" && coal != "" {
|
||||
return coal
|
||||
}
|
||||
}
|
||||
return "null"
|
||||
@@ -120,15 +115,15 @@ func processCoalNoIndex(coalStore []string) string {
|
||||
|
||||
// evaluateFuncExpr is a function that allows for tail recursive evaluation of
|
||||
// nested function expressions
|
||||
func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, myRecord string) string {
|
||||
func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, record []byte) string {
|
||||
if myVal == nil {
|
||||
return myReturnVal
|
||||
}
|
||||
// retrieve all the relevant arguments of the function
|
||||
var mySubFunc []*sqlparser.FuncExpr
|
||||
mySubFunc = make([]*sqlparser.FuncExpr, len(myVal.Exprs))
|
||||
for i := 0; i < len(myVal.Exprs); i++ {
|
||||
switch col := myVal.Exprs[i].(type) {
|
||||
for i, expr := range myVal.Exprs {
|
||||
switch col := expr.(type) {
|
||||
case *sqlparser.AliasedExpr:
|
||||
switch temp := col.Expr.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
@@ -141,19 +136,19 @@ func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, myRecord st
|
||||
for i := 0; i < len(mySubFunc); i++ {
|
||||
if supportedString(myVal.Name.CompliantName()) {
|
||||
if mySubFunc != nil {
|
||||
return stringOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord))
|
||||
return stringOps(myVal, record, evaluateFuncExpr(mySubFunc[i], myReturnVal, record))
|
||||
}
|
||||
return stringOps(myVal, myRecord, myReturnVal)
|
||||
return stringOps(myVal, record, myReturnVal)
|
||||
} else if strings.ToUpper(myVal.Name.CompliantName()) == "NULLIF" {
|
||||
if mySubFunc != nil {
|
||||
return nullOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord))
|
||||
return nullOps(myVal, record, evaluateFuncExpr(mySubFunc[i], myReturnVal, record))
|
||||
}
|
||||
return nullOps(myVal, myRecord, myReturnVal)
|
||||
return nullOps(myVal, record, myReturnVal)
|
||||
} else if strings.ToUpper(myVal.Name.CompliantName()) == "COALESCE" {
|
||||
if mySubFunc != nil {
|
||||
return coalOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord))
|
||||
return coalOps(myVal, record, evaluateFuncExpr(mySubFunc[i], myReturnVal, record))
|
||||
}
|
||||
return coalOps(myVal, myRecord, myReturnVal)
|
||||
return coalOps(myVal, record, myReturnVal)
|
||||
}
|
||||
}
|
||||
return ""
|
||||
@@ -167,8 +162,8 @@ func evaluateFuncErr(myVal *sqlparser.FuncExpr, reader format.Select) error {
|
||||
if !supportedFunc(myVal.Name.CompliantName()) {
|
||||
return ErrUnsupportedSQLOperation
|
||||
}
|
||||
for i := 0; i < len(myVal.Exprs); i++ {
|
||||
switch tempArg := myVal.Exprs[i].(type) {
|
||||
for _, expr := range myVal.Exprs {
|
||||
switch tempArg := expr.(type) {
|
||||
case *sqlparser.StarExpr:
|
||||
return ErrParseUnsupportedCallWithStar
|
||||
case *sqlparser.AliasedExpr:
|
||||
@@ -188,29 +183,31 @@ func evaluateFuncErr(myVal *sqlparser.FuncExpr, reader format.Select) error {
|
||||
}
|
||||
|
||||
// evaluateIsExpr is a function for evaluating expressions of the form "column is ...."
|
||||
func evaluateIsExpr(myFunc *sqlparser.IsExpr, row string, alias string) (bool, error) {
|
||||
operator := myFunc.Operator
|
||||
var myVal string
|
||||
switch myIs := myFunc.Expr.(type) {
|
||||
// case for literal val
|
||||
case *sqlparser.SQLVal:
|
||||
myVal = string(myIs.Val)
|
||||
// case for nested func val
|
||||
case *sqlparser.FuncExpr:
|
||||
myVal = evaluateFuncExpr(myIs, "", row)
|
||||
// case for col val
|
||||
case *sqlparser.ColName:
|
||||
myVal = jsonValue(myIs.Name.CompliantName(), row)
|
||||
func evaluateIsExpr(myFunc *sqlparser.IsExpr, row []byte, alias string) (bool, error) {
|
||||
getMyVal := func() (myVal string) {
|
||||
switch myIs := myFunc.Expr.(type) {
|
||||
// case for literal val
|
||||
case *sqlparser.SQLVal:
|
||||
myVal = string(myIs.Val)
|
||||
// case for nested func val
|
||||
case *sqlparser.FuncExpr:
|
||||
myVal = evaluateFuncExpr(myIs, "", row)
|
||||
// case for col val
|
||||
case *sqlparser.ColName:
|
||||
myVal = gjson.GetBytes(row, myIs.Name.CompliantName()).String()
|
||||
}
|
||||
return myVal
|
||||
}
|
||||
// case to evaluate is null
|
||||
if strings.ToLower(operator) == "is null" {
|
||||
return myVal == "", nil
|
||||
|
||||
operator := strings.ToLower(myFunc.Operator)
|
||||
switch operator {
|
||||
case "is null":
|
||||
return getMyVal() == "", nil
|
||||
case "is not null":
|
||||
return getMyVal() != "", nil
|
||||
default:
|
||||
return false, ErrUnsupportedSQLOperation
|
||||
}
|
||||
// case to evaluate is not null
|
||||
if strings.ToLower(operator) == "is not null" {
|
||||
return myVal != "", nil
|
||||
}
|
||||
return false, ErrUnsupportedSQLOperation
|
||||
}
|
||||
|
||||
// supportedString is a function that checks whether the function is a supported
|
||||
|
||||
Reference in New Issue
Block a user