mirror of
https://github.com/minio/minio.git
synced 2025-11-11 06:20:14 -05:00
Performance improvements to SELECT API on certain query operations (#6752)
This improves the performance of certain queries dramatically, such as 'count(*)' etc. Without this PR ``` ~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz 2173762 real 0m42.464s user 0m0.071s sys 0m0.010s ``` With this PR ``` ~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz 2173762 real 0m17.603s user 0m0.093s sys 0m0.008s ``` Almost a 250% improvement in performance. This PR avoids a lot of type conversions and instead relies on raw sequences of data and interprets them lazily. ``` benchcmp old new benchmark old ns/op new ns/op delta BenchmarkSQLAggregate_100K-4 551213 259782 -52.87% BenchmarkSQLAggregate_1M-4 6981901985 2432413729 -65.16% BenchmarkSQLAggregate_2M-4 13511978488 4536903552 -66.42% BenchmarkSQLAggregate_10M-4 68427084908 23266283336 -66.00% benchmark old allocs new allocs delta BenchmarkSQLAggregate_100K-4 2366 485 -79.50% BenchmarkSQLAggregate_1M-4 47455492 21462860 -54.77% BenchmarkSQLAggregate_2M-4 95163637 43110771 -54.70% BenchmarkSQLAggregate_10M-4 476959550 216906510 -54.52% benchmark old bytes new bytes delta BenchmarkSQLAggregate_100K-4 1233079 1086024 -11.93% BenchmarkSQLAggregate_1M-4 2607984120 557038536 -78.64% BenchmarkSQLAggregate_2M-4 5254103616 1128149168 -78.53% BenchmarkSQLAggregate_10M-4 26443524872 5722715992 -78.36% ```
This commit is contained in:
committed by
kannappanr
parent
f9779b24ad
commit
7e1661f4fa
@@ -19,13 +19,15 @@ package s3select
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/format"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/xwb1989/sqlparser"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/format"
|
||||
)
|
||||
|
||||
// stringOps is a function which handles the case in a clause if there is a need
|
||||
// to perform a string function
|
||||
func stringOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) string {
|
||||
// stringOps is a function which handles the case in a clause
|
||||
// if there is a need to perform a string function
|
||||
func stringOps(myFunc *sqlparser.FuncExpr, record []byte, myReturnVal string) string {
|
||||
var value string
|
||||
funcName := myFunc.Name.CompliantName()
|
||||
switch tempArg := myFunc.Exprs[0].(type) {
|
||||
@@ -33,29 +35,29 @@ func stringOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) st
|
||||
switch col := tempArg.Expr.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
// myReturnVal is actually the tail recursive value being used in the eval func.
|
||||
return applyStrFunc(myReturnVal, funcName)
|
||||
return applyStrFunc(gjson.Parse(myReturnVal), funcName)
|
||||
case *sqlparser.ColName:
|
||||
value = applyStrFunc(jsonValue(col.Name.CompliantName(), record), funcName)
|
||||
value = applyStrFunc(gjson.GetBytes(record, col.Name.CompliantName()), funcName)
|
||||
case *sqlparser.SQLVal:
|
||||
value = applyStrFunc(string(col.Val), funcName)
|
||||
value = applyStrFunc(gjson.ParseBytes(col.Val), funcName)
|
||||
}
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
// coalOps is a function which decomposes a COALESCE func expr into its struct.
|
||||
func coalOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) string {
|
||||
func coalOps(myFunc *sqlparser.FuncExpr, record []byte, myReturnVal string) string {
|
||||
myArgs := make([]string, len(myFunc.Exprs))
|
||||
|
||||
for i := 0; i < len(myFunc.Exprs); i++ {
|
||||
switch tempArg := myFunc.Exprs[i].(type) {
|
||||
for i, expr := range myFunc.Exprs {
|
||||
switch tempArg := expr.(type) {
|
||||
case *sqlparser.AliasedExpr:
|
||||
switch col := tempArg.Expr.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
// myReturnVal is actually the tail recursive value being used in the eval func.
|
||||
return myReturnVal
|
||||
case *sqlparser.ColName:
|
||||
myArgs[i] = jsonValue(col.Name.CompliantName(), record)
|
||||
myArgs[i] = gjson.GetBytes(record, col.Name.CompliantName()).String()
|
||||
case *sqlparser.SQLVal:
|
||||
myArgs[i] = string(col.Val)
|
||||
}
|
||||
@@ -65,54 +67,47 @@ func coalOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) stri
|
||||
}
|
||||
|
||||
// nullOps is a function which decomposes a NullIf func expr into its struct.
|
||||
func nullOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) string {
|
||||
func nullOps(myFunc *sqlparser.FuncExpr, record []byte, myReturnVal string) string {
|
||||
myArgs := make([]string, 2)
|
||||
|
||||
for i := 0; i < len(myFunc.Exprs); i++ {
|
||||
switch tempArg := myFunc.Exprs[i].(type) {
|
||||
for i, expr := range myFunc.Exprs {
|
||||
switch tempArg := expr.(type) {
|
||||
case *sqlparser.AliasedExpr:
|
||||
switch col := tempArg.Expr.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
return myReturnVal
|
||||
case *sqlparser.ColName:
|
||||
myArgs[i] = jsonValue(col.Name.CompliantName(), record)
|
||||
myArgs[i] = gjson.GetBytes(record, col.Name.CompliantName()).String()
|
||||
case *sqlparser.SQLVal:
|
||||
myArgs[i] = string(col.Val)
|
||||
}
|
||||
}
|
||||
}
|
||||
return processNullIf(myArgs)
|
||||
if myArgs[0] == myArgs[1] {
|
||||
return ""
|
||||
}
|
||||
return myArgs[0]
|
||||
}
|
||||
|
||||
// isValidString is a function that ensures the current index is one with a
|
||||
// StrFunc
|
||||
// isValidString is a function that ensures the
|
||||
// current index is one with a StrFunc
|
||||
func isValidFunc(myList []int, index int) bool {
|
||||
if myList == nil {
|
||||
return false
|
||||
}
|
||||
for i := 0; i < len(myList); i++ {
|
||||
if myList[i] == index {
|
||||
for _, i := range myList {
|
||||
if i == index {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// processNullIf is a function that evaluates a given NULLIF clause.
|
||||
func processNullIf(nullStore []string) string {
|
||||
nullValOne := nullStore[0]
|
||||
nullValTwo := nullStore[1]
|
||||
if nullValOne == nullValTwo {
|
||||
return ""
|
||||
}
|
||||
return nullValOne
|
||||
}
|
||||
|
||||
// processCoalNoIndex is a function which evaluates a given COALESCE clause.
|
||||
func processCoalNoIndex(coalStore []string) string {
|
||||
for i := 0; i < len(coalStore); i++ {
|
||||
if coalStore[i] != "null" && coalStore[i] != "missing" && coalStore[i] != "" {
|
||||
return coalStore[i]
|
||||
for _, coal := range coalStore {
|
||||
if coal != "null" && coal != "missing" && coal != "" {
|
||||
return coal
|
||||
}
|
||||
}
|
||||
return "null"
|
||||
@@ -120,15 +115,15 @@ func processCoalNoIndex(coalStore []string) string {
|
||||
|
||||
// evaluateFuncExpr is a function that allows for tail recursive evaluation of
|
||||
// nested function expressions
|
||||
func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, myRecord string) string {
|
||||
func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, record []byte) string {
|
||||
if myVal == nil {
|
||||
return myReturnVal
|
||||
}
|
||||
// retrieve all the relevant arguments of the function
|
||||
var mySubFunc []*sqlparser.FuncExpr
|
||||
mySubFunc = make([]*sqlparser.FuncExpr, len(myVal.Exprs))
|
||||
for i := 0; i < len(myVal.Exprs); i++ {
|
||||
switch col := myVal.Exprs[i].(type) {
|
||||
for i, expr := range myVal.Exprs {
|
||||
switch col := expr.(type) {
|
||||
case *sqlparser.AliasedExpr:
|
||||
switch temp := col.Expr.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
@@ -141,19 +136,19 @@ func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, myRecord st
|
||||
for i := 0; i < len(mySubFunc); i++ {
|
||||
if supportedString(myVal.Name.CompliantName()) {
|
||||
if mySubFunc != nil {
|
||||
return stringOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord))
|
||||
return stringOps(myVal, record, evaluateFuncExpr(mySubFunc[i], myReturnVal, record))
|
||||
}
|
||||
return stringOps(myVal, myRecord, myReturnVal)
|
||||
return stringOps(myVal, record, myReturnVal)
|
||||
} else if strings.ToUpper(myVal.Name.CompliantName()) == "NULLIF" {
|
||||
if mySubFunc != nil {
|
||||
return nullOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord))
|
||||
return nullOps(myVal, record, evaluateFuncExpr(mySubFunc[i], myReturnVal, record))
|
||||
}
|
||||
return nullOps(myVal, myRecord, myReturnVal)
|
||||
return nullOps(myVal, record, myReturnVal)
|
||||
} else if strings.ToUpper(myVal.Name.CompliantName()) == "COALESCE" {
|
||||
if mySubFunc != nil {
|
||||
return coalOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord))
|
||||
return coalOps(myVal, record, evaluateFuncExpr(mySubFunc[i], myReturnVal, record))
|
||||
}
|
||||
return coalOps(myVal, myRecord, myReturnVal)
|
||||
return coalOps(myVal, record, myReturnVal)
|
||||
}
|
||||
}
|
||||
return ""
|
||||
@@ -167,8 +162,8 @@ func evaluateFuncErr(myVal *sqlparser.FuncExpr, reader format.Select) error {
|
||||
if !supportedFunc(myVal.Name.CompliantName()) {
|
||||
return ErrUnsupportedSQLOperation
|
||||
}
|
||||
for i := 0; i < len(myVal.Exprs); i++ {
|
||||
switch tempArg := myVal.Exprs[i].(type) {
|
||||
for _, expr := range myVal.Exprs {
|
||||
switch tempArg := expr.(type) {
|
||||
case *sqlparser.StarExpr:
|
||||
return ErrParseUnsupportedCallWithStar
|
||||
case *sqlparser.AliasedExpr:
|
||||
@@ -188,29 +183,31 @@ func evaluateFuncErr(myVal *sqlparser.FuncExpr, reader format.Select) error {
|
||||
}
|
||||
|
||||
// evaluateIsExpr is a function for evaluating expressions of the form "column is ...."
|
||||
func evaluateIsExpr(myFunc *sqlparser.IsExpr, row string, alias string) (bool, error) {
|
||||
operator := myFunc.Operator
|
||||
var myVal string
|
||||
switch myIs := myFunc.Expr.(type) {
|
||||
// case for literal val
|
||||
case *sqlparser.SQLVal:
|
||||
myVal = string(myIs.Val)
|
||||
// case for nested func val
|
||||
case *sqlparser.FuncExpr:
|
||||
myVal = evaluateFuncExpr(myIs, "", row)
|
||||
// case for col val
|
||||
case *sqlparser.ColName:
|
||||
myVal = jsonValue(myIs.Name.CompliantName(), row)
|
||||
func evaluateIsExpr(myFunc *sqlparser.IsExpr, row []byte, alias string) (bool, error) {
|
||||
getMyVal := func() (myVal string) {
|
||||
switch myIs := myFunc.Expr.(type) {
|
||||
// case for literal val
|
||||
case *sqlparser.SQLVal:
|
||||
myVal = string(myIs.Val)
|
||||
// case for nested func val
|
||||
case *sqlparser.FuncExpr:
|
||||
myVal = evaluateFuncExpr(myIs, "", row)
|
||||
// case for col val
|
||||
case *sqlparser.ColName:
|
||||
myVal = gjson.GetBytes(row, myIs.Name.CompliantName()).String()
|
||||
}
|
||||
return myVal
|
||||
}
|
||||
// case to evaluate is null
|
||||
if strings.ToLower(operator) == "is null" {
|
||||
return myVal == "", nil
|
||||
|
||||
operator := strings.ToLower(myFunc.Operator)
|
||||
switch operator {
|
||||
case "is null":
|
||||
return getMyVal() == "", nil
|
||||
case "is not null":
|
||||
return getMyVal() != "", nil
|
||||
default:
|
||||
return false, ErrUnsupportedSQLOperation
|
||||
}
|
||||
// case to evaluate is not null
|
||||
if strings.ToLower(operator) == "is not null" {
|
||||
return myVal != "", nil
|
||||
}
|
||||
return false, ErrUnsupportedSQLOperation
|
||||
}
|
||||
|
||||
// supportedString is a function that checks whether the function is a supported
|
||||
|
||||
@@ -23,6 +23,8 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/tidwall/sjson"
|
||||
|
||||
"github.com/minio/minio/pkg/ioutil"
|
||||
"github.com/minio/minio/pkg/s3select/format"
|
||||
)
|
||||
@@ -96,7 +98,6 @@ func New(opts *Options) (format.Select, error) {
|
||||
reader.stats.BytesScanned = opts.StreamSize
|
||||
reader.stats.BytesProcessed = 0
|
||||
reader.stats.BytesReturned = 0
|
||||
|
||||
reader.firstRow = nil
|
||||
|
||||
reader.reader.FieldsPerRecord = -1
|
||||
@@ -120,7 +121,14 @@ func New(opts *Options) (format.Select, error) {
|
||||
|
||||
// Replace the spaces in columnnames with underscores
|
||||
func cleanHeader(columns []string) []string {
|
||||
for i := 0; i < len(columns); i++ {
|
||||
for i := range columns {
|
||||
// Even if header name is specified, some CSV's
|
||||
// might have column header names might be empty
|
||||
// and non-empty. In such a scenario we prepare
|
||||
// indexed value.
|
||||
if columns[i] == "" {
|
||||
columns[i] = "_" + strconv.Itoa(i)
|
||||
}
|
||||
columns[i] = strings.Replace(columns[i], " ", "_", -1)
|
||||
}
|
||||
return columns
|
||||
@@ -137,15 +145,14 @@ func (reader *cinput) readHeader() error {
|
||||
}
|
||||
reader.header = cleanHeader(reader.firstRow)
|
||||
reader.firstRow = nil
|
||||
reader.minOutputLength = len(reader.header)
|
||||
} else {
|
||||
reader.firstRow, readErr = reader.reader.Read()
|
||||
reader.header = make([]string, len(reader.firstRow))
|
||||
for i := 0; i < reader.minOutputLength; i++ {
|
||||
reader.header[i] = strconv.Itoa(i)
|
||||
for i := range reader.firstRow {
|
||||
reader.header[i] = "_" + strconv.Itoa(i)
|
||||
}
|
||||
|
||||
}
|
||||
reader.minOutputLength = len(reader.header)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -155,33 +162,24 @@ func (reader *cinput) Progress() bool {
|
||||
}
|
||||
|
||||
// UpdateBytesProcessed - populates the bytes Processed
|
||||
func (reader *cinput) UpdateBytesProcessed(record map[string]interface{}) {
|
||||
// Convert map to slice of values.
|
||||
values := []string{}
|
||||
for _, value := range record {
|
||||
values = append(values, value.(string))
|
||||
}
|
||||
|
||||
reader.stats.BytesProcessed += int64(len(values))
|
||||
func (reader *cinput) UpdateBytesProcessed(size int64) {
|
||||
reader.stats.BytesProcessed += size
|
||||
|
||||
}
|
||||
|
||||
// Read the file and returns map[string]interface{}
|
||||
func (reader *cinput) Read() (map[string]interface{}, error) {
|
||||
record := make(map[string]interface{})
|
||||
// Read returns byte sequence
|
||||
func (reader *cinput) Read() ([]byte, error) {
|
||||
dec := reader.readRecord()
|
||||
if dec != nil {
|
||||
if reader.options.HasHeader {
|
||||
columns := reader.header
|
||||
for i, value := range dec {
|
||||
record[columns[i]] = value
|
||||
}
|
||||
} else {
|
||||
for i, value := range dec {
|
||||
record["_"+strconv.Itoa(i)] = value
|
||||
var data []byte
|
||||
var err error
|
||||
for i, value := range dec {
|
||||
data, err = sjson.SetBytes(data, reader.header[i], value)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return record, nil
|
||||
return data, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -17,11 +17,10 @@
|
||||
package json
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"bufio"
|
||||
"encoding/xml"
|
||||
"io"
|
||||
|
||||
jsoniter "github.com/json-iterator/go"
|
||||
"github.com/minio/minio/pkg/s3select/format"
|
||||
)
|
||||
|
||||
@@ -57,7 +56,7 @@ type Options struct {
|
||||
// jinput represents a record producing input from a formatted file or pipe.
|
||||
type jinput struct {
|
||||
options *Options
|
||||
reader *jsoniter.Decoder
|
||||
reader *bufio.Reader
|
||||
firstRow []string
|
||||
header []string
|
||||
minOutputLength int
|
||||
@@ -75,7 +74,7 @@ type jinput struct {
|
||||
func New(opts *Options) (format.Select, error) {
|
||||
reader := &jinput{
|
||||
options: opts,
|
||||
reader: jsoniter.NewDecoder(opts.ReadFrom),
|
||||
reader: bufio.NewReader(opts.ReadFrom),
|
||||
}
|
||||
reader.stats.BytesScanned = opts.StreamSize
|
||||
reader.stats.BytesProcessed = 0
|
||||
@@ -90,26 +89,21 @@ func (reader *jinput) Progress() bool {
|
||||
}
|
||||
|
||||
// UpdateBytesProcessed - populates the bytes Processed
|
||||
func (reader *jinput) UpdateBytesProcessed(record map[string]interface{}) {
|
||||
out, _ := json.Marshal(record)
|
||||
reader.stats.BytesProcessed += int64(len(out))
|
||||
func (reader *jinput) UpdateBytesProcessed(size int64) {
|
||||
reader.stats.BytesProcessed += size
|
||||
}
|
||||
|
||||
// Read the file and returns map[string]interface{}
|
||||
func (reader *jinput) Read() (map[string]interface{}, error) {
|
||||
dec := reader.reader
|
||||
var record interface{}
|
||||
for {
|
||||
err := dec.Decode(&record)
|
||||
// Read the file and returns
|
||||
func (reader *jinput) Read() ([]byte, error) {
|
||||
data, err := reader.reader.ReadBytes('\n')
|
||||
if err != nil {
|
||||
if err == io.EOF || err == io.ErrClosedPipe {
|
||||
break
|
||||
err = nil
|
||||
} else {
|
||||
err = format.ErrJSONParsingError
|
||||
}
|
||||
if err != nil {
|
||||
return nil, format.ErrJSONParsingError
|
||||
}
|
||||
return record.(map[string]interface{}), nil
|
||||
}
|
||||
return nil, nil
|
||||
return data, err
|
||||
}
|
||||
|
||||
// OutputFieldDelimiter - returns the delimiter specified in input request
|
||||
|
||||
@@ -22,11 +22,11 @@ import "encoding/xml"
|
||||
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
|
||||
type Select interface {
|
||||
Type() Type
|
||||
Read() (map[string]interface{}, error)
|
||||
Read() ([]byte, error)
|
||||
Header() []string
|
||||
HasHeader() bool
|
||||
OutputFieldDelimiter() string
|
||||
UpdateBytesProcessed(record map[string]interface{})
|
||||
UpdateBytesProcessed(int64)
|
||||
Expression() string
|
||||
UpdateBytesReturned(int64)
|
||||
CreateStatXML() (string, error)
|
||||
|
||||
@@ -17,10 +17,8 @@
|
||||
package s3select
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
@@ -32,64 +30,50 @@ import (
|
||||
// MaxExpressionLength - 256KiB
|
||||
const MaxExpressionLength = 256 * 1024
|
||||
|
||||
// matchesMyWhereClause takes map[string]interfaces{} , process the where clause and returns true if the row suffices
|
||||
func matchesMyWhereClause(record map[string]interface{}, alias string, whereClause interface{}) (bool, error) {
|
||||
// matchesMyWhereClause takes []byte, process the where clause and returns true if the row suffices
|
||||
func matchesMyWhereClause(record []byte, alias string, whereClause sqlparser.Expr) (bool, error) {
|
||||
var conversionColumn string
|
||||
var operator string
|
||||
var operand interface{}
|
||||
var operand gjson.Result
|
||||
if fmt.Sprintf("%v", whereClause) == "false" {
|
||||
return false, nil
|
||||
}
|
||||
out, err := json.Marshal(record)
|
||||
if err != nil {
|
||||
return false, ErrExternalEvalException
|
||||
}
|
||||
switch expr := whereClause.(type) {
|
||||
case *sqlparser.IsExpr:
|
||||
return evaluateIsExpr(expr, string(out), alias)
|
||||
return evaluateIsExpr(expr, record, alias)
|
||||
case *sqlparser.RangeCond:
|
||||
operator = expr.Operator
|
||||
if operator != "between" && operator != "not between" {
|
||||
return false, ErrUnsupportedSQLOperation
|
||||
}
|
||||
if operator == "not between" {
|
||||
result, err := evaluateBetween(expr, alias, string(out))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return !result, nil
|
||||
}
|
||||
result, err := evaluateBetween(expr, alias, string(out))
|
||||
result, err := evaluateBetween(expr, alias, record)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if operator == "not between" {
|
||||
return !result, nil
|
||||
}
|
||||
return result, nil
|
||||
case *sqlparser.ComparisonExpr:
|
||||
operator = expr.Operator
|
||||
switch right := expr.Right.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
operand = evaluateFuncExpr(right, "", string(out))
|
||||
operand = gjson.Parse(evaluateFuncExpr(right, "", record))
|
||||
case *sqlparser.SQLVal:
|
||||
var err error
|
||||
operand, err = evaluateParserType(right)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
operand = gjson.ParseBytes(right.Val)
|
||||
}
|
||||
var myVal string
|
||||
myVal = ""
|
||||
switch left := expr.Left.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
myVal = evaluateFuncExpr(left, "", string(out))
|
||||
myVal = evaluateFuncExpr(left, "", record)
|
||||
conversionColumn = ""
|
||||
case *sqlparser.ColName:
|
||||
conversionColumn = left.Name.CompliantName()
|
||||
}
|
||||
|
||||
if myVal != "" {
|
||||
return evaluateOperator(myVal, operator, operand)
|
||||
return evaluateOperator(gjson.Parse(myVal), operator, operand)
|
||||
}
|
||||
return evaluateOperator(jsonValue(conversionColumn, string(out)), operator, operand)
|
||||
return evaluateOperator(gjson.GetBytes(record, conversionColumn), operator, operand)
|
||||
case *sqlparser.AndExpr:
|
||||
var leftVal bool
|
||||
var rightVal bool
|
||||
@@ -127,58 +111,50 @@ func matchesMyWhereClause(record map[string]interface{}, alias string, whereClau
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func applyStrFunc(rawArg string, funcName string) string {
|
||||
func applyStrFunc(rawArg gjson.Result, funcName string) string {
|
||||
switch strings.ToUpper(funcName) {
|
||||
case "TRIM":
|
||||
// parser has an issue which does not allow it to support Trim with other
|
||||
// arguments
|
||||
return strings.Trim(rawArg, " ")
|
||||
// parser has an issue which does not allow it to support
|
||||
// Trim with other arguments
|
||||
return strings.Trim(rawArg.String(), " ")
|
||||
case "SUBSTRING":
|
||||
// TODO parser has an issue which does not support substring
|
||||
return rawArg
|
||||
// TODO: parser has an issue which does not support substring
|
||||
return rawArg.String()
|
||||
case "CHAR_LENGTH":
|
||||
return strconv.Itoa(len(rawArg))
|
||||
return strconv.Itoa(len(rawArg.String()))
|
||||
case "CHARACTER_LENGTH":
|
||||
return strconv.Itoa(len(rawArg))
|
||||
return strconv.Itoa(len(rawArg.String()))
|
||||
case "LOWER":
|
||||
return strings.ToLower(rawArg)
|
||||
return strings.ToLower(rawArg.String())
|
||||
case "UPPER":
|
||||
return strings.ToUpper(rawArg)
|
||||
return strings.ToUpper(rawArg.String())
|
||||
}
|
||||
return rawArg
|
||||
return rawArg.String()
|
||||
|
||||
}
|
||||
|
||||
// evaluateBetween is a function which evaluates a Between Clause.
|
||||
func evaluateBetween(betweenExpr *sqlparser.RangeCond, alias string, record string) (bool, error) {
|
||||
var colToVal interface{}
|
||||
var colFromVal interface{}
|
||||
func evaluateBetween(betweenExpr *sqlparser.RangeCond, alias string, record []byte) (bool, error) {
|
||||
var colToVal gjson.Result
|
||||
var colFromVal gjson.Result
|
||||
var conversionColumn string
|
||||
var funcName string
|
||||
switch colTo := betweenExpr.To.(type) {
|
||||
case sqlparser.Expr:
|
||||
switch colToMyVal := colTo.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
colToVal = stringOps(colToMyVal, record, "")
|
||||
colToVal = gjson.Parse(stringOps(colToMyVal, record, ""))
|
||||
case *sqlparser.SQLVal:
|
||||
var err error
|
||||
colToVal, err = evaluateParserType(colToMyVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
colToVal = gjson.ParseBytes(colToMyVal.Val)
|
||||
}
|
||||
}
|
||||
switch colFrom := betweenExpr.From.(type) {
|
||||
case sqlparser.Expr:
|
||||
switch colFromMyVal := colFrom.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
colFromVal = stringOps(colFromMyVal, record, "")
|
||||
colFromVal = gjson.Parse(stringOps(colFromMyVal, record, ""))
|
||||
case *sqlparser.SQLVal:
|
||||
var err error
|
||||
colFromVal, err = evaluateParserType(colFromMyVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
colFromVal = gjson.ParseBytes(colFromMyVal.Val)
|
||||
}
|
||||
}
|
||||
var myFuncVal string
|
||||
@@ -189,7 +165,7 @@ func evaluateBetween(betweenExpr *sqlparser.RangeCond, alias string, record stri
|
||||
case *sqlparser.ColName:
|
||||
conversionColumn = cleanCol(left.Name.CompliantName(), alias)
|
||||
}
|
||||
toGreater, err := evaluateOperator(fmt.Sprintf("%v", colToVal), ">", colFromVal)
|
||||
toGreater, err := evaluateOperator(colToVal, ">", colFromVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
@@ -199,113 +175,87 @@ func evaluateBetween(betweenExpr *sqlparser.RangeCond, alias string, record stri
|
||||
return evalBetweenLess(conversionColumn, record, funcName, colFromVal, colToVal, myFuncVal)
|
||||
}
|
||||
|
||||
// evalBetweenGreater is a function which evaluates the between given that the
|
||||
// TO is > than the FROM.
|
||||
func evalBetweenGreater(conversionColumn string, record string, funcName string, colFromVal interface{}, colToVal interface{}, myColVal string) (bool, error) {
|
||||
func evalBetween(conversionColumn string, record []byte, funcName string, colFromVal gjson.Result, colToVal gjson.Result, myColVal string, operator string) (bool, error) {
|
||||
if format.IsInt(conversionColumn) {
|
||||
myVal, err := evaluateOperator(jsonValue("_"+conversionColumn, record), ">=", colFromVal)
|
||||
myVal, err := evaluateOperator(gjson.GetBytes(record, "_"+conversionColumn), operator, colFromVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
var myOtherVal bool
|
||||
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(jsonValue("_"+conversionColumn, record)))
|
||||
myOtherVal, err = evaluateOperator(colToVal, operator, gjson.GetBytes(record, "_"+conversionColumn))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return (myVal && myOtherVal), nil
|
||||
}
|
||||
if myColVal != "" {
|
||||
myVal, err := evaluateOperator(myColVal, ">=", colFromVal)
|
||||
myVal, err := evaluateOperator(gjson.Parse(myColVal), operator, colFromVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
var myOtherVal bool
|
||||
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(myColVal))
|
||||
myOtherVal, err = evaluateOperator(colToVal, operator, gjson.Parse(myColVal))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return (myVal && myOtherVal), nil
|
||||
}
|
||||
myVal, err := evaluateOperator(jsonValue(conversionColumn, record), ">=", colFromVal)
|
||||
myVal, err := evaluateOperator(gjson.GetBytes(record, conversionColumn), operator, colFromVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
var myOtherVal bool
|
||||
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(jsonValue(conversionColumn, record)))
|
||||
myOtherVal, err = evaluateOperator(colToVal, operator, gjson.GetBytes(record, conversionColumn))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return (myVal && myOtherVal), nil
|
||||
}
|
||||
|
||||
// evalBetweenGreater is a function which evaluates the between given that the
|
||||
// TO is > than the FROM.
|
||||
func evalBetweenGreater(conversionColumn string, record []byte, funcName string, colFromVal gjson.Result, colToVal gjson.Result, myColVal string) (bool, error) {
|
||||
return evalBetween(conversionColumn, record, funcName, colFromVal, colToVal, myColVal, ">=")
|
||||
}
|
||||
|
||||
// evalBetweenLess is a function which evaluates the between given that the
|
||||
// FROM is > than the TO.
|
||||
func evalBetweenLess(conversionColumn string, record string, funcName string, colFromVal interface{}, colToVal interface{}, myColVal string) (bool, error) {
|
||||
if format.IsInt(conversionColumn) {
|
||||
// Subtract 1 out because the index starts at 1 for Amazon instead of 0.
|
||||
myVal, err := evaluateOperator(jsonValue("_"+conversionColumn, record), "<=", colFromVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
var myOtherVal bool
|
||||
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(jsonValue("_"+conversionColumn, record)))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return (myVal && myOtherVal), nil
|
||||
}
|
||||
if myColVal != "" {
|
||||
myVal, err := evaluateOperator(myColVal, "<=", colFromVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
var myOtherVal bool
|
||||
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(myColVal))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return (myVal && myOtherVal), nil
|
||||
}
|
||||
myVal, err := evaluateOperator(jsonValue(conversionColumn, record), "<=", colFromVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
var myOtherVal bool
|
||||
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(jsonValue(conversionColumn, record)))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return (myVal && myOtherVal), nil
|
||||
func evalBetweenLess(conversionColumn string, record []byte, funcName string, colFromVal gjson.Result, colToVal gjson.Result, myColVal string) (bool, error) {
|
||||
return evalBetween(conversionColumn, record, funcName, colFromVal, colToVal, myColVal, "<=")
|
||||
}
|
||||
|
||||
// This is a really important function it actually evaluates the boolean
|
||||
// statement and therefore actually returns a bool, it functions as the lowest
|
||||
// level of the state machine.
|
||||
func evaluateOperator(myTblVal string, operator string, operand interface{}) (bool, error) {
|
||||
func evaluateOperator(myTblVal gjson.Result, operator string, operand gjson.Result) (bool, error) {
|
||||
if err := checkValidOperator(operator); err != nil {
|
||||
return false, err
|
||||
}
|
||||
myRecordVal := checkStringType(myTblVal)
|
||||
myVal := reflect.ValueOf(myRecordVal)
|
||||
myOp := reflect.ValueOf(operand)
|
||||
|
||||
switch {
|
||||
case myVal.Kind() == reflect.String && myOp.Kind() == reflect.String:
|
||||
return stringEval(myVal.String(), operator, myOp.String())
|
||||
case myVal.Kind() == reflect.Float64 && myOp.Kind() == reflect.Float64:
|
||||
return floatEval(myVal.Float(), operator, myOp.Float())
|
||||
case myVal.Kind() == reflect.Int && myOp.Kind() == reflect.Int:
|
||||
return intEval(myVal.Int(), operator, myOp.Int())
|
||||
case myVal.Kind() == reflect.Int && myOp.Kind() == reflect.String:
|
||||
stringVs := strconv.Itoa(int(myVal.Int()))
|
||||
return stringEval(stringVs, operator, myOp.String())
|
||||
case myVal.Kind() == reflect.Float64 && myOp.Kind() == reflect.String:
|
||||
stringVs := strconv.FormatFloat(myVal.Float(), 'f', 6, 64)
|
||||
return stringEval(stringVs, operator, myOp.String())
|
||||
case myVal.Kind() != myOp.Kind():
|
||||
if !myTblVal.Exists() {
|
||||
return false, nil
|
||||
}
|
||||
return false, ErrUnsupportedSyntax
|
||||
switch {
|
||||
case operand.Type == gjson.String || operand.Type == gjson.Null:
|
||||
return stringEval(myTblVal.String(), operator, operand.String())
|
||||
case operand.Type == gjson.Number:
|
||||
opInt := format.IsInt(operand.Raw)
|
||||
tblValInt := format.IsInt(strings.Trim(myTblVal.Raw, "\""))
|
||||
if opInt && tblValInt {
|
||||
return intEval(int64(myTblVal.Float()), operator, operand.Int())
|
||||
}
|
||||
if !opInt && !tblValInt {
|
||||
return floatEval(myTblVal.Float(), operator, operand.Float())
|
||||
}
|
||||
switch operator {
|
||||
case "!=":
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
case myTblVal.Type != operand.Type:
|
||||
return false, nil
|
||||
default:
|
||||
return false, ErrUnsupportedSyntax
|
||||
}
|
||||
}
|
||||
|
||||
// checkValidOperator ensures that the current operator is supported
|
||||
@@ -319,19 +269,6 @@ func checkValidOperator(operator string) error {
|
||||
return ErrParseUnknownOperator
|
||||
}
|
||||
|
||||
// checkStringType converts the value from the csv to the appropriate one.
|
||||
func checkStringType(tblVal string) interface{} {
|
||||
intVal, err := strconv.Atoi(tblVal)
|
||||
if err == nil {
|
||||
return intVal
|
||||
}
|
||||
floatVal, err := strconv.ParseFloat(tblVal, 64)
|
||||
if err == nil {
|
||||
return floatVal
|
||||
}
|
||||
return tblVal
|
||||
}
|
||||
|
||||
// stringEval is for evaluating the state of string comparison.
|
||||
func stringEval(myRecordVal string, operator string, myOperand string) (bool, error) {
|
||||
switch operator {
|
||||
@@ -586,48 +523,17 @@ func aggFuncToStr(aggVals []float64, f format.Select) string {
|
||||
}
|
||||
|
||||
// checkForDuplicates ensures we do not have an ambigious column name.
|
||||
func checkForDuplicates(columns []string, columnsMap map[string]int, hasDuplicates map[string]bool, lowercaseColumnsMap map[string]int) error {
|
||||
for i := 0; i < len(columns); i++ {
|
||||
columns[i] = strings.Replace(columns[i], " ", "_", len(columns[i]))
|
||||
func checkForDuplicates(columns []string, columnsMap map[string]int) error {
|
||||
for i, column := range columns {
|
||||
columns[i] = strings.Replace(column, " ", "_", len(column))
|
||||
if _, exist := columnsMap[columns[i]]; exist {
|
||||
return ErrAmbiguousFieldName
|
||||
}
|
||||
columnsMap[columns[i]] = i
|
||||
// This checks that if a key has already been put into the map, that we're
|
||||
// setting its appropriate value in has duplicates to be true.
|
||||
if _, exist := lowercaseColumnsMap[strings.ToLower(columns[i])]; exist {
|
||||
hasDuplicates[strings.ToLower(columns[i])] = true
|
||||
} else {
|
||||
lowercaseColumnsMap[strings.ToLower(columns[i])] = i
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// evaluateParserType is a function that takes a SQL value and returns it as an
|
||||
// interface converted into the appropriate value.
|
||||
func evaluateParserType(col *sqlparser.SQLVal) (interface{}, error) {
|
||||
colDataType := col.Type
|
||||
var val interface{}
|
||||
switch colDataType {
|
||||
case 0:
|
||||
val = string(col.Val)
|
||||
case 1:
|
||||
intVersion, isInt := strconv.Atoi(string(col.Val))
|
||||
if isInt != nil {
|
||||
return nil, ErrIntegerOverflow
|
||||
}
|
||||
val = intVersion
|
||||
case 2:
|
||||
floatVersion, isFloat := strconv.ParseFloat(string(col.Val), 64)
|
||||
if isFloat != nil {
|
||||
return nil, ErrIntegerOverflow
|
||||
}
|
||||
val = floatVersion
|
||||
}
|
||||
return val, nil
|
||||
}
|
||||
|
||||
// parseErrs is the function which handles all the errors that could occur
|
||||
// through use of function arguments such as column names in NULLIF
|
||||
func parseErrs(columnNames []string, whereClause interface{}, alias string, myFuncs SelectFuncs, f format.Select) error {
|
||||
@@ -655,10 +561,3 @@ func parseErrs(columnNames []string, whereClause interface{}, alias string, myFu
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// It return the value corresponding to the tag in Json .
|
||||
// Input is the Key and row is the JSON string
|
||||
func jsonValue(input string, row string) string {
|
||||
value := gjson.Get(row, input)
|
||||
return value.String()
|
||||
}
|
||||
|
||||
@@ -19,17 +19,17 @@ package s3select
|
||||
import (
|
||||
"bytes"
|
||||
"compress/bzip2"
|
||||
"compress/gzip"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
humanize "github.com/dustin/go-humanize"
|
||||
"github.com/klauspost/pgzip"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/format"
|
||||
"github.com/minio/minio/pkg/s3select/format/csv"
|
||||
"github.com/minio/minio/pkg/s3select/format/json"
|
||||
|
||||
humanize "github.com/dustin/go-humanize"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -40,18 +40,6 @@ const (
|
||||
continuationTime time.Duration = 5 * time.Second
|
||||
)
|
||||
|
||||
// ParseSelectTokens tokenizes the select query into required Columns, Alias, limit value
|
||||
// where clause, aggregate functions, myFunctions, error.
|
||||
type ParseSelectTokens struct {
|
||||
reqCols []string
|
||||
alias string
|
||||
myLimit int64
|
||||
whereClause interface{}
|
||||
aggFunctionNames []string
|
||||
myFuncs *SelectFuncs
|
||||
myErr error
|
||||
}
|
||||
|
||||
// Row is a Struct for keeping track of key aspects of a row.
|
||||
type Row struct {
|
||||
record string
|
||||
@@ -60,7 +48,7 @@ type Row struct {
|
||||
|
||||
// This function replaces "",'' with `` for the select parser
|
||||
func cleanExpr(expr string) string {
|
||||
r := strings.NewReplacer("\"", "`", "'", "`")
|
||||
r := strings.NewReplacer("\"", "`")
|
||||
return r.Replace(expr)
|
||||
}
|
||||
|
||||
@@ -68,7 +56,7 @@ func cleanExpr(expr string) string {
|
||||
func New(reader io.Reader, size int64, req ObjectSelectRequest) (s3s format.Select, err error) {
|
||||
switch req.InputSerialization.CompressionType {
|
||||
case SelectCompressionGZIP:
|
||||
if reader, err = gzip.NewReader(reader); err != nil {
|
||||
if reader, err = pgzip.NewReader(reader); err != nil {
|
||||
return nil, format.ErrTruncatedInput
|
||||
}
|
||||
case SelectCompressionBZIP:
|
||||
@@ -119,7 +107,7 @@ func New(reader io.Reader, size int64, req ObjectSelectRequest) (s3s format.Sele
|
||||
// response writer in a streaming fashion so that the client can actively use
|
||||
// the results before the query is finally finished executing. The
|
||||
func Execute(writer io.Writer, f format.Select) error {
|
||||
myRow := make(chan Row, 1000)
|
||||
rowCh := make(chan Row)
|
||||
curBuf := bytes.NewBuffer(make([]byte, humanize.MiByte))
|
||||
curBuf.Reset()
|
||||
progressTicker := time.NewTicker(progressTime)
|
||||
@@ -127,10 +115,10 @@ func Execute(writer io.Writer, f format.Select) error {
|
||||
defer progressTicker.Stop()
|
||||
defer continuationTimer.Stop()
|
||||
|
||||
go runSelectParser(f, myRow)
|
||||
go runSelectParser(f, rowCh)
|
||||
for {
|
||||
select {
|
||||
case row, ok := <-myRow:
|
||||
case row, ok := <-rowCh:
|
||||
if ok && row.err != nil {
|
||||
_, err := writeErrorMessage(row.err, curBuf).WriteTo(writer)
|
||||
flusher, okFlush := writer.(http.Flusher)
|
||||
@@ -141,7 +129,7 @@ func Execute(writer io.Writer, f format.Select) error {
|
||||
return err
|
||||
}
|
||||
curBuf.Reset()
|
||||
close(myRow)
|
||||
close(rowCh)
|
||||
return nil
|
||||
} else if ok {
|
||||
_, err := writeRecordMessage(row.record, curBuf).WriteTo(writer)
|
||||
|
||||
@@ -122,12 +122,12 @@ func writeHeaderSize(headerLength int) []byte {
|
||||
}
|
||||
|
||||
// writeCRC writes the CRC for both the prelude and and the end of the protocol.
|
||||
func writeCRC(myBuffer []byte) []byte {
|
||||
func writeCRC(buffer []byte) []byte {
|
||||
// Calculate the CRC here:
|
||||
myCRC := make([]byte, 4)
|
||||
cksum := crc32.ChecksumIEEE(myBuffer)
|
||||
binary.BigEndian.PutUint32(myCRC, cksum)
|
||||
return myCRC
|
||||
crc := make([]byte, 4)
|
||||
cksum := crc32.ChecksumIEEE(buffer)
|
||||
binary.BigEndian.PutUint32(crc, cksum)
|
||||
return crc
|
||||
}
|
||||
|
||||
// writePayload writes the Payload for those protocols which the Payload is
|
||||
|
||||
@@ -17,13 +17,13 @@
|
||||
package s3select
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"math"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/format"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/xwb1989/sqlparser"
|
||||
)
|
||||
|
||||
@@ -36,29 +36,28 @@ type SelectFuncs struct {
|
||||
|
||||
// RunSqlParser allows us to easily bundle all the functions from above and run
|
||||
// them in the appropriate order.
|
||||
func runSelectParser(f format.Select, myRow chan Row) {
|
||||
reqCols, alias, myLimit, whereClause, aggFunctionNames, myFuncs, myErr := ParseSelect(f)
|
||||
if myErr != nil {
|
||||
myRow <- Row{
|
||||
err: myErr,
|
||||
func runSelectParser(f format.Select, rowCh chan Row) {
|
||||
reqCols, alias, limit, wc, aggFunctionNames, fns, err := ParseSelect(f)
|
||||
if err != nil {
|
||||
rowCh <- Row{
|
||||
err: err,
|
||||
}
|
||||
return
|
||||
}
|
||||
processSelectReq(reqCols, alias, whereClause, myLimit, aggFunctionNames, myRow, myFuncs, f)
|
||||
|
||||
processSelectReq(reqCols, alias, wc, limit, aggFunctionNames, rowCh, fns, f)
|
||||
}
|
||||
|
||||
// ParseSelect parses the SELECT expression, and effectively tokenizes it into
|
||||
// its separate parts. It returns the requested column names,alias,limit of
|
||||
// records, and the where clause.
|
||||
func ParseSelect(f format.Select) ([]string, string, int64, interface{}, []string, SelectFuncs, error) {
|
||||
func ParseSelect(f format.Select) ([]string, string, int64, sqlparser.Expr, []string, SelectFuncs, error) {
|
||||
var sFuncs = SelectFuncs{}
|
||||
var whereClause interface{}
|
||||
var whereClause sqlparser.Expr
|
||||
var alias string
|
||||
var limit int64
|
||||
|
||||
stmt, err := sqlparser.Parse(f.Expression())
|
||||
// TODO Maybe can parse their errors a bit to return some more of the s3 errors
|
||||
// TODO: Maybe can parse their errors a bit to return some more of the s3 errors
|
||||
if err != nil {
|
||||
return nil, "", 0, nil, nil, sFuncs, ErrLexerInvalidChar
|
||||
}
|
||||
@@ -66,73 +65,64 @@ func ParseSelect(f format.Select) ([]string, string, int64, interface{}, []strin
|
||||
switch stmt := stmt.(type) {
|
||||
case *sqlparser.Select:
|
||||
// evaluates the where clause
|
||||
functionNames := make([]string, len(stmt.SelectExprs))
|
||||
fnNames := make([]string, len(stmt.SelectExprs))
|
||||
columnNames := make([]string, len(stmt.SelectExprs))
|
||||
|
||||
if stmt.Where != nil {
|
||||
switch expr := stmt.Where.Expr.(type) {
|
||||
default:
|
||||
whereClause = expr
|
||||
case *sqlparser.ComparisonExpr:
|
||||
whereClause = expr
|
||||
}
|
||||
whereClause = stmt.Where.Expr
|
||||
}
|
||||
if stmt.SelectExprs != nil {
|
||||
for i := 0; i < len(stmt.SelectExprs); i++ {
|
||||
switch expr := stmt.SelectExprs[i].(type) {
|
||||
case *sqlparser.StarExpr:
|
||||
columnNames[0] = "*"
|
||||
case *sqlparser.AliasedExpr:
|
||||
switch smallerexpr := expr.Expr.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
if smallerexpr.IsAggregate() {
|
||||
functionNames[i] = smallerexpr.Name.CompliantName()
|
||||
// Will return function name
|
||||
// Case to deal with if we have functions and not an asterix
|
||||
switch tempagg := smallerexpr.Exprs[0].(type) {
|
||||
case *sqlparser.StarExpr:
|
||||
columnNames[0] = "*"
|
||||
if smallerexpr.Name.CompliantName() != "count" {
|
||||
return nil, "", 0, nil, nil, sFuncs, ErrParseUnsupportedCallWithStar
|
||||
}
|
||||
case *sqlparser.AliasedExpr:
|
||||
switch col := tempagg.Expr.(type) {
|
||||
case *sqlparser.BinaryExpr:
|
||||
return nil, "", 0, nil, nil, sFuncs, ErrParseNonUnaryAgregateFunctionCall
|
||||
case *sqlparser.ColName:
|
||||
columnNames[i] = col.Name.CompliantName()
|
||||
}
|
||||
for i, sexpr := range stmt.SelectExprs {
|
||||
switch expr := sexpr.(type) {
|
||||
case *sqlparser.StarExpr:
|
||||
columnNames[0] = "*"
|
||||
case *sqlparser.AliasedExpr:
|
||||
switch smallerexpr := expr.Expr.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
if smallerexpr.IsAggregate() {
|
||||
fnNames[i] = smallerexpr.Name.CompliantName()
|
||||
// Will return function name
|
||||
// Case to deal with if we have functions and not an asterix
|
||||
switch tempagg := smallerexpr.Exprs[0].(type) {
|
||||
case *sqlparser.StarExpr:
|
||||
columnNames[0] = "*"
|
||||
if smallerexpr.Name.CompliantName() != "count" {
|
||||
return nil, "", 0, nil, nil, sFuncs, ErrParseUnsupportedCallWithStar
|
||||
}
|
||||
// Case to deal with if COALESCE was used..
|
||||
} else if supportedFunc(smallerexpr.Name.CompliantName()) {
|
||||
if sFuncs.funcExpr == nil {
|
||||
sFuncs.funcExpr = make([]*sqlparser.FuncExpr, len(stmt.SelectExprs))
|
||||
sFuncs.index = make([]int, len(stmt.SelectExprs))
|
||||
case *sqlparser.AliasedExpr:
|
||||
switch col := tempagg.Expr.(type) {
|
||||
case *sqlparser.BinaryExpr:
|
||||
return nil, "", 0, nil, nil, sFuncs, ErrParseNonUnaryAgregateFunctionCall
|
||||
case *sqlparser.ColName:
|
||||
columnNames[i] = col.Name.CompliantName()
|
||||
}
|
||||
sFuncs.funcExpr[i] = smallerexpr
|
||||
sFuncs.index[i] = i
|
||||
} else {
|
||||
return nil, "", 0, nil, nil, sFuncs, ErrUnsupportedSQLOperation
|
||||
}
|
||||
case *sqlparser.ColName:
|
||||
columnNames[i] = smallerexpr.Name.CompliantName()
|
||||
// Case to deal with if COALESCE was used..
|
||||
} else if supportedFunc(smallerexpr.Name.CompliantName()) {
|
||||
if sFuncs.funcExpr == nil {
|
||||
sFuncs.funcExpr = make([]*sqlparser.FuncExpr, len(stmt.SelectExprs))
|
||||
sFuncs.index = make([]int, len(stmt.SelectExprs))
|
||||
}
|
||||
sFuncs.funcExpr[i] = smallerexpr
|
||||
sFuncs.index[i] = i
|
||||
} else {
|
||||
return nil, "", 0, nil, nil, sFuncs, ErrUnsupportedSQLOperation
|
||||
}
|
||||
case *sqlparser.ColName:
|
||||
columnNames[i] = smallerexpr.Name.CompliantName()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This code retrieves the alias and makes sure it is set to the correct
|
||||
// value, if not it sets it to the tablename
|
||||
if (stmt.From) != nil {
|
||||
for i := 0; i < len(stmt.From); i++ {
|
||||
switch smallerexpr := stmt.From[i].(type) {
|
||||
case *sqlparser.JoinTableExpr:
|
||||
return nil, "", 0, nil, nil, sFuncs, ErrParseMalformedJoin
|
||||
case *sqlparser.AliasedTableExpr:
|
||||
alias = smallerexpr.As.CompliantName()
|
||||
if alias == "" {
|
||||
alias = sqlparser.GetTableName(smallerexpr.Expr).CompliantName()
|
||||
}
|
||||
for _, fexpr := range stmt.From {
|
||||
switch smallerexpr := fexpr.(type) {
|
||||
case *sqlparser.JoinTableExpr:
|
||||
return nil, "", 0, nil, nil, sFuncs, ErrParseMalformedJoin
|
||||
case *sqlparser.AliasedTableExpr:
|
||||
alias = smallerexpr.As.CompliantName()
|
||||
if alias == "" {
|
||||
alias = sqlparser.GetTableName(smallerexpr.Expr).CompliantName()
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -153,143 +143,170 @@ func ParseSelect(f format.Select) ([]string, string, int64, interface{}, []strin
|
||||
if err := parseErrs(columnNames, whereClause, alias, sFuncs, f); err != nil {
|
||||
return nil, "", 0, nil, nil, sFuncs, err
|
||||
}
|
||||
return columnNames, alias, limit, whereClause, functionNames, sFuncs, nil
|
||||
return columnNames, alias, limit, whereClause, fnNames, sFuncs, nil
|
||||
}
|
||||
return nil, "", 0, nil, nil, sFuncs, nil
|
||||
}
|
||||
|
||||
type columnKv struct {
|
||||
Key string
|
||||
Value int
|
||||
}
|
||||
|
||||
func columnsIndex(reqColNames []string, f format.Select) ([]columnKv, error) {
|
||||
var (
|
||||
columnsKv []columnKv
|
||||
columnsMap = make(map[string]int)
|
||||
columns = f.Header()
|
||||
)
|
||||
if f.HasHeader() {
|
||||
err := checkForDuplicates(columns, columnsMap)
|
||||
if format.IsInt(reqColNames[0]) {
|
||||
err = ErrMissingHeaders
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for k, v := range columnsMap {
|
||||
columnsKv = append(columnsKv, columnKv{
|
||||
Key: k,
|
||||
Value: v,
|
||||
})
|
||||
}
|
||||
} else {
|
||||
for i := range columns {
|
||||
columnsKv = append(columnsKv, columnKv{
|
||||
Key: "_" + strconv.Itoa(i),
|
||||
Value: i,
|
||||
})
|
||||
}
|
||||
}
|
||||
sort.Slice(columnsKv, func(i, j int) bool {
|
||||
return columnsKv[i].Value < columnsKv[j].Value
|
||||
})
|
||||
return columnsKv, nil
|
||||
}
|
||||
|
||||
// This is the main function, It goes row by row and for records which validate
|
||||
// the where clause it currently prints the appropriate row given the requested
|
||||
// columns.
|
||||
func processSelectReq(reqColNames []string, alias string, whereClause interface{}, limitOfRecords int64, functionNames []string, myRow chan Row, myFunc SelectFuncs, f format.Select) {
|
||||
func processSelectReq(reqColNames []string, alias string, wc sqlparser.Expr, lrecords int64, fnNames []string, rowCh chan Row, fn SelectFuncs, f format.Select) {
|
||||
counter := -1
|
||||
var columns []string
|
||||
filtrCount := 0
|
||||
functionFlag := false
|
||||
// My values is used to store our aggregation values if we need to store them.
|
||||
myAggVals := make([]float64, len(reqColNames))
|
||||
// LowercasecolumnsMap is used in accordance with hasDuplicates so that we can
|
||||
// raise the error "Ambigious" if a case insensitive column is provided and we
|
||||
// have multiple matches.
|
||||
lowercaseColumnsMap := make(map[string]int)
|
||||
hasDuplicates := make(map[string]bool)
|
||||
// ColumnsMap stores our columns and their index.
|
||||
columnsMap := make(map[string]int)
|
||||
if limitOfRecords == 0 {
|
||||
limitOfRecords = math.MaxInt64
|
||||
|
||||
// Values used to store our aggregation values.
|
||||
aggVals := make([]float64, len(reqColNames))
|
||||
if lrecords == 0 {
|
||||
lrecords = math.MaxInt64
|
||||
}
|
||||
|
||||
columnsKv, err := columnsIndex(reqColNames, f)
|
||||
if err != nil {
|
||||
rowCh <- Row{
|
||||
err: err,
|
||||
}
|
||||
return
|
||||
}
|
||||
var results = make([]string, len(columnsKv))
|
||||
|
||||
for {
|
||||
record, err := f.Read()
|
||||
if err != nil {
|
||||
myRow <- Row{
|
||||
rowCh <- Row{
|
||||
err: err,
|
||||
}
|
||||
return
|
||||
}
|
||||
if record == nil {
|
||||
if functionFlag {
|
||||
myRow <- Row{
|
||||
record: aggFuncToStr(myAggVals, f) + "\n",
|
||||
rowCh <- Row{
|
||||
record: aggFuncToStr(aggVals, f) + "\n",
|
||||
}
|
||||
}
|
||||
close(myRow)
|
||||
close(rowCh)
|
||||
return
|
||||
}
|
||||
|
||||
out, _ := json.Marshal(record)
|
||||
f.UpdateBytesProcessed(record)
|
||||
f.UpdateBytesProcessed(int64(len(record)))
|
||||
|
||||
if counter == -1 && f.HasHeader() && len(f.Header()) > 0 {
|
||||
columns = f.Header()
|
||||
myErr := checkForDuplicates(columns, columnsMap, hasDuplicates, lowercaseColumnsMap)
|
||||
if format.IsInt(reqColNames[0]) {
|
||||
myErr = ErrMissingHeaders
|
||||
}
|
||||
if myErr != nil {
|
||||
myRow <- Row{
|
||||
err: myErr,
|
||||
}
|
||||
return
|
||||
}
|
||||
} else if counter == -1 && len(f.Header()) > 0 {
|
||||
columns = f.Header()
|
||||
for i := 0; i < len(columns); i++ {
|
||||
columnsMap["_"+strconv.Itoa(i)] = i
|
||||
}
|
||||
|
||||
}
|
||||
// Return in case the number of record reaches the LIMIT defined in select query
|
||||
if int64(filtrCount) == limitOfRecords && limitOfRecords != 0 {
|
||||
close(myRow)
|
||||
// Return in case the number of record reaches the LIMIT
|
||||
// defined in select query
|
||||
if int64(filtrCount) == lrecords {
|
||||
close(rowCh)
|
||||
return
|
||||
}
|
||||
|
||||
// The call to the where function clause,ensures that the rows we print match our where clause.
|
||||
condition, myErr := matchesMyWhereClause(record, alias, whereClause)
|
||||
if myErr != nil {
|
||||
myRow <- Row{
|
||||
err: myErr,
|
||||
// The call to the where function clause, ensures that
|
||||
// the rows we print match our where clause.
|
||||
condition, err := matchesMyWhereClause(record, alias, wc)
|
||||
if err != nil {
|
||||
rowCh <- Row{
|
||||
err: err,
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if condition {
|
||||
// if its an asterix we just print everything in the row
|
||||
if reqColNames[0] == "*" && functionNames[0] == "" {
|
||||
var row Row
|
||||
if reqColNames[0] == "*" && fnNames[0] == "" {
|
||||
switch f.Type() {
|
||||
case format.CSV:
|
||||
row = Row{
|
||||
record: strings.Join(convertToSlice(columnsMap, record, string(out)), f.OutputFieldDelimiter()) + "\n",
|
||||
for i, kv := range columnsKv {
|
||||
results[i] = gjson.GetBytes(record, kv.Key).String()
|
||||
}
|
||||
rowCh <- Row{
|
||||
record: strings.Join(results, f.OutputFieldDelimiter()) + "\n",
|
||||
}
|
||||
case format.JSON:
|
||||
row = Row{
|
||||
record: string(out) + "\n",
|
||||
rowCh <- Row{
|
||||
record: string(record) + "\n",
|
||||
}
|
||||
}
|
||||
myRow <- row
|
||||
} else if alias != "" {
|
||||
// This is for dealing with the case of if we have to deal with a
|
||||
// request for a column with an index e.g A_1.
|
||||
if format.IsInt(reqColNames[0]) {
|
||||
// This checks whether any aggregation function was called as now we
|
||||
// no longer will go through printing each row, and only print at the end
|
||||
if len(functionNames) > 0 && functionNames[0] != "" {
|
||||
if len(fnNames) > 0 && fnNames[0] != "" {
|
||||
functionFlag = true
|
||||
aggregationFunctions(counter, filtrCount, myAggVals, reqColNames, functionNames, string(out))
|
||||
aggregationFns(counter, filtrCount, aggVals, reqColNames, fnNames, record)
|
||||
} else {
|
||||
// The code below finds the appropriate columns of the row given the
|
||||
// indicies provided in the SQL request and utilizes the map to
|
||||
// retrieve the correct part of the row.
|
||||
myQueryRow, myErr := processColNameIndex(string(out), reqColNames, columns, f)
|
||||
if myErr != nil {
|
||||
myRow <- Row{
|
||||
err: myErr,
|
||||
// indicies provided in the SQL request.
|
||||
var rowStr string
|
||||
rowStr, err = processColNameIndex(record, reqColNames, f)
|
||||
if err != nil {
|
||||
rowCh <- Row{
|
||||
err: err,
|
||||
}
|
||||
return
|
||||
}
|
||||
myRow <- Row{
|
||||
record: myQueryRow + "\n",
|
||||
rowCh <- Row{
|
||||
record: rowStr + "\n",
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// This code does aggregation if we were provided column names in the
|
||||
// form of acutal names rather an indices.
|
||||
if len(functionNames) > 0 && functionNames[0] != "" {
|
||||
// form of actual names rather an indices.
|
||||
if len(fnNames) > 0 && fnNames[0] != "" {
|
||||
functionFlag = true
|
||||
aggregationFunctions(counter, filtrCount, myAggVals, reqColNames, functionNames, string(out))
|
||||
aggregationFns(counter, filtrCount, aggVals, reqColNames, fnNames, record)
|
||||
} else {
|
||||
// This code prints the appropriate part of the row given the filter
|
||||
// and select request, if the select request was based on column
|
||||
// names rather than indices.
|
||||
myQueryRow, myErr := processColNameLiteral(string(out), reqColNames, myFunc, f)
|
||||
if myErr != nil {
|
||||
myRow <- Row{
|
||||
err: myErr,
|
||||
var rowStr string
|
||||
rowStr, err = processColNameLiteral(record, reqColNames, fn, f)
|
||||
if err != nil {
|
||||
rowCh <- Row{
|
||||
err: err,
|
||||
}
|
||||
return
|
||||
}
|
||||
myRow <- Row{
|
||||
record: myQueryRow + "\n",
|
||||
rowCh <- Row{
|
||||
record: rowStr + "\n",
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -304,7 +321,7 @@ func processSelectReq(reqColNames []string, alias string, whereClause interface{
|
||||
func processColumnNames(reqColNames []string, alias string, f format.Select) error {
|
||||
switch f.Type() {
|
||||
case format.CSV:
|
||||
for i := 0; i < len(reqColNames); i++ {
|
||||
for i := range reqColNames {
|
||||
// The code below basically cleans the column name of its alias and other
|
||||
// syntax, so that we can extract its pure name.
|
||||
reqColNames[i] = cleanCol(reqColNames[i], alias)
|
||||
@@ -316,45 +333,43 @@ func processColumnNames(reqColNames []string, alias string, f format.Select) err
|
||||
return nil
|
||||
}
|
||||
|
||||
// processColNameIndex is the function which creates the row for an index based
|
||||
// query.
|
||||
func processColNameIndex(record string, reqColNames []string, columns []string, f format.Select) (string, error) {
|
||||
row := make([]string, len(reqColNames))
|
||||
for i := 0; i < len(reqColNames); i++ {
|
||||
// processColNameIndex is the function which creates the row for an index based query.
|
||||
func processColNameIndex(record []byte, reqColNames []string, f format.Select) (string, error) {
|
||||
var row []string
|
||||
for _, colName := range reqColNames {
|
||||
// COALESCE AND NULLIF do not support index based access.
|
||||
if reqColNames[0] == "0" {
|
||||
return "", format.ErrInvalidColumnIndex
|
||||
}
|
||||
mytempindex, err := strconv.Atoi(reqColNames[i])
|
||||
if mytempindex > len(columns) {
|
||||
return "", format.ErrInvalidColumnIndex
|
||||
}
|
||||
|
||||
cindex, err := strconv.Atoi(colName)
|
||||
if err != nil {
|
||||
return "", ErrMissingHeaders
|
||||
}
|
||||
// Subtract 1 because AWS Indexing is not 0 based, it starts at 1 generating the key like "_1".
|
||||
row[i] = jsonValue(string("_"+strconv.Itoa(mytempindex-1)), record)
|
||||
if cindex > len(f.Header()) {
|
||||
return "", format.ErrInvalidColumnIndex
|
||||
}
|
||||
|
||||
// Subtract 1 because SELECT indexing is not 0 based, it
|
||||
// starts at 1 generating the key like "_1".
|
||||
row = append(row, gjson.GetBytes(record, string("_"+strconv.Itoa(cindex-1))).String())
|
||||
}
|
||||
rowStr := strings.Join(row, f.OutputFieldDelimiter())
|
||||
if len(rowStr) > MaxCharsPerRecord {
|
||||
return "", ErrOverMaxRecordSize
|
||||
}
|
||||
|
||||
return rowStr, nil
|
||||
}
|
||||
|
||||
// processColNameLiteral is the function which creates the row for an name based
|
||||
// query.
|
||||
func processColNameLiteral(record string, reqColNames []string, myFunc SelectFuncs, f format.Select) (string, error) {
|
||||
// processColNameLiteral is the function which creates the row for an name based query.
|
||||
func processColNameLiteral(record []byte, reqColNames []string, fn SelectFuncs, f format.Select) (string, error) {
|
||||
row := make([]string, len(reqColNames))
|
||||
for i := 0; i < len(reqColNames); i++ {
|
||||
for i, colName := range reqColNames {
|
||||
// this is the case to deal with COALESCE.
|
||||
if reqColNames[i] == "" && isValidFunc(myFunc.index, i) {
|
||||
row[i] = evaluateFuncExpr(myFunc.funcExpr[i], "", record)
|
||||
if colName == "" && isValidFunc(fn.index, i) {
|
||||
row[i] = evaluateFuncExpr(fn.funcExpr[i], "", record)
|
||||
continue
|
||||
}
|
||||
row[i] = jsonValue(reqColNames[i], record)
|
||||
row[i] = gjson.GetBytes(record, colName).String()
|
||||
}
|
||||
rowStr := strings.Join(row, f.OutputFieldDelimiter())
|
||||
if len(rowStr) > MaxCharsPerRecord {
|
||||
@@ -363,81 +378,57 @@ func processColNameLiteral(record string, reqColNames []string, myFunc SelectFun
|
||||
return rowStr, nil
|
||||
}
|
||||
|
||||
// aggregationFunctions is a function which performs the actual aggregation
|
||||
// aggregationFns is a function which performs the actual aggregation
|
||||
// methods on the given row, it uses an array defined in the main parsing
|
||||
// function to keep track of values.
|
||||
func aggregationFunctions(counter int, filtrCount int, myAggVals []float64, storeReqCols []string, storeFunctions []string, record string) error {
|
||||
for i := 0; i < len(storeFunctions); i++ {
|
||||
if storeFunctions[i] == "" {
|
||||
i++
|
||||
} else if storeFunctions[i] == "count" {
|
||||
myAggVals[i]++
|
||||
} else {
|
||||
// If column names are provided as an index it'll use this if statement instead of the else/
|
||||
func aggregationFns(counter int, filtrCount int, aggVals []float64, storeReqCols []string, storeFns []string, record []byte) error {
|
||||
for i, storeFn := range storeFns {
|
||||
switch storeFn {
|
||||
case "":
|
||||
continue
|
||||
case "count":
|
||||
aggVals[i]++
|
||||
default:
|
||||
// Column names are provided as an index it'll use
|
||||
// this if statement instead.
|
||||
var convAggFloat float64
|
||||
if format.IsInt(storeReqCols[i]) {
|
||||
myIndex, _ := strconv.Atoi(storeReqCols[i])
|
||||
convAggFloat, _ = strconv.ParseFloat(jsonValue(string("_"+strconv.Itoa(myIndex)), record), 64)
|
||||
|
||||
index, _ := strconv.Atoi(storeReqCols[i])
|
||||
convAggFloat = gjson.GetBytes(record, "_"+strconv.Itoa(index)).Float()
|
||||
} else {
|
||||
// case that the columns are in the form of named columns rather than indices.
|
||||
convAggFloat, _ = strconv.ParseFloat(jsonValue(storeReqCols[i], record), 64)
|
||||
// Named columns rather than indices.
|
||||
convAggFloat = gjson.GetBytes(record, storeReqCols[i]).Float()
|
||||
}
|
||||
// This if statement is for calculating the min.
|
||||
if storeFunctions[i] == "min" {
|
||||
switch storeFn {
|
||||
case "min":
|
||||
if counter == -1 {
|
||||
myAggVals[i] = math.MaxFloat64
|
||||
aggVals[i] = math.MaxFloat64
|
||||
}
|
||||
if convAggFloat < myAggVals[i] {
|
||||
myAggVals[i] = convAggFloat
|
||||
if convAggFloat < aggVals[i] {
|
||||
aggVals[i] = convAggFloat
|
||||
}
|
||||
|
||||
} else if storeFunctions[i] == "max" {
|
||||
// This if statement is for calculating the max.
|
||||
case "max":
|
||||
// Calculate the max.
|
||||
if counter == -1 {
|
||||
myAggVals[i] = math.SmallestNonzeroFloat64
|
||||
aggVals[i] = math.SmallestNonzeroFloat64
|
||||
}
|
||||
if convAggFloat > myAggVals[i] {
|
||||
myAggVals[i] = convAggFloat
|
||||
if convAggFloat > aggVals[i] {
|
||||
aggVals[i] = convAggFloat
|
||||
}
|
||||
|
||||
} else if storeFunctions[i] == "sum" {
|
||||
// This if statement is for calculating the sum.
|
||||
myAggVals[i] += convAggFloat
|
||||
|
||||
} else if storeFunctions[i] == "avg" {
|
||||
// This if statement is for calculating the average.
|
||||
case "sum":
|
||||
// Calculate the sum.
|
||||
aggVals[i] += convAggFloat
|
||||
case "avg":
|
||||
// Calculating the average.
|
||||
if filtrCount == 0 {
|
||||
myAggVals[i] = convAggFloat
|
||||
aggVals[i] = convAggFloat
|
||||
} else {
|
||||
myAggVals[i] = (convAggFloat + (myAggVals[i] * float64(filtrCount))) / float64((filtrCount + 1))
|
||||
aggVals[i] = (convAggFloat + (aggVals[i] * float64(filtrCount))) / float64((filtrCount + 1))
|
||||
}
|
||||
} else {
|
||||
default:
|
||||
return ErrParseNonUnaryAgregateFunctionCall
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// convertToSlice takes the map[string]interface{} and convert it to []string
|
||||
func convertToSlice(columnsMap map[string]int, record map[string]interface{}, marshalledRecord string) []string {
|
||||
var result []string
|
||||
type kv struct {
|
||||
Key string
|
||||
Value int
|
||||
}
|
||||
var ss []kv
|
||||
for k, v := range columnsMap {
|
||||
ss = append(ss, kv{k, v})
|
||||
}
|
||||
sort.Slice(ss, func(i, j int) bool {
|
||||
return ss[i].Value < ss[j].Value
|
||||
})
|
||||
for _, kv := range ss {
|
||||
if _, ok := record[kv.Key]; ok {
|
||||
result = append(result, jsonValue(kv.Key, marshalledRecord))
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -17,34 +17,20 @@
|
||||
package s3select
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"math/rand"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
humanize "github.com/dustin/go-humanize"
|
||||
"github.com/tidwall/gjson"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/format"
|
||||
)
|
||||
|
||||
// Unit Test for the checkForDuplicates function.
|
||||
func TestCheckForDuplicates(t *testing.T) {
|
||||
tables := []struct {
|
||||
myReq []string
|
||||
myHeaders map[string]int
|
||||
myDup map[string]bool
|
||||
myLow map[string]int
|
||||
myErr error
|
||||
}{
|
||||
{[]string{"name", "id", "last_name", "last_name"}, make(map[string]int), make(map[string]bool), make(map[string]int), ErrAmbiguousFieldName},
|
||||
{[]string{"name", "id", "last_name", "another_name"}, make(map[string]int), make(map[string]bool), make(map[string]int), nil},
|
||||
}
|
||||
|
||||
for _, table := range tables {
|
||||
err := checkForDuplicates(table.myReq, table.myHeaders, table.myDup, table.myLow)
|
||||
if err != table.myErr {
|
||||
t.Error()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This function returns the index of a string in a list
|
||||
func stringIndex(a string, list []string) int {
|
||||
for i, v := range list {
|
||||
@@ -55,9 +41,9 @@ func stringIndex(a string, list []string) int {
|
||||
return -1
|
||||
}
|
||||
|
||||
// TestMyHelperFunctions is a unit test which tests some small helper string
|
||||
// functions.
|
||||
func TestMyHelperFunctions(t *testing.T) {
|
||||
// TestHelperFunctions is a unit test which tests some
|
||||
// small helper string functions.
|
||||
func TestHelperFunctions(t *testing.T) {
|
||||
tables := []struct {
|
||||
myReq string
|
||||
myList []string
|
||||
@@ -78,37 +64,44 @@ func TestMyHelperFunctions(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestMyStateMachine is a unit test which ensures that the lowest level of the
|
||||
// TestStateMachine is a unit test which ensures that the lowest level of the
|
||||
// interpreter is converting properly.
|
||||
func TestMyStateMachine(t *testing.T) {
|
||||
func TestStateMachine(t *testing.T) {
|
||||
tables := []struct {
|
||||
operand interface{}
|
||||
operand string
|
||||
operator string
|
||||
leftArg string
|
||||
err error
|
||||
expected bool
|
||||
}{
|
||||
{"", ">", "2012", nil, true},
|
||||
{"2005", ">", "2012", nil, true},
|
||||
{2005, ">", "2012", nil, true},
|
||||
{2012.0000, ">", "2014.000", nil, true},
|
||||
{"NA", ">", "2014.000", nil, false},
|
||||
{2014, ">", "Random", nil, false},
|
||||
{"2005", ">", "2012", nil, true},
|
||||
{"2012.0000", ">", "2014.000", nil, true},
|
||||
{"2012", "!=", "2014.000", nil, true},
|
||||
{"NA", ">", "2014.000", nil, true},
|
||||
{"2012", ">", "2014.000", nil, false},
|
||||
{"2012.0000", ">", "2014", nil, false},
|
||||
{"", "<", "2012", nil, false},
|
||||
{"2012.0000", "<", "2014.000", nil, false},
|
||||
{"2014", ">", "Random", nil, false},
|
||||
{"test3", ">", "aandom", nil, false},
|
||||
{"true", ">", "true", ErrUnsupportedSyntax, false},
|
||||
}
|
||||
for _, table := range tables {
|
||||
val, err := evaluateOperator(table.leftArg, table.operator, table.operand)
|
||||
for i, table := range tables {
|
||||
val, err := evaluateOperator(gjson.Parse(table.leftArg), table.operator, gjson.Parse(table.operand))
|
||||
if err != table.err {
|
||||
t.Error()
|
||||
t.Errorf("Test %d: expected %v, got %v", i+1, table.err, err)
|
||||
}
|
||||
if val != table.expected {
|
||||
t.Error()
|
||||
t.Errorf("Test %d: expected %t, got %t", i+1, table.expected, val)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestMyOperators is a unit test which ensures that the appropriate values are
|
||||
// TestOperators is a unit test which ensures that the appropriate values are
|
||||
// being returned from the operators functions.
|
||||
func TestMyOperators(t *testing.T) {
|
||||
func TestOperators(t *testing.T) {
|
||||
tables := []struct {
|
||||
operator string
|
||||
err error
|
||||
@@ -124,27 +117,8 @@ func TestMyOperators(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestMyConversion ensures that the conversion of the value from the csv
|
||||
// happens correctly.
|
||||
func TestMyConversion(t *testing.T) {
|
||||
tables := []struct {
|
||||
myTblVal string
|
||||
expected reflect.Kind
|
||||
}{
|
||||
{"2014", reflect.Int},
|
||||
{"2014.000", reflect.Float64},
|
||||
{"String!!!", reflect.String},
|
||||
}
|
||||
for _, table := range tables {
|
||||
val := reflect.ValueOf(checkStringType(table.myTblVal)).Kind()
|
||||
if val != table.expected {
|
||||
t.Error()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unit tests for the main function that performs aggreggation.
|
||||
func TestMyAggregationFunc(t *testing.T) {
|
||||
func TestAggregationFunc(t *testing.T) {
|
||||
columnsMap := make(map[string]int)
|
||||
columnsMap["Col1"] = 0
|
||||
columnsMap["Col2"] = 1
|
||||
@@ -155,22 +129,23 @@ func TestMyAggregationFunc(t *testing.T) {
|
||||
columnsMap map[string]int
|
||||
storeReqCols []string
|
||||
storeFunctions []string
|
||||
record string
|
||||
record []byte
|
||||
err error
|
||||
expectedVal float64
|
||||
}{
|
||||
{10, 5, []float64{10, 11, 12, 13, 14}, columnsMap, []string{"Col1"}, []string{"count"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 11},
|
||||
{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"min"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 1},
|
||||
{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"max"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 10},
|
||||
{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"sum"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 11},
|
||||
{1, 1, []float64{10}, columnsMap, []string{"Col1"}, []string{"avg"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 5.500},
|
||||
{10, 5, []float64{0.0000}, columnsMap, []string{"Col1"}, []string{"random"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", ErrParseNonUnaryAgregateFunctionCall, 0},
|
||||
{0, 5, []float64{0}, columnsMap, []string{"0"}, []string{"count"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 1},
|
||||
{10, 5, []float64{10}, columnsMap, []string{"1"}, []string{"min"}, "{\"_1\":\"1\",\"_2\":\"2\"}", nil, 1},
|
||||
{10, 5, []float64{10, 11, 12, 13, 14}, columnsMap, []string{"Col1"}, []string{"count"}, []byte("{\"Col1\":\"1\",\"Col2\":\"2\"}"), nil, 11},
|
||||
{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"min"}, []byte("{\"Col1\":\"1\",\"Col2\":\"2\"}"), nil, 1},
|
||||
{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"max"}, []byte("{\"Col1\":\"1\",\"Col2\":\"2\"}"), nil, 10},
|
||||
{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"sum"}, []byte("{\"Col1\":\"1\",\"Col2\":\"2\"}"), nil, 11},
|
||||
{1, 1, []float64{10}, columnsMap, []string{"Col1"}, []string{"avg"}, []byte("{\"Col1\":\"1\",\"Col2\":\"2\"}"), nil, 5.500},
|
||||
{10, 5, []float64{0.0000}, columnsMap, []string{"Col1"}, []string{"random"}, []byte("{\"Col1\":\"1\",\"Col2\":\"2\"}"),
|
||||
ErrParseNonUnaryAgregateFunctionCall, 0},
|
||||
{0, 5, []float64{0}, columnsMap, []string{"0"}, []string{"count"}, []byte("{\"Col1\":\"1\",\"Col2\":\"2\"}"), nil, 1},
|
||||
{10, 5, []float64{10}, columnsMap, []string{"1"}, []string{"min"}, []byte("{\"_1\":\"1\",\"_2\":\"2\"}"), nil, 1},
|
||||
}
|
||||
|
||||
for _, table := range tables {
|
||||
err := aggregationFunctions(table.counter, table.filtrCount, table.myAggVals, table.storeReqCols, table.storeFunctions, table.record)
|
||||
err := aggregationFns(table.counter, table.filtrCount, table.myAggVals, table.storeReqCols, table.storeFunctions, table.record)
|
||||
if table.err != err {
|
||||
t.Error()
|
||||
}
|
||||
@@ -181,9 +156,9 @@ func TestMyAggregationFunc(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestMyStringComparator is a unit test which ensures that the appropriate
|
||||
// TestStringComparator is a unit test which ensures that the appropriate
|
||||
// values are being compared for strings.
|
||||
func TestMyStringComparator(t *testing.T) {
|
||||
func TestStringComparator(t *testing.T) {
|
||||
tables := []struct {
|
||||
operand string
|
||||
operator string
|
||||
@@ -211,9 +186,9 @@ func TestMyStringComparator(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestMyFloatComparator is a unit test which ensures that the appropriate
|
||||
// TestFloatComparator is a unit test which ensures that the appropriate
|
||||
// values are being compared for floats.
|
||||
func TestMyFloatComparator(t *testing.T) {
|
||||
func TestFloatComparator(t *testing.T) {
|
||||
tables := []struct {
|
||||
operand float64
|
||||
operator string
|
||||
@@ -240,9 +215,9 @@ func TestMyFloatComparator(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestMyIntComparator is a unit test which ensures that the appropriate values
|
||||
// TestIntComparator is a unit test which ensures that the appropriate values
|
||||
// are being compared for ints.
|
||||
func TestMyIntComparator(t *testing.T) {
|
||||
func TestIntComparator(t *testing.T) {
|
||||
tables := []struct {
|
||||
operand int64
|
||||
operator string
|
||||
@@ -269,9 +244,9 @@ func TestMyIntComparator(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestMySizeFunction is a function which provides unit testing for the function
|
||||
// TestSizeFunction is a function which provides unit testing for the function
|
||||
// which calculates size.
|
||||
func TestMySizeFunction(t *testing.T) {
|
||||
func TestSizeFunction(t *testing.T) {
|
||||
tables := []struct {
|
||||
myRecord []string
|
||||
expected int64
|
||||
@@ -471,20 +446,19 @@ func TestMatch(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestMyFuncProcessing is a unit test which ensures that the appropriate values are
|
||||
// TestFuncProcessing is a unit test which ensures that the appropriate values are
|
||||
// being returned from the Processing... functions.
|
||||
func TestMyFuncProcessing(t *testing.T) {
|
||||
func TestFuncProcessing(t *testing.T) {
|
||||
tables := []struct {
|
||||
myString string
|
||||
nullList []string
|
||||
coalList []string
|
||||
myValString string
|
||||
myValCoal string
|
||||
myValNull string
|
||||
stringFunc string
|
||||
}{
|
||||
{"lower", []string{"yo", "yo"}, []string{"random", "hello", "random"}, "LOWER", "random", "", "UPPER"},
|
||||
{"LOWER", []string{"null", "random"}, []string{"missing", "hello", "random"}, "lower", "hello", "null", "LOWER"},
|
||||
{"lower", []string{"random", "hello", "random"}, "LOWER", "random", "", "UPPER"},
|
||||
{"LOWER", []string{"missing", "hello", "random"}, "lower", "hello", "null", "LOWER"},
|
||||
}
|
||||
for _, table := range tables {
|
||||
if table.coalList != nil {
|
||||
@@ -493,16 +467,145 @@ func TestMyFuncProcessing(t *testing.T) {
|
||||
t.Error()
|
||||
}
|
||||
}
|
||||
if table.nullList != nil {
|
||||
myVal := processNullIf(table.nullList)
|
||||
if myVal != table.myValNull {
|
||||
t.Error()
|
||||
}
|
||||
}
|
||||
myVal := applyStrFunc(table.myString, table.stringFunc)
|
||||
myVal := applyStrFunc(gjson.Result{
|
||||
Type: gjson.String,
|
||||
Str: table.myString,
|
||||
}, table.stringFunc)
|
||||
if myVal != table.myValString {
|
||||
t.Error()
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
const charset = "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||||
|
||||
var seededRand = rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
|
||||
func StringWithCharset(length int, charset string) string {
|
||||
b := make([]byte, length)
|
||||
for i := range b {
|
||||
b[i] = charset[seededRand.Intn(len(charset))]
|
||||
}
|
||||
return string(b)
|
||||
}
|
||||
|
||||
func String(length int) string {
|
||||
return StringWithCharset(length, charset)
|
||||
}
|
||||
|
||||
func genCSV(b *bytes.Buffer, records int) error {
|
||||
b.Reset()
|
||||
w := csv.NewWriter(b)
|
||||
w.Write([]string{"id", "name", "age", "city"})
|
||||
|
||||
for i := 0; i < records; i++ {
|
||||
w.Write([]string{
|
||||
strconv.Itoa(i),
|
||||
String(10),
|
||||
String(5),
|
||||
String(10),
|
||||
})
|
||||
}
|
||||
|
||||
// Write any buffered data to the underlying writer (standard output).
|
||||
w.Flush()
|
||||
|
||||
return w.Error()
|
||||
}
|
||||
|
||||
func benchmarkSQLAll(b *testing.B, records int) {
|
||||
benchmarkSQL(b, records, "select * from S3Object")
|
||||
}
|
||||
|
||||
func benchmarkSQLAggregate(b *testing.B, records int) {
|
||||
benchmarkSQL(b, records, "select count(*) from S3Object")
|
||||
}
|
||||
|
||||
func benchmarkSQL(b *testing.B, records int, query string) {
|
||||
var (
|
||||
buf bytes.Buffer
|
||||
output bytes.Buffer
|
||||
)
|
||||
genCSV(&buf, records)
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
|
||||
sreq := ObjectSelectRequest{}
|
||||
sreq.Expression = query
|
||||
sreq.ExpressionType = QueryExpressionTypeSQL
|
||||
sreq.InputSerialization.CSV = &struct {
|
||||
FileHeaderInfo CSVFileHeaderInfo
|
||||
RecordDelimiter string
|
||||
FieldDelimiter string
|
||||
QuoteCharacter string
|
||||
QuoteEscapeCharacter string
|
||||
Comments string
|
||||
}{}
|
||||
sreq.InputSerialization.CSV.FileHeaderInfo = CSVFileHeaderInfoUse
|
||||
sreq.InputSerialization.CSV.RecordDelimiter = "\n"
|
||||
sreq.InputSerialization.CSV.FieldDelimiter = ","
|
||||
|
||||
sreq.OutputSerialization.CSV = &struct {
|
||||
QuoteFields CSVQuoteFields
|
||||
RecordDelimiter string
|
||||
FieldDelimiter string
|
||||
QuoteCharacter string
|
||||
QuoteEscapeCharacter string
|
||||
}{}
|
||||
sreq.OutputSerialization.CSV.RecordDelimiter = "\n"
|
||||
sreq.OutputSerialization.CSV.FieldDelimiter = ","
|
||||
|
||||
s3s, err := New(&buf, int64(buf.Len()), sreq)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
output.Reset()
|
||||
if err = Execute(&output, s3s); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkSQLAggregate_100K - benchmark count(*) function with 100k records.
|
||||
func BenchmarkSQLAggregate_100K(b *testing.B) {
|
||||
benchmarkSQLAggregate(b, humanize.KiByte*100)
|
||||
}
|
||||
|
||||
// BenchmarkSQLAggregate_1M - benchmark count(*) function with 1m records.
|
||||
func BenchmarkSQLAggregate_1M(b *testing.B) {
|
||||
benchmarkSQLAggregate(b, humanize.MiByte)
|
||||
}
|
||||
|
||||
// BenchmarkSQLAggregate_2M - benchmark count(*) function with 2m records.
|
||||
func BenchmarkSQLAggregate_2M(b *testing.B) {
|
||||
benchmarkSQLAggregate(b, 2*humanize.MiByte)
|
||||
}
|
||||
|
||||
// BenchmarkSQLAggregate_10M - benchmark count(*) function with 10m records.
|
||||
func BenchmarkSQLAggregate_10M(b *testing.B) {
|
||||
benchmarkSQLAggregate(b, 10*humanize.MiByte)
|
||||
}
|
||||
|
||||
// BenchmarkSQLAll_100K - benchmark * function with 100k records.
|
||||
func BenchmarkSQLAll_100K(b *testing.B) {
|
||||
benchmarkSQLAll(b, humanize.KiByte*100)
|
||||
}
|
||||
|
||||
// BenchmarkSQLAll_1M - benchmark * function with 1m records.
|
||||
func BenchmarkSQLAll_1M(b *testing.B) {
|
||||
benchmarkSQLAll(b, humanize.MiByte)
|
||||
}
|
||||
|
||||
// BenchmarkSQLAll_2M - benchmark * function with 2m records.
|
||||
func BenchmarkSQLAll_2M(b *testing.B) {
|
||||
benchmarkSQLAll(b, 2*humanize.MiByte)
|
||||
}
|
||||
|
||||
// BenchmarkSQLAll_10M - benchmark * function with 10m records.
|
||||
func BenchmarkSQLAll_10M(b *testing.B) {
|
||||
benchmarkSQLAll(b, 10*humanize.MiByte)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user