minio/pkg/s3select/helpers.go
Arjun Mishra 7c14cdb60e S3 Select API Support for CSV (#6127)
Add support for trivial where clause cases
2018-08-15 03:30:19 -07:00

755 lines
22 KiB
Go

/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package s3select
import (
"fmt"
"reflect"
"strconv"
"strings"
"github.com/xwb1989/sqlparser"
)
// This function processes size so that we can calculate bytes BytesProcessed.
func processSize(myrecord []string) int64 {
if len(myrecord) > 0 {
var size int64
size = int64(len(myrecord)-1) + 1
for i := range myrecord {
size += int64(len(myrecord[i]))
}
return size
}
return 0
}
// This function finds whether a string is in a list
func stringInSlice(x string, list []string) bool {
for _, y := range list {
if x == y {
return true
}
}
return false
}
// This function returns the index of a string in a list
func stringIndex(a string, list []string) int {
for i := range list {
if list[i] == a {
return i
}
}
return -1
}
// Returns a true or false, whether a string can be represented as an int.
func representsInt(s string) bool {
if _, err := strconv.Atoi(s); err == nil {
return true
}
return false
}
// The function below processes the where clause into an acutal boolean given a
// row
func matchesMyWhereClause(row []string, columnNames map[string]int, alias string, whereClause interface{}) (bool, error) {
// This particular logic deals with the details of casting, e.g if we have to
// cast a column of string numbers into int's for comparison.
var conversionColumn string
var operator string
var operand interface{}
if fmt.Sprintf("%v", whereClause) == "false" {
return false, nil
}
switch expr := whereClause.(type) {
case *sqlparser.IsExpr:
return evaluateIsExpr(expr, row, columnNames, alias)
case *sqlparser.RangeCond:
operator = expr.Operator
if operator != "between" && operator != "not between" {
return false, ErrUnsupportedSQLOperation
}
if operator == "not between" {
myResult, err := evaluateBetween(expr, alias, row, columnNames)
if err != nil {
return false, err
}
return !myResult, nil
}
myResult, err := evaluateBetween(expr, alias, row, columnNames)
if err != nil {
return false, err
}
return myResult, nil
case *sqlparser.ComparisonExpr:
operator = expr.Operator
switch right := expr.Right.(type) {
case *sqlparser.FuncExpr:
operand = evaluateFuncExpr(right, "", row, columnNames)
case *sqlparser.SQLVal:
var err error
operand, err = evaluateParserType(right)
if err != nil {
return false, err
}
}
var myVal string
myVal = ""
switch left := expr.Left.(type) {
case *sqlparser.FuncExpr:
myVal = evaluateFuncExpr(left, "", row, columnNames)
conversionColumn = ""
case *sqlparser.ColName:
conversionColumn = cleanCol(left.Name.CompliantName(), alias)
}
if representsInt(conversionColumn) {
intCol, err := strconv.Atoi(conversionColumn)
if err != nil {
return false, err
}
// Subtract 1 out because the index starts at 1 for Amazon instead of 0.
return evaluateOperator(row[intCol-1], operator, operand)
}
if myVal != "" {
return evaluateOperator(myVal, operator, operand)
}
return evaluateOperator(row[columnNames[conversionColumn]], operator, operand)
case *sqlparser.AndExpr:
var leftVal bool
var rightVal bool
switch left := expr.Left.(type) {
case *sqlparser.ComparisonExpr:
temp, err := matchesMyWhereClause(row, columnNames, alias, left)
if err != nil {
return false, err
}
leftVal = temp
}
switch right := expr.Right.(type) {
case *sqlparser.ComparisonExpr:
temp, err := matchesMyWhereClause(row, columnNames, alias, right)
if err != nil {
return false, err
}
rightVal = temp
}
return (rightVal && leftVal), nil
case *sqlparser.OrExpr:
var leftVal bool
var rightVal bool
switch left := expr.Left.(type) {
case *sqlparser.ComparisonExpr:
leftVal, _ = matchesMyWhereClause(row, columnNames, alias, left)
}
switch right := expr.Right.(type) {
case *sqlparser.ComparisonExpr:
rightVal, _ = matchesMyWhereClause(row, columnNames, alias, right)
}
return (rightVal || leftVal), nil
}
return true, nil
}
func applyStrFunc(rawArg string, funcName string) string {
switch strings.ToUpper(funcName) {
case "TRIM":
// parser has an issue which does not allow it to support Trim with other
// arguments
return strings.Trim(rawArg, " ")
case "SUBSTRING":
// TODO parser has an issue which does not support substring
return rawArg
case "CHAR_LENGTH":
return strconv.Itoa(len(rawArg))
case "CHARACTER_LENGTH":
return strconv.Itoa(len(rawArg))
case "LOWER":
return strings.ToLower(rawArg)
case "UPPER":
return strings.ToUpper(rawArg)
}
return rawArg
}
// This is a really important function it actually evaluates the boolean
// statement and therefore actually returns a bool, it functions as the lowest
// level of the state machine.
func evaluateOperator(myTblVal string, operator string, operand interface{}) (bool, error) {
if err := checkValidOperator(operator); err != nil {
return false, err
}
myRecordVal := checkStringType(myTblVal)
myVal := reflect.ValueOf(myRecordVal)
myOp := reflect.ValueOf(operand)
switch {
case myVal.Kind() == reflect.String && myOp.Kind() == reflect.String:
return stringEval(myVal.String(), operator, myOp.String())
case myVal.Kind() == reflect.Float64 && myOp.Kind() == reflect.Float64:
return floatEval(myVal.Float(), operator, myOp.Float())
case myVal.Kind() == reflect.Int && myOp.Kind() == reflect.Int:
return intEval(myVal.Int(), operator, myOp.Int())
case myVal.Kind() == reflect.Int && myOp.Kind() == reflect.String:
stringVs := strconv.Itoa(int(myVal.Int()))
return stringEval(stringVs, operator, myOp.String())
case myVal.Kind() == reflect.Float64 && myOp.Kind() == reflect.String:
stringVs := strconv.FormatFloat(myVal.Float(), 'f', 6, 64)
return stringEval(stringVs, operator, myOp.String())
case myVal.Kind() != myOp.Kind():
return false, nil
}
return false, ErrUnsupportedSyntax
}
// checkValidOperator ensures that the current operator is supported
func checkValidOperator(operator string) error {
listOfOps := []string{">", "<", "=", "<=", ">=", "!=", "like"}
for i := range listOfOps {
if operator == listOfOps[i] {
return nil
}
}
return ErrParseUnknownOperator
}
// checkStringType converts the value from the csv to the appropriate one.
func checkStringType(myTblVal string) interface{} {
myInt, isInt := strconv.Atoi(myTblVal)
myFloat, isFloat := strconv.ParseFloat(myTblVal, 64)
if isInt == nil {
return myInt
} else if isFloat == nil {
return myFloat
} else {
return myTblVal
}
}
// stringEval is for evaluating the state of string comparison.
func stringEval(myRecordVal string, operator string, myOperand string) (bool, error) {
switch operator {
case ">":
return myRecordVal > myOperand, nil
case "<":
return myRecordVal < myOperand, nil
case "=":
return myRecordVal == myOperand, nil
case "<=":
return myRecordVal <= myOperand, nil
case ">=":
return myRecordVal >= myOperand, nil
case "!=":
return myRecordVal != myOperand, nil
case "like":
return likeConvert(myOperand, myRecordVal)
}
return false, ErrUnsupportedSyntax
}
// intEval is for evaluating integer comparisons.
func intEval(myRecordVal int64, operator string, myOperand int64) (bool, error) {
switch operator {
case ">":
return myRecordVal > myOperand, nil
case "<":
return myRecordVal < myOperand, nil
case "=":
return myRecordVal == myOperand, nil
case "<=":
return myRecordVal <= myOperand, nil
case ">=":
return myRecordVal >= myOperand, nil
case "!=":
return myRecordVal != myOperand, nil
}
return false, ErrUnsupportedSyntax
}
// floatEval is for evaluating the comparison of floats.
func floatEval(myRecordVal float64, operator string, myOperand float64) (bool, error) {
// Basically need some logic thats like, if the types dont match check for a cast
switch operator {
case ">":
return myRecordVal > myOperand, nil
case "<":
return myRecordVal < myOperand, nil
case "=":
return myRecordVal == myOperand, nil
case "<=":
return myRecordVal <= myOperand, nil
case ">=":
return myRecordVal >= myOperand, nil
case "!=":
return myRecordVal != myOperand, nil
}
return false, ErrUnsupportedSyntax
}
// prefixMatch allows for matching a prefix only like query e.g a%
func prefixMatch(pattern string, record string) bool {
for i := 0; i < len(pattern)-1; i++ {
if pattern[i] != record[i] && pattern[i] != byte('_') {
return false
}
}
return true
}
// suffixMatch allows for matching a suffix only like query e.g %an
func suffixMatch(pattern string, record string) bool {
for i := len(pattern) - 1; i > 0; i-- {
if pattern[i] != record[len(record)-(len(pattern)-i)] && pattern[i] != byte('_') {
return false
}
}
return true
}
// This function is for evaluating select statements which are case sensitive
func likeConvert(pattern string, record string) (bool, error) {
// If pattern is empty just return false
if pattern == "" || record == "" {
return false, nil
}
// for suffix match queries e.g %a
if len(pattern) >= 2 && pattern[0] == byte('%') && strings.Count(pattern, "%") == 1 {
return suffixMatch(pattern, record), nil
}
// for prefix match queries e.g a%
if len(pattern) >= 2 && pattern[len(pattern)-1] == byte('%') && strings.Count(pattern, "%") == 1 {
return prefixMatch(pattern, record), nil
}
charCount := 0
currPos := 0
// Loop through the pattern so that a boolean can be returned
for i := 0; i < len(pattern); i++ {
if pattern[i] == byte('_') {
// if its an underscore it can be anything so shift current position for
// pattern and string
charCount++
// if there have been more characters in the pattern than record, clearly
// there should be a return
if i != len(pattern)-1 {
if pattern[i+1] != byte('%') && pattern[i+1] != byte('_') {
if currPos != len(record)-1 && pattern[i+1] != record[currPos+1] {
return false, nil
}
}
}
if charCount > len(record) {
return false, nil
}
// if the pattern has been fully evaluated, then just return.
if len(pattern) == i+1 {
return true, nil
}
i++
currPos++
}
if pattern[i] == byte('%') || pattern[i] == byte('*') {
// if there is a wildcard then want to return true if its last and flag it.
if currPos == len(record) {
return false, nil
}
if i+1 == len(pattern) {
return true, nil
}
} else {
charCount++
matched := false
// iterate through the pattern and check if there is a match for the
// character
for currPos < len(record) {
if record[currPos] == pattern[i] || pattern[i] == byte('_') {
matched = true
break
}
currPos++
}
currPos++
// if the character did not match then return should occur.
if !matched {
return false, nil
}
}
}
if charCount > len(record) {
return false, nil
}
if currPos < len(record) {
return false, nil
}
return true, nil
}
// TrimQuotes allows the following to occur select "name", we need to trim the
// quotes to reference our map of columnNames.
func trimQuotes(s string) string {
if len(s) >= 2 {
if c := s[len(s)-1]; s[0] == c && (c == '"') {
return s[1 : len(s)-1]
}
}
return s
}
// cleanCol cleans a column name from the parser so that the name is returned to
// original.
func cleanCol(myCol string, alias string) string {
if len(myCol) <= 0 {
return myCol
}
if !strings.HasPrefix(myCol, alias) && myCol[0] == '_' {
myCol = alias + myCol
}
if strings.Contains(myCol, ".") {
myCol = strings.Replace(myCol, alias+"._", "", len(myCol))
}
myCol = strings.Replace(myCol, alias+"_", "", len(myCol))
return myCol
}
// evaluateBetween is a function which evaluates a Between Clause.
func evaluateBetween(betweenExpr *sqlparser.RangeCond, alias string, record []string, columnNames map[string]int) (bool, error) {
var colToVal interface{}
var colFromVal interface{}
var conversionColumn string
var funcName string
switch colTo := betweenExpr.To.(type) {
case sqlparser.Expr:
switch colToMyVal := colTo.(type) {
case *sqlparser.FuncExpr:
var temp string
temp = stringOps(colToMyVal, record, "", columnNames)
colToVal = []byte(temp)
case *sqlparser.SQLVal:
var err error
colToVal, err = evaluateParserType(colToMyVal)
if err != nil {
return false, err
}
}
}
switch colFrom := betweenExpr.From.(type) {
case sqlparser.Expr:
switch colFromMyVal := colFrom.(type) {
case *sqlparser.FuncExpr:
colFromVal = stringOps(colFromMyVal, record, "", columnNames)
case *sqlparser.SQLVal:
var err error
colFromVal, err = evaluateParserType(colFromMyVal)
if err != nil {
return false, err
}
}
}
var myFuncVal string
myFuncVal = ""
switch left := betweenExpr.Left.(type) {
case *sqlparser.FuncExpr:
myFuncVal = evaluateFuncExpr(left, "", record, columnNames)
conversionColumn = ""
case *sqlparser.ColName:
conversionColumn = cleanCol(left.Name.CompliantName(), alias)
}
toGreater, err := evaluateOperator(fmt.Sprintf("%v", colToVal), ">", colFromVal)
if err != nil {
return false, err
}
if toGreater {
return evalBetweenGreater(conversionColumn, record, funcName, columnNames, colFromVal, colToVal, myFuncVal)
}
return evalBetweenLess(conversionColumn, record, funcName, columnNames, colFromVal, colToVal, myFuncVal)
}
// evalBetweenLess is a function which evaluates the between given that the
// FROM is > than the TO.
func evalBetweenLess(conversionColumn string, record []string, funcName string, columnNames map[string]int, colFromVal interface{}, colToVal interface{}, myCoalVal string) (bool, error) {
if representsInt(conversionColumn) {
myIndex, _ := strconv.Atoi(conversionColumn)
// Subtract 1 out because the index starts at 1 for Amazon instead of 0.
myVal, err := evaluateOperator(record[myIndex-1], "<=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(record[myIndex-1]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
if myCoalVal != "" {
myVal, err := evaluateOperator(myCoalVal, "<=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(myCoalVal))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
myVal, err := evaluateOperator(record[columnNames[conversionColumn]], "<=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(record[columnNames[conversionColumn]]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
// evalBetweenGreater is a function which evaluates the between given that the
// TO is > than the FROM.
func evalBetweenGreater(conversionColumn string, record []string, funcName string, columnNames map[string]int, colFromVal interface{}, colToVal interface{}, myCoalVal string) (bool, error) {
if representsInt(conversionColumn) {
myIndex, _ := strconv.Atoi(conversionColumn)
myVal, err := evaluateOperator(record[myIndex-1], ">=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(record[myIndex-1]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
if myCoalVal != "" {
myVal, err := evaluateOperator(myCoalVal, ">=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(myCoalVal))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
myVal, err := evaluateOperator(record[columnNames[conversionColumn]], ">=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(record[columnNames[conversionColumn]]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
// whereClauseNameErrs is a function which returns an error if there is a column
// in the where clause which does not exist.
func (reader *Input) whereClauseNameErrs(whereClause interface{}, alias string) error {
var conversionColumn string
switch expr := whereClause.(type) {
// case for checking errors within a clause of the form "col_name is ..."
case *sqlparser.IsExpr:
switch myCol := expr.Expr.(type) {
case *sqlparser.FuncExpr:
if err := reader.evaluateFuncErr(myCol); err != nil {
return err
}
case *sqlparser.ColName:
conversionColumn = cleanCol(myCol.Name.CompliantName(), alias)
}
case *sqlparser.RangeCond:
switch left := expr.Left.(type) {
case *sqlparser.FuncExpr:
if err := reader.evaluateFuncErr(left); err != nil {
return err
}
case *sqlparser.ColName:
conversionColumn = cleanCol(left.Name.CompliantName(), alias)
}
case *sqlparser.ComparisonExpr:
switch left := expr.Left.(type) {
case *sqlparser.FuncExpr:
if err := reader.evaluateFuncErr(left); err != nil {
return err
}
case *sqlparser.ColName:
conversionColumn = cleanCol(left.Name.CompliantName(), alias)
}
case *sqlparser.AndExpr:
switch left := expr.Left.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(left, alias)
}
switch right := expr.Right.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(right, alias)
}
case *sqlparser.OrExpr:
switch left := expr.Left.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(left, alias)
}
switch right := expr.Right.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(right, alias)
}
}
if conversionColumn != "" {
return reader.colNameErrs([]string{conversionColumn})
}
return nil
}
// qualityCheck ensures the row has enough separators.
func qualityCheck(row string, amountOfSep int, sep string) string {
for i := 0; i < amountOfSep; i++ {
row = row + sep
}
return row
}
// writeRow helps to write the row regardless of how many entries.
func writeRow(myRow string, myEntry string, delimiter string, numOfReqCols int) string {
if myEntry == "" && len(myRow) == 0 && numOfReqCols == 1 {
return myEntry
}
if myEntry == "" && len(myRow) == 0 {
return myEntry + delimiter
}
if len(myRow) == 1 && myRow[0] == ',' {
return myRow + myEntry
}
if len(myRow) == 0 {
return myEntry
}
return myRow + delimiter + myEntry
}
// colNameErrs is a function which makes sure that the headers are requested are
// present in the file otherwise it throws an error.
func (reader *Input) colNameErrs(columnNames []string) error {
for i := 0; i < len(columnNames); i++ {
if columnNames[i] == "" {
continue
}
if !representsInt(columnNames[i]) && !reader.options.HeaderOpt {
return ErrInvalidColumnIndex
}
if representsInt(columnNames[i]) {
tempInt, _ := strconv.Atoi(columnNames[i])
if tempInt > len(reader.Header()) || tempInt == 0 {
return ErrInvalidColumnIndex
}
} else {
if reader.options.HeaderOpt && !stringInSlice(columnNames[i], reader.Header()) {
return ErrMissingHeaders
}
}
}
return nil
}
// aggFuncToStr converts an array of floats into a properly formatted string.
func (reader *Input) aggFuncToStr(myAggVals []float64) string {
myRow := strconv.FormatFloat(myAggVals[0], 'f', 6, 64)
for i := 1; i < len(myAggVals); i++ {
aggregateval := strconv.FormatFloat(myAggVals[i], 'f', 6, 64)
myRow = myRow + reader.options.OutputFieldDelimiter + aggregateval
}
return myRow
}
// checkForDuplicates ensures we do not have an ambigious column name.
func checkForDuplicates(columns []string, columnsMap map[string]int, hasDuplicates map[string]bool, lowercaseColumnsMap map[string]int) error {
for i := 0; i < len(columns); i++ {
columns[i] = strings.Replace(columns[i], " ", "_", len(columns[i]))
if _, exist := columnsMap[columns[i]]; exist {
return ErrAmbiguousFieldName
}
columnsMap[columns[i]] = i
// This checks that if a key has already been put into the map, that we're
// setting its appropriate value in has duplicates to be true.
if _, exist := lowercaseColumnsMap[strings.ToLower(columns[i])]; exist {
hasDuplicates[strings.ToLower(columns[i])] = true
} else {
lowercaseColumnsMap[strings.ToLower(columns[i])] = i
}
}
return nil
}
// evaluateParserType is a function that takes a SQL value and returns it as an
// interface converted into the appropriate value.
func evaluateParserType(col *sqlparser.SQLVal) (interface{}, error) {
colDataType := col.Type
var val interface{}
switch colDataType {
case 0:
val = string(col.Val)
case 1:
intVersion, isInt := strconv.Atoi(string(col.Val))
if isInt != nil {
return nil, ErrIntegerOverflow
}
val = intVersion
case 2:
floatVersion, isFloat := strconv.ParseFloat(string(col.Val), 64)
if isFloat != nil {
return nil, ErrIntegerOverflow
}
val = floatVersion
}
return val, nil
}
// parseErrs is the function which handles all the errors that could occur
// through use of function arguments such as column names in NULLIF
func (reader *Input) parseErrs(columnNames []string, whereClause interface{}, alias string, myFuncs *SelectFuncs) error {
// Below code cleans up column names.
reader.processColumnNames(columnNames, alias)
if columnNames[0] != "*" {
if err := reader.colNameErrs(columnNames); err != nil {
return err
}
}
// Below code ensures the whereClause has no errors.
if whereClause != nil {
tempClause := whereClause
if err := reader.whereClauseNameErrs(tempClause, alias); err != nil {
return err
}
}
for i := 0; i < len(myFuncs.funcExpr); i++ {
if myFuncs.funcExpr[i] == nil {
continue
}
if err := reader.evaluateFuncErr(myFuncs.funcExpr[i]); err != nil {
return err
}
}
return nil
}