S3 Select API Support for CSV (#6127)

Add support for trivial where clause cases
This commit is contained in:
Arjun Mishra
2018-08-15 03:30:19 -07:00
committed by kannappanr
parent 0e02328c98
commit 7c14cdb60e
59 changed files with 30860 additions and 3 deletions

486
pkg/s3select/errors.go Normal file
View File

@@ -0,0 +1,486 @@
/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package s3select
import "errors"
//S3 errors below
// ErrBusy is an error if the service is too busy.
var ErrBusy = errors.New("The service is unavailable. Please retry")
// ErrUnauthorizedAccess is an error if you lack the appropriate credentials to
// access the object.
var ErrUnauthorizedAccess = errors.New("You are not authorized to perform this operation")
// ErrExpressionTooLong is an error if your SQL expression too long for
// processing.
var ErrExpressionTooLong = errors.New("The SQL expression is too long: The maximum byte-length for the SQL expression is 256 KB")
// ErrIllegalSQLFunctionArgument is an error if you provide an illegal argument
// in the SQL function.
var ErrIllegalSQLFunctionArgument = errors.New("Illegal argument was used in the SQL function")
// ErrInvalidColumnIndex is an error if you provide a column index which is not
// valid.
var ErrInvalidColumnIndex = errors.New("Column index in the SQL expression is invalid")
// ErrInvalidKeyPath is an error if you provide a key in the SQL expression that
// is invalid.
var ErrInvalidKeyPath = errors.New("Key path in the SQL expression is invalid")
// ErrColumnTooLong is an error if your query results in a column that is
// greater than the max amount of characters per column of 1mb
var ErrColumnTooLong = errors.New("The length of a column in the result is greater than maxCharsPerColumn of 1 MB")
// ErrOverMaxColumn is an error if the number of columns from the resulting
// query is greater than 1Mb.
var ErrOverMaxColumn = errors.New("The number of columns in the result is greater than maxColumnNumber of 1 MB")
// ErrOverMaxRecordSize is an error if the length of a record in the result is
// greater than 1 Mb.
var ErrOverMaxRecordSize = errors.New("The length of a record in the result is greater than maxCharsPerRecord of 1 MB")
// ErrMissingHeaders is an error if some of the headers that are requested in
// the Select Query are not present in the file.
var ErrMissingHeaders = errors.New("Some headers in the query are missing from the file. Check the file and try again")
// ErrInvalidCompressionFormat is an error if an unsupported compression type is
// utilized with the select object query.
var ErrInvalidCompressionFormat = errors.New("The file is not in a supported compression format. Only GZIP is supported at this time")
// ErrTruncatedInput is an error if the object is not compressed properly and an
// error occurs during decompression.
var ErrTruncatedInput = errors.New("Object decompression failed. Check that the object is properly compressed using the format specified in the request")
// ErrInvalidFileHeaderInfo is an error if the argument provided to the
// FileHeader Argument is incorrect.
var ErrInvalidFileHeaderInfo = errors.New("The FileHeaderInfo is invalid. Only NONE, USE, and IGNORE are supported")
// ErrInvalidJSONType is an error if the json format provided as an argument is
// invalid.
var ErrInvalidJSONType = errors.New("The JsonType is invalid. Only DOCUMENT and LINES are supported at this time")
// ErrInvalidQuoteFields is an error if the arguments provided to the
// QuoteFields options are not valid.
var ErrInvalidQuoteFields = errors.New("The QuoteFields is invalid. Only ALWAYS and ASNEEDED are supported")
// ErrInvalidRequestParameter is an error if the value of a parameter in the
// request element is not valid.
var ErrInvalidRequestParameter = errors.New("The value of a parameter in Request element is invalid. Check the service API documentation and try again")
// ErrCSVParsingError is an error if the CSV presents an error while being
// parsed.
var ErrCSVParsingError = errors.New("Encountered an Error parsing the CSV file. Check the file and try again")
// ErrJSONParsingError is an error if while parsing the JSON an error arises.
var ErrJSONParsingError = errors.New("Encountered an error parsing the JSON file. Check the file and try again")
// ErrExternalEvalException is an error that arises if the query can not be
// evaluated.
var ErrExternalEvalException = errors.New("The query cannot be evaluated. Check the file and try again")
// ErrInvalidDataType is an error that occurs if the SQL expression contains an
// invalid data type.
var ErrInvalidDataType = errors.New("The SQL expression contains an invalid data type")
// ErrUnrecognizedFormatException is an error that arises if there is an invalid
// record type.
var ErrUnrecognizedFormatException = errors.New("Encountered an invalid record type")
// ErrInvalidTextEncoding is an error if the text encoding is not valid.
var ErrInvalidTextEncoding = errors.New("Invalid encoding type. Only UTF-8 encoding is supported at this time")
// ErrInvalidTableAlias is an error that arises if the table alias provided in
// the SQL expression is invalid.
var ErrInvalidTableAlias = errors.New("The SQL expression contains an invalid table alias")
// ErrMultipleDataSourcesUnsupported is an error that arises if multiple data
// sources are provided.
var ErrMultipleDataSourcesUnsupported = errors.New("Multiple data sources are not supported")
// ErrMissingRequiredParameter is an error that arises if a required argument
// is omitted from the Request.
var ErrMissingRequiredParameter = errors.New("The Request entity is missing a required parameter. Check the service documentation and try again")
// ErrObjectSerializationConflict is an error that arises if an unsupported
// output seralization is provided.
var ErrObjectSerializationConflict = errors.New("The Request entity can only contain one of CSV or JSON. Check the service documentation and try again")
// ErrUnsupportedSQLOperation is an error that arises if an unsupported SQL
// operation is used.
var ErrUnsupportedSQLOperation = errors.New("Encountered an unsupported SQL operation")
// ErrUnsupportedSQLStructure is an error that occurs if an unsupported SQL
// structure is used.
var ErrUnsupportedSQLStructure = errors.New("Encountered an unsupported SQL structure. Check the SQL Reference")
// ErrUnsupportedStorageClass is an error that occurs if an invalid storace
// class is present.
var ErrUnsupportedStorageClass = errors.New("Encountered an invalid storage class. Only STANDARD, STANDARD_IA, and ONEZONE_IA storage classes are supported at this time")
// ErrUnsupportedSyntax is an error that occurs if invalid syntax is present in
// the query.
var ErrUnsupportedSyntax = errors.New("Encountered invalid syntax")
// ErrUnsupportedRangeHeader is an error that occurs if a range header is
// provided.
var ErrUnsupportedRangeHeader = errors.New("Range header is not supported for this operation")
// ErrLexerInvalidChar is an error that occurs if the SQL expression contains an
// invalid character.
var ErrLexerInvalidChar = errors.New("The SQL expression contains an invalid character")
// ErrLexerInvalidOperator is an error that occurs if an invalid operator is
// used.
var ErrLexerInvalidOperator = errors.New("The SQL expression contains an invalid operator")
// ErrLexerInvalidLiteral is an error that occurs if an invalid literal is used.
var ErrLexerInvalidLiteral = errors.New("The SQL expression contains an invalid literal")
// ErrLexerInvalidIONLiteral is an error that occurs if an invalid operator is
// used
var ErrLexerInvalidIONLiteral = errors.New("The SQL expression contains an invalid operator")
// ErrParseExpectedDatePart is an error that occurs if the date part is not
// found in the SQL expression.
var ErrParseExpectedDatePart = errors.New("Did not find the expected date part in the SQL expression")
// ErrParseExpectedKeyword is an error that occurs if the expected keyword was
// not found in the expression.
var ErrParseExpectedKeyword = errors.New("Did not find the expected keyword in the SQL expression")
// ErrParseExpectedTokenType is an error that occurs if the expected token is
// not found in the SQL expression.
var ErrParseExpectedTokenType = errors.New("Did not find the expected token in the SQL expression")
// ErrParseExpected2TokenTypes is an error that occurs if 2 token types are not
// found.
var ErrParseExpected2TokenTypes = errors.New("Did not find the expected token in the SQL expression")
// ErrParseExpectedNumber is an error that occurs if a number is expected but
// not found in the expression.
var ErrParseExpectedNumber = errors.New("Did not find the expected number in the SQL expression")
// ErrParseExpectedRightParenBuiltinFunctionCall is an error that occurs if a
// right parenthesis is missing.
var ErrParseExpectedRightParenBuiltinFunctionCall = errors.New("Did not find the expected right parenthesis character in the SQL expression")
// ErrParseExpectedTypeName is an error that occurs if a type name is expected
// but not found.
var ErrParseExpectedTypeName = errors.New("Did not find the expected type name in the SQL expression")
// ErrParseExpectedWhenClause is an error that occurs if a When clause is
// expected but not found.
var ErrParseExpectedWhenClause = errors.New("Did not find the expected WHEN clause in the SQL expression. CASE is not supported")
// ErrParseUnsupportedToken is an error that occurs if the SQL expression
// contains an unsupported token.
var ErrParseUnsupportedToken = errors.New("The SQL expression contains an unsupported token")
// ErrParseUnsupportedLiteralsGroupBy is an error that occurs if the SQL
// expression has an unsupported use of Group By.
var ErrParseUnsupportedLiteralsGroupBy = errors.New("The SQL expression contains an unsupported use of GROUP BY")
// ErrParseExpectedMember is an error that occurs if there is an unsupported use
// of member in the SQL expression.
var ErrParseExpectedMember = errors.New("The SQL expression contains an unsupported use of MEMBER")
// ErrParseUnsupportedSelect is an error that occurs if there is an unsupported
// use of Select.
var ErrParseUnsupportedSelect = errors.New("The SQL expression contains an unsupported use of SELECT")
// ErrParseUnsupportedCase is an error that occurs if there is an unsupported
// use of case.
var ErrParseUnsupportedCase = errors.New("The SQL expression contains an unsupported use of CASE")
// ErrParseUnsupportedCaseClause is an error that occurs if there is an
// unsupported use of case.
var ErrParseUnsupportedCaseClause = errors.New("The SQL expression contains an unsupported use of CASE")
// ErrParseUnsupportedAlias is an error that occurs if there is an unsupported
// use of Alias.
var ErrParseUnsupportedAlias = errors.New("The SQL expression contains an unsupported use of ALIAS")
// ErrParseUnsupportedSyntax is an error that occurs if there is an
// UnsupportedSyntax in the SQL expression.
var ErrParseUnsupportedSyntax = errors.New("The SQL expression contains unsupported syntax")
// ErrParseUnknownOperator is an error that occurs if there is an invalid
// operator present in the SQL expression.
var ErrParseUnknownOperator = errors.New("The SQL expression contains an invalid operator")
// ErrParseInvalidPathComponent is an error that occurs if there is an invalid
// path component.
var ErrParseInvalidPathComponent = errors.New("The SQL expression contains an invalid path component")
// ErrParseMissingIdentAfterAt is an error that occurs if the wrong symbol
// follows the "@" symbol in the SQL expression.
var ErrParseMissingIdentAfterAt = errors.New("Did not find the expected identifier after the @ symbol in the SQL expression")
// ErrParseUnexpectedOperator is an error that occurs if the SQL expression
// contains an unexpected operator.
var ErrParseUnexpectedOperator = errors.New("The SQL expression contains an unexpected operator")
// ErrParseUnexpectedTerm is an error that occurs if the SQL expression contains
// an unexpected term.
var ErrParseUnexpectedTerm = errors.New("The SQL expression contains an unexpected term")
// ErrParseUnexpectedToken is an error that occurs if the SQL expression
// contains an unexpected token.
var ErrParseUnexpectedToken = errors.New("The SQL expression contains an unexpected token")
// ErrParseUnexpectedKeyword is an error that occurs if the SQL expression
// contains an unexpected keyword.
var ErrParseUnexpectedKeyword = errors.New("The SQL expression contains an unexpected keyword")
// ErrParseExpectedExpression is an error that occurs if the SQL expression is
// not found.
var ErrParseExpectedExpression = errors.New("Did not find the expected SQL expression")
// ErrParseExpectedLeftParenAfterCast is an error that occurs if the left
// parenthesis is missing after a cast in the SQL expression.
var ErrParseExpectedLeftParenAfterCast = errors.New("Did not find the expected left parenthesis after CAST in the SQL expression")
// ErrParseExpectedLeftParenValueConstructor is an error that occurs if the left
// parenthesis is not found in the SQL expression.
var ErrParseExpectedLeftParenValueConstructor = errors.New("Did not find expected the left parenthesis in the SQL expression")
// ErrParseExpectedLeftParenBuiltinFunctionCall is an error that occurs if the
// left parenthesis is not found in the SQL expression function call.
var ErrParseExpectedLeftParenBuiltinFunctionCall = errors.New("Did not find the expected left parenthesis in the SQL expression")
// ErrParseExpectedArgumentDelimiter is an error that occurs if the argument
// delimiter for the SQL expression is not provided.
var ErrParseExpectedArgumentDelimiter = errors.New("Did not find the expected argument delimiter in the SQL expression")
// ErrParseCastArity is an error that occurs because the CAST has incorrect
// arity.
var ErrParseCastArity = errors.New("The SQL expression CAST has incorrect arity")
// ErrParseInvalidTypeParam is an error that occurs because there is an invalid
// parameter value.
var ErrParseInvalidTypeParam = errors.New("The SQL expression contains an invalid parameter value")
// ErrParseEmptySelect is an error that occurs because the SQL expression
// contains an empty Select
var ErrParseEmptySelect = errors.New("The SQL expression contains an empty SELECT")
// ErrParseSelectMissingFrom is an error that occurs because there is a missing
// From after the Select List.
var ErrParseSelectMissingFrom = errors.New("The SQL expression contains a missing FROM after SELECT list")
// ErrParseExpectedIdentForGroupName is an error that occurs because Group is
// not supported in the SQL expression.
var ErrParseExpectedIdentForGroupName = errors.New("GROUP is not supported in the SQL expression")
// ErrParseExpectedIdentForAlias is an error that occurs if expected identifier
// for alias is not in the SQL expression.
var ErrParseExpectedIdentForAlias = errors.New("Did not find the expected identifier for the alias in the SQL expression")
// ErrParseUnsupportedCallWithStar is an error that occurs if COUNT is used with
// an argument other than "*".
var ErrParseUnsupportedCallWithStar = errors.New("Only COUNT with (*) as a parameter is supported in the SQL expression")
// ErrParseNonUnaryAgregateFunctionCall is an error that occurs if more than one
// argument is provided as an argument for aggregation functions.
var ErrParseNonUnaryAgregateFunctionCall = errors.New("Only one argument is supported for aggregate functions in the SQL expression")
// ErrParseMalformedJoin is an error that occurs if a "join" operation is
// attempted in the SQL expression as this is not supported.
var ErrParseMalformedJoin = errors.New("JOIN is not supported in the SQL expression")
// ErrParseExpectedIdentForAt is an error that occurs if after "AT" an Alias
// identifier is not provided.
var ErrParseExpectedIdentForAt = errors.New("Did not find the expected identifier for AT name in the SQL expression")
// ErrParseAsteriskIsNotAloneInSelectList is an error that occurs if in addition
// to an asterix, more column names are provided as arguments in the SQL
// expression.
var ErrParseAsteriskIsNotAloneInSelectList = errors.New("Other expressions are not allowed in the SELECT list when '*' is used without dot notation in the SQL expression")
// ErrParseCannotMixSqbAndWildcardInSelectList is an error that occurs if list
// indexing and an asterix are mixed in the SQL expression.
var ErrParseCannotMixSqbAndWildcardInSelectList = errors.New("Cannot mix [] and * in the same expression in a SELECT list in SQL expression")
// ErrParseInvalidContextForWildcardInSelectList is an error that occurs if the
// asterix is used improperly within the SQL expression.
var ErrParseInvalidContextForWildcardInSelectList = errors.New("Invalid use of * in SELECT list in the SQL expression")
// ErrEvaluatorBindingDoesNotExist is an error that occurs if a column name or
// path provided in the expression does not exist.
var ErrEvaluatorBindingDoesNotExist = errors.New("A column name or a path provided does not exist in the SQL expression")
// ErrIncorrectSQLFunctionArgumentType is an error that occurs if the wrong
// argument is provided to a SQL function.
var ErrIncorrectSQLFunctionArgumentType = errors.New("Incorrect type of arguments in function call in the SQL expression")
// ErrAmbiguousFieldName is an error that occurs if the column name which is not
// case sensitive, is not descriptive enough to retrieve a singular column.
var ErrAmbiguousFieldName = errors.New("Field name matches to multiple fields in the file. Check the SQL expression and the file, and try again")
// ErrEvaluatorInvalidArguments is an error that occurs if there are not the
// correct number of arguments in a functional call to a SQL expression.
var ErrEvaluatorInvalidArguments = errors.New("Incorrect number of arguments in the function call in the SQL expression")
// ErrValueParseFailure is an error that occurs if the Time Stamp is not parsed
// correctly in the SQL expression.
var ErrValueParseFailure = errors.New("Time stamp parse failure in the SQL expression")
// ErrIntegerOverflow is an error that occurs if there is an IntegerOverflow or
// IntegerUnderFlow in the SQL expression.
var ErrIntegerOverflow = errors.New("Int overflow or underflow in the SQL expression")
// ErrLikeInvalidInputs is an error that occurs if invalid inputs are provided
// to the argument LIKE Clause.
var ErrLikeInvalidInputs = errors.New("Invalid argument given to the LIKE clause in the SQL expression")
// ErrCastFailed occurs if the attempt to convert data types in the cast is not
// done correctly.
var ErrCastFailed = errors.New("Attempt to convert from one data type to another using CAST failed in the SQL expression")
// ErrInvalidCast is an error that occurs if the attempt to convert data types
// failed and was done in an improper fashion.
var ErrInvalidCast = errors.New("Attempt to convert from one data type to another using CAST failed in the SQL expression")
// ErrEvaluatorInvalidTimestampFormatPattern is an error that occurs if the Time
// Stamp Format needs more additional fields to be filled.
var ErrEvaluatorInvalidTimestampFormatPattern = errors.New("Time stamp format pattern requires additional fields in the SQL expression")
// ErrEvaluatorInvalidTimestampFormatPatternSymbolForParsing is an error that
// occurs if the format of the time stamp can not be parsed.
var ErrEvaluatorInvalidTimestampFormatPatternSymbolForParsing = errors.New("Time stamp format pattern contains a valid format symbol that cannot be applied to time stamp parsing in the SQL expression")
// ErrEvaluatorTimestampFormatPatternDuplicateFields is an error that occurs if
// the time stamp format pattern contains multiple format specifications which
// can not be clearly resolved.
var ErrEvaluatorTimestampFormatPatternDuplicateFields = errors.New("Time stamp format pattern contains multiple format specifiers representing the time stamp field in the SQL expression")
//ErrEvaluatorTimestampFormatPatternHourClockAmPmMismatch is an error that
//occurs if the time stamp format pattern contains a 12 hour day of format but
//does not have an AM/PM field.
var ErrEvaluatorTimestampFormatPatternHourClockAmPmMismatch = errors.New("Time stamp format pattern contains a 12-hour hour of day format symbol but doesn't also contain an AM/PM field, or it contains a 24-hour hour of day format specifier and contains an AM/PM field in the SQL expression")
// ErrEvaluatorUnterminatedTimestampFormatPatternToken is an error that occurs
// if there is an unterminated token in the SQL expression for time stamp
// format.
var ErrEvaluatorUnterminatedTimestampFormatPatternToken = errors.New("Time stamp format pattern contains unterminated token in the SQL expression")
// ErrEvaluatorInvalidTimestampFormatPatternToken is an error that occurs if
// there is an invalid token in the time stamp format within the SQL expression.
var ErrEvaluatorInvalidTimestampFormatPatternToken = errors.New("Time stamp format pattern contains an invalid token in the SQL expression")
// ErrEvaluatorInvalidTimestampFormatPatternSymbol is an error that occurs if
// the time stamp format pattern has an invalid symbol within the SQL
// expression.
var ErrEvaluatorInvalidTimestampFormatPatternSymbol = errors.New("Time stamp format pattern contains an invalid symbol in the SQL expression")
// S3 select API errors - TODO fix the errors.
var errorCodeResponse = map[error]string{
ErrBusy: "Busy",
ErrUnauthorizedAccess: "UnauthorizedAccess",
ErrExpressionTooLong: "ExpressionTooLong",
ErrIllegalSQLFunctionArgument: "IllegalSqlFunctionArgument",
ErrInvalidColumnIndex: "InvalidColumnIndex",
ErrInvalidKeyPath: "InvalidKeyPath",
ErrColumnTooLong: "ColumnTooLong",
ErrOverMaxColumn: "OverMaxColumn",
ErrOverMaxRecordSize: "OverMaxRecordSize",
ErrMissingHeaders: "MissingHeaders",
ErrInvalidCompressionFormat: "InvalidCompressionFormat",
ErrTruncatedInput: "TruncatedInput",
ErrInvalidFileHeaderInfo: "InvalidFileHeaderInfo",
ErrInvalidJSONType: "InvalidJsonType",
ErrInvalidQuoteFields: "InvalidQuoteFields",
ErrInvalidRequestParameter: "InvalidRequestParameter",
ErrCSVParsingError: "CSVParsingError",
ErrJSONParsingError: "JSONParsingError",
ErrExternalEvalException: "ExternalEvalException",
ErrInvalidDataType: "InvalidDataType",
ErrUnrecognizedFormatException: "UnrecognizedFormatException",
ErrInvalidTextEncoding: "InvalidTextEncoding",
ErrInvalidTableAlias: "InvalidTableAlias",
ErrMultipleDataSourcesUnsupported: "MultipleDataSourcesUnsupported",
ErrMissingRequiredParameter: "MissingRequiredParameter",
ErrObjectSerializationConflict: "ObjectSerializationConflict",
ErrUnsupportedSQLOperation: "UnsupportedSqlOperation",
ErrUnsupportedSQLStructure: "UnsupportedSqlStructure",
ErrUnsupportedStorageClass: "UnsupportedStorageClass",
ErrUnsupportedSyntax: "UnsupportedSyntax",
ErrUnsupportedRangeHeader: "UnsupportedRangeHeader",
ErrLexerInvalidChar: "LexerInvalidChar",
ErrLexerInvalidOperator: "LexerInvalidOperator",
ErrLexerInvalidLiteral: "LexerInvalidLiteral",
ErrLexerInvalidIONLiteral: "LexerInvalidIONLiteral",
ErrParseExpectedDatePart: "ParseExpectedDatePart",
ErrParseExpectedKeyword: "ParseExpectedKeyword",
ErrParseExpectedTokenType: "ParseExpectedTokenType",
ErrParseExpected2TokenTypes: "ParseExpected2TokenTypes",
ErrParseExpectedNumber: "ParseExpectedNumber",
ErrParseExpectedRightParenBuiltinFunctionCall: "ParseExpectedRightParenBuiltinFunctionCall",
ErrParseExpectedTypeName: "ParseExpectedTypeName",
ErrParseExpectedWhenClause: "ParseExpectedWhenClause",
ErrParseUnsupportedToken: "ParseUnsupportedToken",
ErrParseUnsupportedLiteralsGroupBy: "ParseUnsupportedLiteralsGroupBy",
ErrParseExpectedMember: "ParseExpectedMember",
ErrParseUnsupportedSelect: "ParseUnsupportedSelect",
ErrParseUnsupportedCase: "ParseUnsupportedCase:",
ErrParseUnsupportedCaseClause: "ParseUnsupportedCaseClause",
ErrParseUnsupportedAlias: "ParseUnsupportedAlias",
ErrParseUnsupportedSyntax: "ParseUnsupportedSyntax",
ErrParseUnknownOperator: "ParseUnknownOperator",
ErrParseInvalidPathComponent: "ParseInvalidPathComponent",
ErrParseMissingIdentAfterAt: "ParseMissingIdentAfterAt",
ErrParseUnexpectedOperator: "ParseUnexpectedOperator",
ErrParseUnexpectedTerm: "ParseUnexpectedTerm",
ErrParseUnexpectedToken: "ParseUnexpectedToken",
ErrParseUnexpectedKeyword: "ParseUnexpectedKeyword",
ErrParseExpectedExpression: "ParseExpectedExpression",
ErrParseExpectedLeftParenAfterCast: "ParseExpectedLeftParenAfterCast",
ErrParseExpectedLeftParenValueConstructor: "ParseExpectedLeftParenValueConstructor",
ErrParseExpectedLeftParenBuiltinFunctionCall: "ParseExpectedLeftParenBuiltinFunctionCall",
ErrParseExpectedArgumentDelimiter: "ParseExpectedArgumentDelimiter",
ErrParseCastArity: "ParseCastArity",
ErrParseInvalidTypeParam: "ParseInvalidTypeParam",
ErrParseEmptySelect: "ParseEmptySelect",
ErrParseSelectMissingFrom: "ParseSelectMissingFrom",
ErrParseExpectedIdentForGroupName: "ParseExpectedIdentForGroupName",
ErrParseExpectedIdentForAlias: "ParseExpectedIdentForAlias",
ErrParseUnsupportedCallWithStar: "ParseUnsupportedCallWithStar",
ErrParseNonUnaryAgregateFunctionCall: "ParseNonUnaryAgregateFunctionCall",
ErrParseMalformedJoin: "ParseMalformedJoin",
ErrParseExpectedIdentForAt: "ParseExpectedIdentForAt",
ErrParseAsteriskIsNotAloneInSelectList: "ParseAsteriskIsNotAloneInSelectList",
ErrParseCannotMixSqbAndWildcardInSelectList: "ParseCannotMixSqbAndWildcardInSelectList",
ErrParseInvalidContextForWildcardInSelectList: "ParseInvalidContextForWildcardInSelectList",
ErrEvaluatorBindingDoesNotExist: "EvaluatorBindingDoesNotExist",
ErrIncorrectSQLFunctionArgumentType: "IncorrectSqlFunctionArgumentType",
ErrAmbiguousFieldName: "AmbiguousFieldName",
ErrEvaluatorInvalidArguments: "EvaluatorInvalidArguments",
ErrValueParseFailure: "ValueParseFailure",
ErrIntegerOverflow: "IntegerOverflow",
ErrLikeInvalidInputs: "LikeInvalidInputs",
ErrCastFailed: "CastFailed",
ErrInvalidCast: "Attempt to convert from one data type to another using CAST failed in the SQL expression.",
ErrEvaluatorInvalidTimestampFormatPattern: "EvaluatorInvalidTimestampFormatPattern",
ErrEvaluatorInvalidTimestampFormatPatternSymbolForParsing: "EvaluatorInvalidTimestampFormatPatternSymbolForParsing",
ErrEvaluatorTimestampFormatPatternDuplicateFields: "EvaluatorTimestampFormatPatternDuplicateFields",
ErrEvaluatorTimestampFormatPatternHourClockAmPmMismatch: "EvaluatorTimestampFormatPatternHourClockAmPmMismatch",
ErrEvaluatorUnterminatedTimestampFormatPatternToken: "EvaluatorUnterminatedTimestampFormatPatternToken",
ErrEvaluatorInvalidTimestampFormatPatternToken: "EvaluatorInvalidTimestampFormatPatternToken",
ErrEvaluatorInvalidTimestampFormatPatternSymbol: "EvaluatorInvalidTimestampFormatPatternSymbol",
}

231
pkg/s3select/funcEval.go Normal file
View File

@@ -0,0 +1,231 @@
/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package s3select
import (
"strings"
"github.com/xwb1989/sqlparser"
)
// stringOps is a function which handles the case in a clause if there is a need
// to perform a string function
func stringOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string, columnsMap map[string]int) string {
var value string
funcName := myFunc.Name.CompliantName()
switch tempArg := myFunc.Exprs[0].(type) {
case *sqlparser.AliasedExpr:
switch col := tempArg.Expr.(type) {
case *sqlparser.FuncExpr:
// myReturnVal is actually the tail recursive value being used in the eval func.
return applyStrFunc(myReturnVal, funcName)
case *sqlparser.ColName:
value = applyStrFunc(record[columnsMap[col.Name.CompliantName()]], funcName)
case *sqlparser.SQLVal:
value = applyStrFunc(string(col.Val), funcName)
}
}
return value
}
// coalOps is a function which decomposes a COALESCE func expr into its struct.
func coalOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string, columnsMap map[string]int) string {
myArgs := make([]string, len(myFunc.Exprs))
for i := 0; i < len(myFunc.Exprs); i++ {
switch tempArg := myFunc.Exprs[i].(type) {
case *sqlparser.AliasedExpr:
switch col := tempArg.Expr.(type) {
case *sqlparser.FuncExpr:
// myReturnVal is actually the tail recursive value being used in the eval func.
return myReturnVal
case *sqlparser.ColName:
myArgs[i] = record[columnsMap[col.Name.CompliantName()]]
case *sqlparser.SQLVal:
myArgs[i] = string(col.Val)
}
}
}
return processCoalNoIndex(myArgs)
}
// nullOps is a function which decomposes a NullIf func expr into its struct.
func nullOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string, columnsMap map[string]int) string {
myArgs := make([]string, 2)
for i := 0; i < len(myFunc.Exprs); i++ {
switch tempArg := myFunc.Exprs[i].(type) {
case *sqlparser.AliasedExpr:
switch col := tempArg.Expr.(type) {
case *sqlparser.FuncExpr:
return myReturnVal
case *sqlparser.ColName:
myArgs[i] = record[columnsMap[col.Name.CompliantName()]]
case *sqlparser.SQLVal:
myArgs[i] = string(col.Val)
}
}
}
return processNullIf(myArgs)
}
// isValidString is a function that ensures the current index is one with a
// StrFunc
func isValidFunc(myList []int, index int) bool {
if myList == nil {
return false
}
for i := 0; i < len(myList); i++ {
if myList[i] == index {
return true
}
}
return false
}
// processNullIf is a function that evaluates a given NULLIF clause.
func processNullIf(nullStore []string) string {
nullValOne := nullStore[0]
nullValTwo := nullStore[1]
if nullValOne == nullValTwo {
return ""
}
return nullValOne
}
// processCoalNoIndex is a function which evaluates a given COALESCE clause.
func processCoalNoIndex(coalStore []string) string {
for i := 0; i < len(coalStore); i++ {
if coalStore[i] != "null" && coalStore[i] != "missing" && coalStore[i] != "" {
return coalStore[i]
}
}
return "null"
}
// evaluateFuncExpr is a function that allows for tail recursive evaluation of
// nested function expressions.
func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, myRecord []string, columnsMap map[string]int) string {
if myVal == nil {
return myReturnVal
}
// retrieve all the relevant arguments of the function
var mySubFunc []*sqlparser.FuncExpr
mySubFunc = make([]*sqlparser.FuncExpr, len(myVal.Exprs))
for i := 0; i < len(myVal.Exprs); i++ {
switch col := myVal.Exprs[i].(type) {
case *sqlparser.AliasedExpr:
switch temp := col.Expr.(type) {
case *sqlparser.FuncExpr:
mySubFunc[i] = temp
}
}
}
// Need to do tree recursion so as to explore all possible directions of the
// nested function recursion
for i := 0; i < len(mySubFunc); i++ {
if supportedString(myVal.Name.CompliantName()) {
if mySubFunc != nil {
return stringOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord, columnsMap), columnsMap)
}
return stringOps(myVal, myRecord, myReturnVal, columnsMap)
} else if strings.ToUpper(myVal.Name.CompliantName()) == "NULLIF" {
if mySubFunc != nil {
return nullOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord, columnsMap), columnsMap)
}
return nullOps(myVal, myRecord, myReturnVal, columnsMap)
} else if strings.ToUpper(myVal.Name.CompliantName()) == "COALESCE" {
if mySubFunc != nil {
return coalOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord, columnsMap), columnsMap)
}
return coalOps(myVal, myRecord, myReturnVal, columnsMap)
}
}
return ""
}
// evaluateFuncErr is a function that flags errors in nested functions.
func (reader *Input) evaluateFuncErr(myVal *sqlparser.FuncExpr) error {
if myVal == nil {
return nil
}
if !supportedFunc(myVal.Name.CompliantName()) {
return ErrUnsupportedSQLOperation
}
for i := 0; i < len(myVal.Exprs); i++ {
switch tempArg := myVal.Exprs[i].(type) {
case *sqlparser.StarExpr:
return ErrParseUnsupportedCallWithStar
case *sqlparser.AliasedExpr:
switch col := tempArg.Expr.(type) {
case *sqlparser.FuncExpr:
if err := reader.evaluateFuncErr(col); err != nil {
return err
}
case *sqlparser.ColName:
if err := reader.colNameErrs([]string{col.Name.CompliantName()}); err != nil {
return err
}
}
}
}
return nil
}
// evaluateIsExpr is a function for evaluating expressions of the form "column
// is ...."
func evaluateIsExpr(myFunc *sqlparser.IsExpr, row []string, columnNames map[string]int, alias string) (bool, error) {
operator := myFunc.Operator
var colName string
var myVal string
switch myIs := myFunc.Expr.(type) {
// case for literal val
case *sqlparser.SQLVal:
myVal = string(myIs.Val)
// case for nested func val
case *sqlparser.FuncExpr:
myVal = evaluateFuncExpr(myIs, "", row, columnNames)
// case for col val
case *sqlparser.ColName:
colName = cleanCol(myIs.Name.CompliantName(), alias)
}
// case if it is a col val
if colName != "" {
myVal = row[columnNames[colName]]
}
// case to evaluate is null
if strings.ToLower(operator) == "is null" {
return myVal == "", nil
}
// case to evaluate is not null
if strings.ToLower(operator) == "is not null" {
return myVal != "", nil
}
return false, ErrUnsupportedSQLOperation
}
// supportedString is a function that checks whether the function is a supported
// string one
func supportedString(strFunc string) bool {
return stringInSlice(strings.ToUpper(strFunc), []string{"TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER"})
}
// supportedFunc is a function that checks whether the function is a supported
// S3 one.
func supportedFunc(strFunc string) bool {
return stringInSlice(strings.ToUpper(strFunc), []string{"TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER", "COALESCE", "NULLIF"})
}

754
pkg/s3select/helpers.go Normal file
View File

@@ -0,0 +1,754 @@
/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package s3select
import (
"fmt"
"reflect"
"strconv"
"strings"
"github.com/xwb1989/sqlparser"
)
// This function processes size so that we can calculate bytes BytesProcessed.
func processSize(myrecord []string) int64 {
if len(myrecord) > 0 {
var size int64
size = int64(len(myrecord)-1) + 1
for i := range myrecord {
size += int64(len(myrecord[i]))
}
return size
}
return 0
}
// This function finds whether a string is in a list
func stringInSlice(x string, list []string) bool {
for _, y := range list {
if x == y {
return true
}
}
return false
}
// This function returns the index of a string in a list
func stringIndex(a string, list []string) int {
for i := range list {
if list[i] == a {
return i
}
}
return -1
}
// Returns a true or false, whether a string can be represented as an int.
func representsInt(s string) bool {
if _, err := strconv.Atoi(s); err == nil {
return true
}
return false
}
// The function below processes the where clause into an acutal boolean given a
// row
func matchesMyWhereClause(row []string, columnNames map[string]int, alias string, whereClause interface{}) (bool, error) {
// This particular logic deals with the details of casting, e.g if we have to
// cast a column of string numbers into int's for comparison.
var conversionColumn string
var operator string
var operand interface{}
if fmt.Sprintf("%v", whereClause) == "false" {
return false, nil
}
switch expr := whereClause.(type) {
case *sqlparser.IsExpr:
return evaluateIsExpr(expr, row, columnNames, alias)
case *sqlparser.RangeCond:
operator = expr.Operator
if operator != "between" && operator != "not between" {
return false, ErrUnsupportedSQLOperation
}
if operator == "not between" {
myResult, err := evaluateBetween(expr, alias, row, columnNames)
if err != nil {
return false, err
}
return !myResult, nil
}
myResult, err := evaluateBetween(expr, alias, row, columnNames)
if err != nil {
return false, err
}
return myResult, nil
case *sqlparser.ComparisonExpr:
operator = expr.Operator
switch right := expr.Right.(type) {
case *sqlparser.FuncExpr:
operand = evaluateFuncExpr(right, "", row, columnNames)
case *sqlparser.SQLVal:
var err error
operand, err = evaluateParserType(right)
if err != nil {
return false, err
}
}
var myVal string
myVal = ""
switch left := expr.Left.(type) {
case *sqlparser.FuncExpr:
myVal = evaluateFuncExpr(left, "", row, columnNames)
conversionColumn = ""
case *sqlparser.ColName:
conversionColumn = cleanCol(left.Name.CompliantName(), alias)
}
if representsInt(conversionColumn) {
intCol, err := strconv.Atoi(conversionColumn)
if err != nil {
return false, err
}
// Subtract 1 out because the index starts at 1 for Amazon instead of 0.
return evaluateOperator(row[intCol-1], operator, operand)
}
if myVal != "" {
return evaluateOperator(myVal, operator, operand)
}
return evaluateOperator(row[columnNames[conversionColumn]], operator, operand)
case *sqlparser.AndExpr:
var leftVal bool
var rightVal bool
switch left := expr.Left.(type) {
case *sqlparser.ComparisonExpr:
temp, err := matchesMyWhereClause(row, columnNames, alias, left)
if err != nil {
return false, err
}
leftVal = temp
}
switch right := expr.Right.(type) {
case *sqlparser.ComparisonExpr:
temp, err := matchesMyWhereClause(row, columnNames, alias, right)
if err != nil {
return false, err
}
rightVal = temp
}
return (rightVal && leftVal), nil
case *sqlparser.OrExpr:
var leftVal bool
var rightVal bool
switch left := expr.Left.(type) {
case *sqlparser.ComparisonExpr:
leftVal, _ = matchesMyWhereClause(row, columnNames, alias, left)
}
switch right := expr.Right.(type) {
case *sqlparser.ComparisonExpr:
rightVal, _ = matchesMyWhereClause(row, columnNames, alias, right)
}
return (rightVal || leftVal), nil
}
return true, nil
}
func applyStrFunc(rawArg string, funcName string) string {
switch strings.ToUpper(funcName) {
case "TRIM":
// parser has an issue which does not allow it to support Trim with other
// arguments
return strings.Trim(rawArg, " ")
case "SUBSTRING":
// TODO parser has an issue which does not support substring
return rawArg
case "CHAR_LENGTH":
return strconv.Itoa(len(rawArg))
case "CHARACTER_LENGTH":
return strconv.Itoa(len(rawArg))
case "LOWER":
return strings.ToLower(rawArg)
case "UPPER":
return strings.ToUpper(rawArg)
}
return rawArg
}
// This is a really important function it actually evaluates the boolean
// statement and therefore actually returns a bool, it functions as the lowest
// level of the state machine.
func evaluateOperator(myTblVal string, operator string, operand interface{}) (bool, error) {
if err := checkValidOperator(operator); err != nil {
return false, err
}
myRecordVal := checkStringType(myTblVal)
myVal := reflect.ValueOf(myRecordVal)
myOp := reflect.ValueOf(operand)
switch {
case myVal.Kind() == reflect.String && myOp.Kind() == reflect.String:
return stringEval(myVal.String(), operator, myOp.String())
case myVal.Kind() == reflect.Float64 && myOp.Kind() == reflect.Float64:
return floatEval(myVal.Float(), operator, myOp.Float())
case myVal.Kind() == reflect.Int && myOp.Kind() == reflect.Int:
return intEval(myVal.Int(), operator, myOp.Int())
case myVal.Kind() == reflect.Int && myOp.Kind() == reflect.String:
stringVs := strconv.Itoa(int(myVal.Int()))
return stringEval(stringVs, operator, myOp.String())
case myVal.Kind() == reflect.Float64 && myOp.Kind() == reflect.String:
stringVs := strconv.FormatFloat(myVal.Float(), 'f', 6, 64)
return stringEval(stringVs, operator, myOp.String())
case myVal.Kind() != myOp.Kind():
return false, nil
}
return false, ErrUnsupportedSyntax
}
// checkValidOperator ensures that the current operator is supported
func checkValidOperator(operator string) error {
listOfOps := []string{">", "<", "=", "<=", ">=", "!=", "like"}
for i := range listOfOps {
if operator == listOfOps[i] {
return nil
}
}
return ErrParseUnknownOperator
}
// checkStringType converts the value from the csv to the appropriate one.
func checkStringType(myTblVal string) interface{} {
myInt, isInt := strconv.Atoi(myTblVal)
myFloat, isFloat := strconv.ParseFloat(myTblVal, 64)
if isInt == nil {
return myInt
} else if isFloat == nil {
return myFloat
} else {
return myTblVal
}
}
// stringEval is for evaluating the state of string comparison.
func stringEval(myRecordVal string, operator string, myOperand string) (bool, error) {
switch operator {
case ">":
return myRecordVal > myOperand, nil
case "<":
return myRecordVal < myOperand, nil
case "=":
return myRecordVal == myOperand, nil
case "<=":
return myRecordVal <= myOperand, nil
case ">=":
return myRecordVal >= myOperand, nil
case "!=":
return myRecordVal != myOperand, nil
case "like":
return likeConvert(myOperand, myRecordVal)
}
return false, ErrUnsupportedSyntax
}
// intEval is for evaluating integer comparisons.
func intEval(myRecordVal int64, operator string, myOperand int64) (bool, error) {
switch operator {
case ">":
return myRecordVal > myOperand, nil
case "<":
return myRecordVal < myOperand, nil
case "=":
return myRecordVal == myOperand, nil
case "<=":
return myRecordVal <= myOperand, nil
case ">=":
return myRecordVal >= myOperand, nil
case "!=":
return myRecordVal != myOperand, nil
}
return false, ErrUnsupportedSyntax
}
// floatEval is for evaluating the comparison of floats.
func floatEval(myRecordVal float64, operator string, myOperand float64) (bool, error) {
// Basically need some logic thats like, if the types dont match check for a cast
switch operator {
case ">":
return myRecordVal > myOperand, nil
case "<":
return myRecordVal < myOperand, nil
case "=":
return myRecordVal == myOperand, nil
case "<=":
return myRecordVal <= myOperand, nil
case ">=":
return myRecordVal >= myOperand, nil
case "!=":
return myRecordVal != myOperand, nil
}
return false, ErrUnsupportedSyntax
}
// prefixMatch allows for matching a prefix only like query e.g a%
func prefixMatch(pattern string, record string) bool {
for i := 0; i < len(pattern)-1; i++ {
if pattern[i] != record[i] && pattern[i] != byte('_') {
return false
}
}
return true
}
// suffixMatch allows for matching a suffix only like query e.g %an
func suffixMatch(pattern string, record string) bool {
for i := len(pattern) - 1; i > 0; i-- {
if pattern[i] != record[len(record)-(len(pattern)-i)] && pattern[i] != byte('_') {
return false
}
}
return true
}
// This function is for evaluating select statements which are case sensitive
func likeConvert(pattern string, record string) (bool, error) {
// If pattern is empty just return false
if pattern == "" || record == "" {
return false, nil
}
// for suffix match queries e.g %a
if len(pattern) >= 2 && pattern[0] == byte('%') && strings.Count(pattern, "%") == 1 {
return suffixMatch(pattern, record), nil
}
// for prefix match queries e.g a%
if len(pattern) >= 2 && pattern[len(pattern)-1] == byte('%') && strings.Count(pattern, "%") == 1 {
return prefixMatch(pattern, record), nil
}
charCount := 0
currPos := 0
// Loop through the pattern so that a boolean can be returned
for i := 0; i < len(pattern); i++ {
if pattern[i] == byte('_') {
// if its an underscore it can be anything so shift current position for
// pattern and string
charCount++
// if there have been more characters in the pattern than record, clearly
// there should be a return
if i != len(pattern)-1 {
if pattern[i+1] != byte('%') && pattern[i+1] != byte('_') {
if currPos != len(record)-1 && pattern[i+1] != record[currPos+1] {
return false, nil
}
}
}
if charCount > len(record) {
return false, nil
}
// if the pattern has been fully evaluated, then just return.
if len(pattern) == i+1 {
return true, nil
}
i++
currPos++
}
if pattern[i] == byte('%') || pattern[i] == byte('*') {
// if there is a wildcard then want to return true if its last and flag it.
if currPos == len(record) {
return false, nil
}
if i+1 == len(pattern) {
return true, nil
}
} else {
charCount++
matched := false
// iterate through the pattern and check if there is a match for the
// character
for currPos < len(record) {
if record[currPos] == pattern[i] || pattern[i] == byte('_') {
matched = true
break
}
currPos++
}
currPos++
// if the character did not match then return should occur.
if !matched {
return false, nil
}
}
}
if charCount > len(record) {
return false, nil
}
if currPos < len(record) {
return false, nil
}
return true, nil
}
// TrimQuotes allows the following to occur select "name", we need to trim the
// quotes to reference our map of columnNames.
func trimQuotes(s string) string {
if len(s) >= 2 {
if c := s[len(s)-1]; s[0] == c && (c == '"') {
return s[1 : len(s)-1]
}
}
return s
}
// cleanCol cleans a column name from the parser so that the name is returned to
// original.
func cleanCol(myCol string, alias string) string {
if len(myCol) <= 0 {
return myCol
}
if !strings.HasPrefix(myCol, alias) && myCol[0] == '_' {
myCol = alias + myCol
}
if strings.Contains(myCol, ".") {
myCol = strings.Replace(myCol, alias+"._", "", len(myCol))
}
myCol = strings.Replace(myCol, alias+"_", "", len(myCol))
return myCol
}
// evaluateBetween is a function which evaluates a Between Clause.
func evaluateBetween(betweenExpr *sqlparser.RangeCond, alias string, record []string, columnNames map[string]int) (bool, error) {
var colToVal interface{}
var colFromVal interface{}
var conversionColumn string
var funcName string
switch colTo := betweenExpr.To.(type) {
case sqlparser.Expr:
switch colToMyVal := colTo.(type) {
case *sqlparser.FuncExpr:
var temp string
temp = stringOps(colToMyVal, record, "", columnNames)
colToVal = []byte(temp)
case *sqlparser.SQLVal:
var err error
colToVal, err = evaluateParserType(colToMyVal)
if err != nil {
return false, err
}
}
}
switch colFrom := betweenExpr.From.(type) {
case sqlparser.Expr:
switch colFromMyVal := colFrom.(type) {
case *sqlparser.FuncExpr:
colFromVal = stringOps(colFromMyVal, record, "", columnNames)
case *sqlparser.SQLVal:
var err error
colFromVal, err = evaluateParserType(colFromMyVal)
if err != nil {
return false, err
}
}
}
var myFuncVal string
myFuncVal = ""
switch left := betweenExpr.Left.(type) {
case *sqlparser.FuncExpr:
myFuncVal = evaluateFuncExpr(left, "", record, columnNames)
conversionColumn = ""
case *sqlparser.ColName:
conversionColumn = cleanCol(left.Name.CompliantName(), alias)
}
toGreater, err := evaluateOperator(fmt.Sprintf("%v", colToVal), ">", colFromVal)
if err != nil {
return false, err
}
if toGreater {
return evalBetweenGreater(conversionColumn, record, funcName, columnNames, colFromVal, colToVal, myFuncVal)
}
return evalBetweenLess(conversionColumn, record, funcName, columnNames, colFromVal, colToVal, myFuncVal)
}
// evalBetweenLess is a function which evaluates the between given that the
// FROM is > than the TO.
func evalBetweenLess(conversionColumn string, record []string, funcName string, columnNames map[string]int, colFromVal interface{}, colToVal interface{}, myCoalVal string) (bool, error) {
if representsInt(conversionColumn) {
myIndex, _ := strconv.Atoi(conversionColumn)
// Subtract 1 out because the index starts at 1 for Amazon instead of 0.
myVal, err := evaluateOperator(record[myIndex-1], "<=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(record[myIndex-1]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
if myCoalVal != "" {
myVal, err := evaluateOperator(myCoalVal, "<=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(myCoalVal))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
myVal, err := evaluateOperator(record[columnNames[conversionColumn]], "<=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(record[columnNames[conversionColumn]]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
// evalBetweenGreater is a function which evaluates the between given that the
// TO is > than the FROM.
func evalBetweenGreater(conversionColumn string, record []string, funcName string, columnNames map[string]int, colFromVal interface{}, colToVal interface{}, myCoalVal string) (bool, error) {
if representsInt(conversionColumn) {
myIndex, _ := strconv.Atoi(conversionColumn)
myVal, err := evaluateOperator(record[myIndex-1], ">=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(record[myIndex-1]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
if myCoalVal != "" {
myVal, err := evaluateOperator(myCoalVal, ">=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(myCoalVal))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
myVal, err := evaluateOperator(record[columnNames[conversionColumn]], ">=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(record[columnNames[conversionColumn]]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
// whereClauseNameErrs is a function which returns an error if there is a column
// in the where clause which does not exist.
func (reader *Input) whereClauseNameErrs(whereClause interface{}, alias string) error {
var conversionColumn string
switch expr := whereClause.(type) {
// case for checking errors within a clause of the form "col_name is ..."
case *sqlparser.IsExpr:
switch myCol := expr.Expr.(type) {
case *sqlparser.FuncExpr:
if err := reader.evaluateFuncErr(myCol); err != nil {
return err
}
case *sqlparser.ColName:
conversionColumn = cleanCol(myCol.Name.CompliantName(), alias)
}
case *sqlparser.RangeCond:
switch left := expr.Left.(type) {
case *sqlparser.FuncExpr:
if err := reader.evaluateFuncErr(left); err != nil {
return err
}
case *sqlparser.ColName:
conversionColumn = cleanCol(left.Name.CompliantName(), alias)
}
case *sqlparser.ComparisonExpr:
switch left := expr.Left.(type) {
case *sqlparser.FuncExpr:
if err := reader.evaluateFuncErr(left); err != nil {
return err
}
case *sqlparser.ColName:
conversionColumn = cleanCol(left.Name.CompliantName(), alias)
}
case *sqlparser.AndExpr:
switch left := expr.Left.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(left, alias)
}
switch right := expr.Right.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(right, alias)
}
case *sqlparser.OrExpr:
switch left := expr.Left.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(left, alias)
}
switch right := expr.Right.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(right, alias)
}
}
if conversionColumn != "" {
return reader.colNameErrs([]string{conversionColumn})
}
return nil
}
// qualityCheck ensures the row has enough separators.
func qualityCheck(row string, amountOfSep int, sep string) string {
for i := 0; i < amountOfSep; i++ {
row = row + sep
}
return row
}
// writeRow helps to write the row regardless of how many entries.
func writeRow(myRow string, myEntry string, delimiter string, numOfReqCols int) string {
if myEntry == "" && len(myRow) == 0 && numOfReqCols == 1 {
return myEntry
}
if myEntry == "" && len(myRow) == 0 {
return myEntry + delimiter
}
if len(myRow) == 1 && myRow[0] == ',' {
return myRow + myEntry
}
if len(myRow) == 0 {
return myEntry
}
return myRow + delimiter + myEntry
}
// colNameErrs is a function which makes sure that the headers are requested are
// present in the file otherwise it throws an error.
func (reader *Input) colNameErrs(columnNames []string) error {
for i := 0; i < len(columnNames); i++ {
if columnNames[i] == "" {
continue
}
if !representsInt(columnNames[i]) && !reader.options.HeaderOpt {
return ErrInvalidColumnIndex
}
if representsInt(columnNames[i]) {
tempInt, _ := strconv.Atoi(columnNames[i])
if tempInt > len(reader.Header()) || tempInt == 0 {
return ErrInvalidColumnIndex
}
} else {
if reader.options.HeaderOpt && !stringInSlice(columnNames[i], reader.Header()) {
return ErrMissingHeaders
}
}
}
return nil
}
// aggFuncToStr converts an array of floats into a properly formatted string.
func (reader *Input) aggFuncToStr(myAggVals []float64) string {
myRow := strconv.FormatFloat(myAggVals[0], 'f', 6, 64)
for i := 1; i < len(myAggVals); i++ {
aggregateval := strconv.FormatFloat(myAggVals[i], 'f', 6, 64)
myRow = myRow + reader.options.OutputFieldDelimiter + aggregateval
}
return myRow
}
// checkForDuplicates ensures we do not have an ambigious column name.
func checkForDuplicates(columns []string, columnsMap map[string]int, hasDuplicates map[string]bool, lowercaseColumnsMap map[string]int) error {
for i := 0; i < len(columns); i++ {
columns[i] = strings.Replace(columns[i], " ", "_", len(columns[i]))
if _, exist := columnsMap[columns[i]]; exist {
return ErrAmbiguousFieldName
}
columnsMap[columns[i]] = i
// This checks that if a key has already been put into the map, that we're
// setting its appropriate value in has duplicates to be true.
if _, exist := lowercaseColumnsMap[strings.ToLower(columns[i])]; exist {
hasDuplicates[strings.ToLower(columns[i])] = true
} else {
lowercaseColumnsMap[strings.ToLower(columns[i])] = i
}
}
return nil
}
// evaluateParserType is a function that takes a SQL value and returns it as an
// interface converted into the appropriate value.
func evaluateParserType(col *sqlparser.SQLVal) (interface{}, error) {
colDataType := col.Type
var val interface{}
switch colDataType {
case 0:
val = string(col.Val)
case 1:
intVersion, isInt := strconv.Atoi(string(col.Val))
if isInt != nil {
return nil, ErrIntegerOverflow
}
val = intVersion
case 2:
floatVersion, isFloat := strconv.ParseFloat(string(col.Val), 64)
if isFloat != nil {
return nil, ErrIntegerOverflow
}
val = floatVersion
}
return val, nil
}
// parseErrs is the function which handles all the errors that could occur
// through use of function arguments such as column names in NULLIF
func (reader *Input) parseErrs(columnNames []string, whereClause interface{}, alias string, myFuncs *SelectFuncs) error {
// Below code cleans up column names.
reader.processColumnNames(columnNames, alias)
if columnNames[0] != "*" {
if err := reader.colNameErrs(columnNames); err != nil {
return err
}
}
// Below code ensures the whereClause has no errors.
if whereClause != nil {
tempClause := whereClause
if err := reader.whereClauseNameErrs(tempClause, alias); err != nil {
return err
}
}
for i := 0; i < len(myFuncs.funcExpr); i++ {
if myFuncs.funcExpr[i] == nil {
continue
}
if err := reader.evaluateFuncErr(myFuncs.funcExpr[i]); err != nil {
return err
}
}
return nil
}

381
pkg/s3select/input.go Normal file
View File

@@ -0,0 +1,381 @@
/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package s3select
import (
"bytes"
"compress/bzip2"
"encoding/csv"
"encoding/xml"
"io"
"strconv"
"strings"
"time"
"net/http"
gzip "github.com/klauspost/pgzip"
)
const (
// progressTime is the time interval for which a progress message is sent.
progressTime time.Duration = 60 * time.Second
// continuationTime is the time interval for which a continuation message is
// sent.
continuationTime time.Duration = 5 * time.Second
)
// progress represents a struct that represents the format for XML of the
// progress messages
type progress struct {
BytesScanned int64 `xml:"BytesScanned"`
BytesProcessed int64 `xml:"BytesProcessed"`
BytesReturned int64 `xml:"BytesReturned"`
Xmlns string `xml:"xmlns,attr"`
}
// stats represents a struct that represents the format for XML of the stat
// messages
type stats struct {
BytesScanned int64 `xml:"BytesScanned"`
BytesProcessed int64 `xml:"BytesProcessed"`
BytesReturned int64 `xml:"BytesReturned"`
Xmlns string `xml:"xmlns,attr"`
}
// StatInfo is a struct that represents the
type statInfo struct {
BytesScanned int64
BytesReturned int64
BytesProcessed int64
}
// Input represents a record producing input from a formatted file or pipe.
type Input struct {
options *Options
reader *csv.Reader
firstRow []string
header []string
minOutputLength int
stats *statInfo
}
// Options options are passed to the underlying encoding/csv reader.
type Options struct {
// HasHeader when true, will treat the first row as a header row.
HasHeader bool
// FieldDelimiter is the string that fields are delimited by.
FieldDelimiter string
// Comments is the string the first character of a line of
// text matches the comment character.
Comments string
// Name of the table that is used for querying
Name string
// ReadFrom is where the data will be read from.
ReadFrom io.Reader
// If true then we need to add gzip or bzip reader.
// to extract the csv.
Compressed string
// SQL expression meant to be evaluated.
Expression string
// What the outputted CSV will be delimited by .
OutputFieldDelimiter string
// Size of incoming object
StreamSize int64
// Whether Header is "USE" or another
HeaderOpt bool
}
// NewInput sets up a new Input, the first row is read when this is run.
// If there is a problem with reading the first row, the error is returned.
// Otherwise, the returned reader can be reliably consumed with ReadRecord()
// until ReadRecord() returns nil.
func NewInput(opts *Options) (*Input, error) {
myReader := opts.ReadFrom
var tempBytesScanned int64
tempBytesScanned = 0
if opts.Compressed == "GZIP" {
tempBytesScanned = opts.StreamSize
var err error
if myReader, err = gzip.NewReader(opts.ReadFrom); err != nil {
return nil, ErrTruncatedInput
}
} else if opts.Compressed == "BZIP2" {
tempBytesScanned = opts.StreamSize
myReader = bzip2.NewReader(opts.ReadFrom)
}
progress := &statInfo{
BytesScanned: tempBytesScanned,
BytesProcessed: 0,
BytesReturned: 0,
}
reader := &Input{
options: opts,
reader: csv.NewReader(myReader),
stats: progress,
}
reader.firstRow = nil
reader.reader.FieldsPerRecord = -1
if reader.options.FieldDelimiter != "" {
reader.reader.Comma = rune(reader.options.FieldDelimiter[0])
}
if reader.options.Comments != "" {
reader.reader.Comment = rune(reader.options.Comments[0])
}
// QuoteCharacter - " (defaulted currently)
reader.reader.LazyQuotes = true
if err := reader.readHeader(); err != nil {
return nil, err
}
return reader, nil
}
// ReadRecord reads a single record from the . Always returns successfully.
// If the record is empty, an empty []string is returned.
// Record expand to match the current row size, adding blank fields as needed.
// Records never return less then the number of fields in the first row.
// Returns nil on EOF
// In the event of a parse error due to an invalid record, it is logged, and
// an empty []string is returned with the number of fields in the first row,
// as if the record were empty.
//
// In general, this is a very tolerant of problems reader.
func (reader *Input) ReadRecord() []string {
var row []string
var fileErr error
if reader.firstRow != nil {
row = reader.firstRow
reader.firstRow = nil
return row
}
row, fileErr = reader.reader.Read()
emptysToAppend := reader.minOutputLength - len(row)
if fileErr == io.EOF || fileErr == io.ErrClosedPipe {
return nil
} else if _, ok := fileErr.(*csv.ParseError); ok {
emptysToAppend = reader.minOutputLength
}
if emptysToAppend > 0 {
for counter := 0; counter < emptysToAppend; counter++ {
row = append(row, "")
}
}
return row
}
// convertMySQL Replaces double quote escape for column names with backtick for
// the MySQL parser
func convertMySQL(random string) string {
return strings.Replace(random, "\"", "`", len(random))
}
// readHeader reads the header into the header variable if the header is present
// as the first row of the csv
func (reader *Input) readHeader() error {
var readErr error
if reader.options.HasHeader {
reader.firstRow, readErr = reader.reader.Read()
if readErr != nil {
return ErrCSVParsingError
}
reader.header = reader.firstRow
reader.firstRow = nil
reader.minOutputLength = len(reader.header)
} else {
reader.firstRow, readErr = reader.reader.Read()
reader.header = make([]string, len(reader.firstRow))
for i := 0; i < reader.minOutputLength; i++ {
reader.header[i] = strconv.Itoa(i)
}
}
return nil
}
// createStatXML is the function which does the marshaling from the stat
// structs into XML so that the progress and stat message can be sent
func (reader *Input) createStatXML() (string, error) {
if reader.options.Compressed == "NONE" {
reader.stats.BytesProcessed = reader.options.StreamSize
reader.stats.BytesScanned = reader.stats.BytesProcessed
}
statXML := stats{
BytesScanned: reader.stats.BytesScanned,
BytesProcessed: reader.stats.BytesProcessed,
BytesReturned: reader.stats.BytesReturned,
Xmlns: "",
}
out, err := xml.Marshal(statXML)
if err != nil {
return "", err
}
return xml.Header + string(out), nil
}
// createProgressXML is the function which does the marshaling from the progress structs into XML so that the progress and stat message can be sent
func (reader *Input) createProgressXML() (string, error) {
if reader.options.HasHeader {
reader.stats.BytesProcessed += processSize(reader.header)
}
if !(reader.options.Compressed != "NONE") {
reader.stats.BytesScanned = reader.stats.BytesProcessed
}
progressXML := &progress{
BytesScanned: reader.stats.BytesScanned,
BytesProcessed: reader.stats.BytesProcessed,
BytesReturned: reader.stats.BytesReturned,
Xmlns: "",
}
out, err := xml.Marshal(progressXML)
if err != nil {
return "", err
}
return xml.Header + string(out), nil
}
// Header returns the header of the reader. Either the first row if a header
// set in the options, or c#, where # is the column number, starting with 0.
func (reader *Input) Header() []string {
return reader.header
}
// Row is a Struct for keeping track of key aspects of a row.
type Row struct {
record string
err error
}
// Execute is the function where all the blocking occurs, It writes to the HTTP
// response writer in a streaming fashion so that the client can actively use
// the results before the query is finally finished executing. The
func (reader *Input) Execute(writer io.Writer) error {
myRow := make(chan *Row)
curBuf := bytes.NewBuffer(make([]byte, 1000000))
curBuf.Reset()
progressTicker := time.NewTicker(progressTime)
continuationTimer := time.NewTimer(continuationTime)
defer progressTicker.Stop()
defer continuationTimer.Stop()
go reader.runSelectParser(convertMySQL(reader.options.Expression), myRow)
for {
select {
case row, ok := <-myRow:
if ok && row.err != nil {
errorMessage := reader.writeErrorMessage(row.err, curBuf)
_, err := errorMessage.WriteTo(writer)
flusher, okFlush := writer.(http.Flusher)
if okFlush {
flusher.Flush()
}
if err != nil {
return err
}
curBuf.Reset()
close(myRow)
return nil
} else if ok {
message := reader.writeRecordMessage(row.record, curBuf)
_, err := message.WriteTo(writer)
flusher, okFlush := writer.(http.Flusher)
if okFlush {
flusher.Flush()
}
if err != nil {
return err
}
curBuf.Reset()
reader.stats.BytesReturned += int64(len(row.record))
if !continuationTimer.Stop() {
<-continuationTimer.C
}
continuationTimer.Reset(continuationTime)
} else if !ok {
statPayload, err := reader.createStatXML()
if err != nil {
return err
}
statMessage := reader.writeStatMessage(statPayload, curBuf)
_, err = statMessage.WriteTo(writer)
flusher, ok := writer.(http.Flusher)
if ok {
flusher.Flush()
}
if err != nil {
return err
}
curBuf.Reset()
message := reader.writeEndMessage(curBuf)
_, err = message.WriteTo(writer)
flusher, ok = writer.(http.Flusher)
if ok {
flusher.Flush()
}
if err != nil {
return err
}
return nil
}
case <-progressTicker.C:
progressPayload, err := reader.createProgressXML()
if err != nil {
return err
}
progressMessage := reader.writeProgressMessage(progressPayload, curBuf)
_, err = progressMessage.WriteTo(writer)
flusher, ok := writer.(http.Flusher)
if ok {
flusher.Flush()
}
if err != nil {
return err
}
curBuf.Reset()
case <-continuationTimer.C:
message := reader.writeContinuationMessage(curBuf)
_, err := message.WriteTo(writer)
flusher, ok := writer.(http.Flusher)
if ok {
flusher.Flush()
}
if err != nil {
return err
}
curBuf.Reset()
continuationTimer.Reset(continuationTime)
}
}
}

460
pkg/s3select/output.go Normal file
View File

@@ -0,0 +1,460 @@
/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// DO NOT EDIT THIS PACKAGE DIRECTLY: This follows the protocol defined by
// AmazonS3 found at
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
// Consult the Spec before making direct edits.
package s3select
import (
"bytes"
"encoding/binary"
"hash/crc32"
)
// Record Headers
// -11 -event type - 7 - 7 "Records"
// -13 -content-type -7 -24 "application/octet-stream"
// -13 -message-type -7 5 "event"
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
var recordHeaders []byte
// End Headers
// -13 -message-type -7 -5 "event"
// -11 -:event-type -7 -3 "End"
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
var endHeaders []byte
// Continuation Headers
// -13 -message-type -7 -5 "event"
// -11 -:event-type -7 -4 "Cont"
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
var contHeaders []byte
// Stat Headers
// -11 -event type - 7 - 5 "Stat" -20
// -13 -content-type -7 -8 "text/xml" -25
// -13 -message-type -7 -5 "event" -22
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
var statHeaders []byte
// Progress Headers
// -11 -event type - 7 - 8 "Progress" -23
// -13 -content-type -7 -8 "text/xml" -25
// -13 -message-type -7 -5 "event" -22
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
var progressHeaders []byte
// The length of the nonvariable portion of the ErrHeaders
// The below are the specifications of the header for a "error" event
// -11 -error-code - 7 - DEFINED "DEFINED"
// -14 -error-message -7 -DEFINED "DEFINED"
// -13 -message-type -7 -5 "error"
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
var errHdrLen int
func init() {
recordHeaders = writeRecordHeader()
endHeaders = writeEndHeader()
contHeaders = writeContHeader()
statHeaders = writeStatHeader()
progressHeaders = writeProgressHeader()
errHdrLen = 55
}
// encodeString encodes a string in a []byte, lenBytes is the number of bytes
// used to encode the length of the string.
func encodeHeaderStringValue(s string) []byte {
n := uint16(len(s))
lenSlice := make([]byte, 2)
binary.BigEndian.PutUint16(lenSlice[0:], n)
return append(lenSlice, []byte(s)...)
}
func encodeHeaderStringName(s string) []byte {
lenSlice := make([]byte, 1)
lenSlice[0] = byte(len(s))
return append(lenSlice, []byte(s)...)
}
// encodeNumber encodes a number in a []byte, lenBytes is the number of bytes
// used to encode the length of the string.
func encodeNumber(n byte, lenBytes int) []byte {
lenSlice := make([]byte, lenBytes)
lenSlice[0] = n
return lenSlice
}
// writePayloadSize writes the 4byte payload size portion of the protocol.
func writePayloadSize(payloadSize int, headerLength int) []byte {
totalByteLen := make([]byte, 4)
totalMsgLen := uint32(payloadSize + headerLength + 16)
binary.BigEndian.PutUint32(totalByteLen, totalMsgLen)
return totalByteLen
}
// writeHeaderSize writes the 4byte header size portion of the protocol.
func writeHeaderSize(headerLength int) []byte {
totalHeaderLen := make([]byte, 4)
totalLen := uint32(headerLength)
binary.BigEndian.PutUint32(totalHeaderLen, totalLen)
return totalHeaderLen
}
// writeCRC writes the CRC for both the prelude and and the end of the protocol.
func writeCRC(myBuffer []byte) []byte {
// Calculate the CRC here:
myCRC := make([]byte, 4)
cksum := crc32.ChecksumIEEE(myBuffer)
binary.BigEndian.PutUint32(myCRC, cksum)
return myCRC
}
// writePayload writes the Payload for those protocols which the Payload is
// necessary.
func writePayload(myPayload string) []byte {
convertedPayload := []byte(myPayload)
payloadStore := make([]byte, len(convertedPayload))
copy(payloadStore[0:], myPayload)
return payloadStore
}
// writeRecordHeader is a function which writes the headers for the continuation
// Message
func writeRecordHeader() []byte {
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
var currentMessage = &bytes.Buffer{}
// 11 -event type - 7 - 7 "Records"
// header name
currentMessage.Write(encodeHeaderStringName(":event-type"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue("Records"))
// Creation of the Header for Content-Type // 13 -content-type -7 -24
// "application/octet-stream"
// header name
currentMessage.Write(encodeHeaderStringName(":content-type"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue("application/octet-stream"))
// Creation of the Header for message-type 13 -message-type -7 5 "event"
// header name
currentMessage.Write(encodeHeaderStringName(":message-type"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue("event"))
return currentMessage.Bytes()
}
// writeEndHeader is a function which writes the headers for the continuation
// Message
func writeEndHeader() []byte {
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
var currentMessage = &bytes.Buffer{}
// header name
currentMessage.Write(encodeHeaderStringName(":event-type"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue("End"))
// Creation of the Header for message-type 13 -message-type -7 5 "event"
// header name
currentMessage.Write(encodeHeaderStringName(":message-type"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue("event"))
return currentMessage.Bytes()
}
// writeContHeader is a function which writes the headers for the continuation
// Message
func writeContHeader() []byte {
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
var currentMessage = &bytes.Buffer{}
// header name
currentMessage.Write(encodeHeaderStringName(":event-type"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue("Cont"))
// Creation of the Header for message-type 13 -message-type -7 5 "event"
// header name
currentMessage.Write(encodeHeaderStringName(":message-type"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue("event"))
return currentMessage.Bytes()
}
// writeStatHeader is a function which writes the headers for the Stat
// Message
func writeStatHeader() []byte {
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
var currentMessage = &bytes.Buffer{}
// header name
currentMessage.Write(encodeHeaderStringName(":event-type"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue("Stats"))
// Creation of the Header for Content-Type // 13 -content-type -7 -8
// "text/xml"
// header name
currentMessage.Write(encodeHeaderStringName(":content-type"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue("text/xml"))
// Creation of the Header for message-type 13 -message-type -7 5 "event"
currentMessage.Write(encodeHeaderStringName(":message-type"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue("event"))
return currentMessage.Bytes()
}
// writeProgressHeader is a function which writes the headers for the Progress
// Message
func writeProgressHeader() []byte {
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
var currentMessage = &bytes.Buffer{}
// header name
currentMessage.Write(encodeHeaderStringName(":event-type"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue("Progress"))
// Creation of the Header for Content-Type // 13 -content-type -7 -8
// "text/xml"
// header name
currentMessage.Write(encodeHeaderStringName(":content-type"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue("text/xml"))
// Creation of the Header for message-type 13 -message-type -7 5 "event"
// header name
currentMessage.Write(encodeHeaderStringName(":message-type"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue("event"))
return currentMessage.Bytes()
}
// writeRecordMessage is the function which constructs the binary message for a
// record message to be sent.
func (csvOutput *Input) writeRecordMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer {
// The below are the specifications of the header for a "record" event
// 11 -event type - 7 - 7 "Records"
// 13 -content-type -7 -24 "application/octet-stream"
// 13 -message-type -7 5 "event"
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
headerLen := len(recordHeaders)
// Writes the total size of the message.
currentMessage.Write(writePayloadSize(len(payload), headerLen))
// Writes the total size of the header.
currentMessage.Write(writeHeaderSize(headerLen))
// Writes the CRC of the Prelude
currentMessage.Write(writeCRC(currentMessage.Bytes()))
currentMessage.Write(recordHeaders)
// This part is where the payload is written, this will be only one row, since
// we're sending one message at a types
currentMessage.Write(writePayload(payload))
// Now we do a CRC check on the entire messages
currentMessage.Write(writeCRC(currentMessage.Bytes()))
return currentMessage
}
// writeContinuationMessage is the function which constructs the binary message
// for a continuation message to be sent.
func (csvOutput *Input) writeContinuationMessage(currentMessage *bytes.Buffer) *bytes.Buffer {
// 11 -event type - 7 - 4 "Cont"
// 13 -message-type -7 5 "event"
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
headerLen := len(contHeaders)
currentMessage.Write(writePayloadSize(0, headerLen))
currentMessage.Write(writeHeaderSize(headerLen))
// Calculate the Prelude CRC here:
currentMessage.Write(writeCRC(currentMessage.Bytes()))
currentMessage.Write(contHeaders)
//Now we do a CRC check on the entire messages
currentMessage.Write(writeCRC(currentMessage.Bytes()))
return currentMessage
}
// writeEndMessage is the function which constructs the binary message
// for a end message to be sent.
func (csvOutput *Input) writeEndMessage(currentMessage *bytes.Buffer) *bytes.Buffer {
// 11 -event type - 7 - 3 "End"
// 13 -message-type -7 5 "event"
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
headerLen := len(endHeaders)
currentMessage.Write(writePayloadSize(0, headerLen))
currentMessage.Write(writeHeaderSize(headerLen))
//Calculate the Prelude CRC here:
currentMessage.Write(writeCRC(currentMessage.Bytes()))
currentMessage.Write(endHeaders)
// Now we do a CRC check on the entire messages
currentMessage.Write(writeCRC(currentMessage.Bytes()))
return currentMessage
}
// writeStateMessage is the function which constructs the binary message for a
// state message to be sent.
func (csvOutput *Input) writeStatMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer {
// 11 -event type - 7 - 5 "Stat" 20
// 13 -content-type -7 -8 "text/xml" 25
// 13 -message-type -7 5 "event" 22
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
headerLen := len(statHeaders)
currentMessage.Write(writePayloadSize(len(payload), headerLen))
currentMessage.Write(writeHeaderSize(headerLen))
currentMessage.Write(writeCRC(currentMessage.Bytes()))
currentMessage.Write(statHeaders)
// This part is where the payload is written, this will be only one row, since
// we're sending one message at a types
currentMessage.Write(writePayload(payload))
// Now we do a CRC check on the entire messages
currentMessage.Write(writeCRC(currentMessage.Bytes()))
return currentMessage
}
// writeProgressMessage is the function which constructs the binary message for
// a progress message to be sent.
func (csvOutput *Input) writeProgressMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer {
// The below are the specifications of the header for a "Progress" event
// 11 -event type - 7 - 8 "Progress" 23
// 13 -content-type -7 -8 "text/xml" 25
// 13 -message-type -7 5 "event" 22
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
headerLen := len(progressHeaders)
currentMessage.Write(writePayloadSize(len(payload), headerLen))
currentMessage.Write(writeHeaderSize(headerLen))
currentMessage.Write(writeCRC(currentMessage.Bytes()))
currentMessage.Write(progressHeaders)
// This part is where the payload is written, this will be only one row, since
// we're sending one message at a types
currentMessage.Write(writePayload(payload))
// Now we do a CRC check on the entire messages
currentMessage.Write(writeCRC(currentMessage.Bytes()))
return currentMessage
}
// writeErrorMessage is the function which constructs the binary message for a
// error message to be sent.
func (csvOutput *Input) writeErrorMessage(errorMessage error, currentMessage *bytes.Buffer) *bytes.Buffer {
// The below are the specifications of the header for a "error" event
// 11 -error-code - 7 - DEFINED "DEFINED"
// 14 -error-message -7 -DEFINED "DEFINED"
// 13 -message-type -7 5 "error"
// This is predefined from AMZ protocol found here:
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
sizeOfErrorCode := len(errorCodeResponse[errorMessage])
sizeOfErrorMessage := len(errorMessage.Error())
headerLen := errHdrLen + sizeOfErrorCode + sizeOfErrorMessage
currentMessage.Write(writePayloadSize(0, headerLen))
currentMessage.Write(writeHeaderSize(headerLen))
currentMessage.Write(writeCRC(currentMessage.Bytes()))
// header name
currentMessage.Write(encodeHeaderStringName(":error-code"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue(errorCodeResponse[errorMessage]))
// 14 -error-message -7 -DEFINED "DEFINED"
// header name
currentMessage.Write(encodeHeaderStringName(":error-message"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue(errorMessage.Error()))
// Creation of the Header for message-type 13 -message-type -7 5 "error"
// header name
currentMessage.Write(encodeHeaderStringName(":message-type"))
// header type
currentMessage.Write(encodeNumber(7, 1))
// header value and header value length
currentMessage.Write(encodeHeaderStringValue("error"))
// Now we do a CRC check on the entire messages
currentMessage.Write(writeCRC(currentMessage.Bytes()))
return currentMessage
}

415
pkg/s3select/select.go Normal file
View File

@@ -0,0 +1,415 @@
/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package s3select
import (
"math"
"strconv"
"strings"
"github.com/xwb1989/sqlparser"
)
// SelectFuncs contains the relevant values from the parser for S3 Select
// Functions
type SelectFuncs struct {
funcExpr []*sqlparser.FuncExpr
index []int
}
// RunSqlParser allows us to easily bundle all the functions from above and run
// them in the appropriate order.
func (reader *Input) runSelectParser(selectExpression string, myRow chan *Row) {
reqCols, alias, myLimit, whereClause, aggFunctionNames, myFuncs, myErr := reader.ParseSelect(selectExpression)
if myErr != nil {
rowStruct := &Row{
err: myErr,
}
myRow <- rowStruct
return
}
reader.processSelectReq(reqCols, alias, whereClause, myLimit, aggFunctionNames, myRow, myFuncs)
}
// ParseSelect parses the SELECT expression, and effectively tokenizes it into
// its separate parts. It returns the requested column names,alias,limit of
// records, and the where clause.
func (reader *Input) ParseSelect(sqlInput string) ([]string, string, int, interface{}, []string, *SelectFuncs, error) {
// return columnNames, alias, limitOfRecords, whereclause,coalStore, nil
stmt, err := sqlparser.Parse(sqlInput)
var whereClause interface{}
var alias string
var limit int
myFuncs := &SelectFuncs{}
// TODO Maybe can parse their errors a bit to return some more of the s3 errors
if err != nil {
return nil, "", 0, nil, nil, nil, ErrLexerInvalidChar
}
switch stmt := stmt.(type) {
case *sqlparser.Select:
// evaluates the where clause
functionNames := make([]string, len(stmt.SelectExprs))
columnNames := make([]string, len(stmt.SelectExprs))
if stmt.Where != nil {
switch expr := stmt.Where.Expr.(type) {
default:
whereClause = expr
case *sqlparser.ComparisonExpr:
whereClause = expr
}
}
if stmt.SelectExprs != nil {
for i := 0; i < len(stmt.SelectExprs); i++ {
switch expr := stmt.SelectExprs[i].(type) {
case *sqlparser.StarExpr:
columnNames[0] = "*"
case *sqlparser.AliasedExpr:
switch smallerexpr := expr.Expr.(type) {
case *sqlparser.FuncExpr:
if smallerexpr.IsAggregate() {
functionNames[i] = smallerexpr.Name.CompliantName()
// Will return function name
// Case to deal with if we have functions and not an asterix
switch tempagg := smallerexpr.Exprs[0].(type) {
case *sqlparser.StarExpr:
columnNames[0] = "*"
if smallerexpr.Name.CompliantName() != "count" {
return nil, "", 0, nil, nil, nil, ErrParseUnsupportedCallWithStar
}
case *sqlparser.AliasedExpr:
switch col := tempagg.Expr.(type) {
case *sqlparser.BinaryExpr:
return nil, "", 0, nil, nil, nil, ErrParseNonUnaryAgregateFunctionCall
case *sqlparser.ColName:
columnNames[i] = col.Name.CompliantName()
}
}
// Case to deal with if COALESCE was used..
} else if supportedFunc(smallerexpr.Name.CompliantName()) {
if myFuncs.funcExpr == nil {
myFuncs.funcExpr = make([]*sqlparser.FuncExpr, len(stmt.SelectExprs))
myFuncs.index = make([]int, len(stmt.SelectExprs))
}
myFuncs.funcExpr[i] = smallerexpr
myFuncs.index[i] = i
} else {
return nil, "", 0, nil, nil, nil, ErrUnsupportedSQLOperation
}
case *sqlparser.ColName:
columnNames[i] = smallerexpr.Name.CompliantName()
}
}
}
}
// This code retrieves the alias and makes sure it is set to the correct
// value, if not it sets it to the tablename
if (stmt.From) != nil {
for i := 0; i < len(stmt.From); i++ {
switch smallerexpr := stmt.From[i].(type) {
case *sqlparser.JoinTableExpr:
return nil, "", 0, nil, nil, nil, ErrParseMalformedJoin
case *sqlparser.AliasedTableExpr:
alias = smallerexpr.As.CompliantName()
if alias == "" {
alias = sqlparser.GetTableName(smallerexpr.Expr).CompliantName()
}
}
}
}
if stmt.Limit != nil {
switch expr := stmt.Limit.Rowcount.(type) {
case *sqlparser.SQLVal:
// The Value of how many rows we're going to limit by
limit, _ = strconv.Atoi(string(expr.Val[:]))
}
}
if stmt.GroupBy != nil {
return nil, "", 0, nil, nil, nil, ErrParseUnsupportedLiteralsGroupBy
}
if stmt.OrderBy != nil {
return nil, "", 0, nil, nil, nil, ErrParseUnsupportedToken
}
if err := reader.parseErrs(columnNames, whereClause, alias, myFuncs); err != nil {
return nil, "", 0, nil, nil, nil, err
}
return columnNames, alias, limit, whereClause, functionNames, myFuncs, nil
}
return nil, "", 0, nil, nil, nil, nil
}
// This is the main function, It goes row by row and for records which validate
// the where clause it currently prints the appropriate row given the requested
// columns.
func (reader *Input) processSelectReq(reqColNames []string, alias string, whereClause interface{}, limitOfRecords int, functionNames []string, myRow chan *Row, myFunc *SelectFuncs) {
counter := -1
filtrCount := 0
functionFlag := false
// My values is used to store our aggregation values if we need to store them.
myAggVals := make([]float64, len(reqColNames))
var columns []string
// LowercasecolumnsMap is used in accordance with hasDuplicates so that we can
// raise the error "Ambigious" if a case insensitive column is provided and we
// have multiple matches.
lowercaseColumnsMap := make(map[string]int)
hasDuplicates := make(map[string]bool)
// ColumnsMap stores our columns and their index.
columnsMap := make(map[string]int)
if limitOfRecords == 0 {
limitOfRecords = math.MaxInt64
}
for {
record := reader.ReadRecord()
reader.stats.BytesProcessed += processSize(record)
if record == nil {
if functionFlag {
rowStruct := &Row{
record: reader.aggFuncToStr(myAggVals) + "\n",
}
myRow <- rowStruct
}
close(myRow)
return
}
if counter == -1 && reader.options.HeaderOpt && len(reader.header) > 0 {
columns = reader.Header()
myErr := checkForDuplicates(columns, columnsMap, hasDuplicates, lowercaseColumnsMap)
if myErr != nil {
rowStruct := &Row{
err: myErr,
}
myRow <- rowStruct
return
}
} else if counter == -1 && len(reader.header) > 0 {
columns = reader.Header()
}
// When we have reached our limit, on what the user specified as the number
// of rows they wanted, we terminate our interpreter.
if filtrCount == limitOfRecords && limitOfRecords != 0 {
close(myRow)
return
}
// The call to the where function clause,ensures that the rows we print match our where clause.
condition, myErr := matchesMyWhereClause(record, columnsMap, alias, whereClause)
if myErr != nil {
rowStruct := &Row{
err: myErr,
}
myRow <- rowStruct
return
}
if condition {
// if its an asterix we just print everything in the row
if reqColNames[0] == "*" && functionNames[0] == "" {
rowStruct := &Row{
record: reader.printAsterix(record) + "\n",
}
myRow <- rowStruct
} else if alias != "" {
// This is for dealing with the case of if we have to deal with a
// request for a column with an index e.g A_1.
if representsInt(reqColNames[0]) {
// This checks whether any aggregation function was called as now we
// no longer will go through printing each row, and only print at the
// end
if len(functionNames) > 0 && functionNames[0] != "" {
functionFlag = true
aggregationFunctions(counter, filtrCount, myAggVals, columnsMap, reqColNames, functionNames, record)
} else {
// The code below finds the appropriate columns of the row given the
// indicies provided in the SQL request and utilizes the map to
// retrieve the correct part of the row.
myQueryRow, myErr := reader.processColNameIndex(record, reqColNames, columns)
if myErr != nil {
rowStruct := &Row{
err: myErr,
}
myRow <- rowStruct
return
}
rowStruct := &Row{
record: myQueryRow + "\n",
}
myRow <- rowStruct
}
} else {
// This code does aggregation if we were provided column names in the
// form of acutal names rather an indices.
if len(functionNames) > 0 && functionNames[0] != "" {
functionFlag = true
aggregationFunctions(counter, filtrCount, myAggVals, columnsMap, reqColNames, functionNames, record)
} else {
// This code prints the appropriate part of the row given the filter
// and select request, if the select request was based on column
// names rather than indices.
myQueryRow, myErr := reader.processColNameLiteral(record, reqColNames, columns, columnsMap, myFunc)
if myErr != nil {
rowStruct := &Row{
err: myErr,
}
myRow <- rowStruct
return
}
rowStruct := &Row{
record: myQueryRow + "\n",
}
myRow <- rowStruct
}
}
}
filtrCount++
}
counter++
}
}
// printAsterix helps to print out the entire row if an asterix is used.
func (reader *Input) printAsterix(record []string) string {
myRow := record[0]
for i := 1; i < len(record); i++ {
myRow = myRow + reader.options.OutputFieldDelimiter + record[i]
}
return myRow
}
// processColumnNames is a function which allows for cleaning of column names.
func (reader *Input) processColumnNames(reqColNames []string, alias string) error {
for i := 0; i < len(reqColNames); i++ {
// The code below basically cleans the column name of its alias and other
// syntax, so that we can extract its pure name.
reqColNames[i] = cleanCol(reqColNames[i], alias)
}
return nil
}
// processColNameIndex is the function which creates the row for an index based
// query.
func (reader *Input) processColNameIndex(record []string, reqColNames []string, columns []string) (string, error) {
myRow := ""
for i := 0; i < len(reqColNames); i++ {
// COALESCE AND NULLIF do not support index based access.
if reqColNames[0] == "0" {
return "", ErrInvalidColumnIndex
}
// Subtract 1 because AWS Indexing is not 0 based, it starts at 1.
mytempindex, err := strconv.Atoi(reqColNames[i])
mytempindex = mytempindex - 1
if mytempindex > len(columns) {
return "", ErrInvalidColumnIndex
}
myRow = writeRow(myRow, record[mytempindex], reader.options.OutputFieldDelimiter, len(reqColNames))
if err != nil {
return "", ErrMissingHeaders
}
}
if len(myRow) > 1000000 {
return "", ErrOverMaxRecordSize
}
if strings.Count(myRow, reader.options.OutputFieldDelimiter) != len(reqColNames)-1 {
myRow = qualityCheck(myRow, len(reqColNames)-1-strings.Count(myRow, reader.options.OutputFieldDelimiter), reader.options.OutputFieldDelimiter)
}
return myRow, nil
}
// processColNameLiteral is the function which creates the row for an name based
// query.
func (reader *Input) processColNameLiteral(record []string, reqColNames []string, columns []string, columnsMap map[string]int, myFunc *SelectFuncs) (string, error) {
myRow := ""
for i := 0; i < len(reqColNames); i++ {
// this is the case to deal with COALESCE.
if reqColNames[i] == "" && isValidFunc(myFunc.index, i) {
myVal := evaluateFuncExpr(myFunc.funcExpr[i], "", record, columnsMap)
myRow = writeRow(myRow, myVal, reader.options.OutputFieldDelimiter, len(reqColNames))
continue
}
myTempIndex, notFound := columnsMap[trimQuotes(reqColNames[i])]
if !notFound {
return "", ErrMissingHeaders
}
myRow = writeRow(myRow, record[myTempIndex], reader.options.OutputFieldDelimiter, len(reqColNames))
}
if len(myRow) > 1000000 {
return "", ErrOverMaxRecordSize
}
if strings.Count(myRow, reader.options.OutputFieldDelimiter) != len(reqColNames)-1 {
myRow = qualityCheck(myRow, len(reqColNames)-1-strings.Count(myRow, reader.options.OutputFieldDelimiter), reader.options.OutputFieldDelimiter)
}
return myRow, nil
}
// aggregationFunctions is a function which performs the actual aggregation
// methods on the given row, it uses an array defined the the main parsing
// function to keep track of values.
func aggregationFunctions(counter int, filtrCount int, myAggVals []float64, columnsMap map[string]int, storeReqCols []string, storeFunctions []string, record []string) error {
for i := 0; i < len(storeFunctions); i++ {
if storeFunctions[i] == "" {
i++
} else if storeFunctions[i] == "count" {
myAggVals[i]++
} else {
// If column names are provided as an index it'll use this if statement instead of the else/
var convAggFloat float64
if representsInt(storeReqCols[i]) {
myIndex, _ := strconv.Atoi(storeReqCols[i])
convAggFloat, _ = strconv.ParseFloat(record[myIndex], 64)
} else {
// case that the columns are in the form of named columns rather than indices.
convAggFloat, _ = strconv.ParseFloat(record[columnsMap[trimQuotes(storeReqCols[i])]], 64)
}
// This if statement is for calculating the min.
if storeFunctions[i] == "min" {
if counter == -1 {
myAggVals[i] = math.MaxFloat64
}
if convAggFloat < myAggVals[i] {
myAggVals[i] = convAggFloat
}
} else if storeFunctions[i] == "max" {
// This if statement is for calculating the max.
if counter == -1 {
myAggVals[i] = math.SmallestNonzeroFloat64
}
if convAggFloat > myAggVals[i] {
myAggVals[i] = convAggFloat
}
} else if storeFunctions[i] == "sum" {
// This if statement is for calculating the sum.
myAggVals[i] += convAggFloat
} else if storeFunctions[i] == "avg" {
// This if statement is for calculating the average.
if filtrCount == 0 {
myAggVals[i] = convAggFloat
} else {
myAggVals[i] = (convAggFloat + (myAggVals[i] * float64(filtrCount))) / float64((filtrCount + 1))
}
} else {
return ErrParseNonUnaryAgregateFunctionCall
}
}
}
return nil
}

1070
pkg/s3select/select_test.go Normal file

File diff suppressed because it is too large Load Diff