SQL select query for CSV/JSON (#6648)
SELECT * and SELECT with named columns have been implemented for CSV. SELECT * is implemented for JSON.
committed by kannappanr · parent acf46cc3b5 · commit c0b4bf0a3e
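For orientation, these are the shapes of query this commit targets. The queries below are illustrative examples in the S3 Select dialect, not test cases taken from the commit; S3Object is the standard table name, and _N addresses column N when a CSV object has no header row.

package main

import "fmt"

func main() {
	// Illustrative S3 Select expressions this change aims to handle.
	queries := []string{
		"SELECT * FROM S3Object",         // CSV and JSON
		"SELECT name, age FROM S3Object", // CSV, named columns from the header row
		"SELECT s._2 FROM S3Object s",    // CSV without a header row, by column index
	}
	for _, q := range queries {
		fmt.Println(q)
	}
}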
pkg/s3select/datatypes.go (new file, 110 lines)
@@ -0,0 +1,110 @@
/*
 * Minio Cloud Storage, (C) 2018 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package s3select

import (
	"encoding/xml"
)

// CSVFileHeaderInfo -Can be either USE IGNORE OR NONE, defines what to do with
// the first row
type CSVFileHeaderInfo string

// Constants for file header info.
const (
	CSVFileHeaderInfoNone   CSVFileHeaderInfo = "NONE"
	CSVFileHeaderInfoIgnore                   = "IGNORE"
	CSVFileHeaderInfoUse                      = "USE"
)

// The maximum character per record is set to be 1 MB.
const (
	MaxCharsPerRecord = 1000000
)

// SelectCompressionType - ONLY GZIP is supported
type SelectCompressionType string

// JSONType determines json input serialization type.
type JSONType string

// Constants for compression types under select API.
const (
	SelectCompressionNONE SelectCompressionType = "NONE"
	SelectCompressionGZIP                       = "GZIP"
	SelectCompressionBZIP                       = "BZIP2"
)

// CSVQuoteFields - Can be either Always or AsNeeded
type CSVQuoteFields string

// Constants for csv quote styles.
const (
	CSVQuoteFieldsAlways   CSVQuoteFields = "Always"
	CSVQuoteFieldsAsNeeded                = "AsNeeded"
)

// QueryExpressionType - Currently can only be SQL
type QueryExpressionType string

// Constants for expression type.
const (
	QueryExpressionTypeSQL QueryExpressionType = "SQL"
)

// Constants for JSONTypes.
const (
	JSONTypeDocument JSONType = "DOCUMENT"
	JSONLinesType             = "LINES"
)

// ObjectSelectRequest - represents the input select body
type ObjectSelectRequest struct {
	XMLName            xml.Name `xml:"SelectObjectContentRequest" json:"-"`
	Expression         string
	ExpressionType     QueryExpressionType
	InputSerialization struct {
		CompressionType SelectCompressionType
		Parquet         *struct{}
		CSV             *struct {
			FileHeaderInfo       CSVFileHeaderInfo
			RecordDelimiter      string
			FieldDelimiter       string
			QuoteCharacter       string
			QuoteEscapeCharacter string
			Comments             string
		}
		JSON *struct {
			Type JSONType
		}
	}
	OutputSerialization struct {
		CSV *struct {
			QuoteFields          CSVQuoteFields
			RecordDelimiter      string
			FieldDelimiter       string
			QuoteCharacter       string
			QuoteEscapeCharacter string
		}
		JSON *struct {
			RecordDelimiter string
		}
	}
	RequestProgress struct {
		Enabled bool
	}
}
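ObjectSelectRequest above is the XML body of an S3 SelectObjectContent call. As a minimal sketch of how encoding/xml maps a request onto it (the struct lives inside package s3select, so this sketch re-declares a trimmed copy; the sample body and its values are illustrative, not taken from the commit):

package main

import (
	"encoding/xml"
	"fmt"
)

// Trimmed, re-declared copy of s3select.ObjectSelectRequest for this sketch.
type objectSelectRequest struct {
	XMLName            xml.Name `xml:"SelectObjectContentRequest"`
	Expression         string
	ExpressionType     string
	InputSerialization struct {
		CompressionType string
		CSV             *struct {
			FileHeaderInfo string
		}
	}
}

func main() {
	// Illustrative request body; element names follow the struct fields above.
	body := `<SelectObjectContentRequest>
  <Expression>SELECT * FROM S3Object</Expression>
  <ExpressionType>SQL</ExpressionType>
  <InputSerialization>
    <CompressionType>NONE</CompressionType>
    <CSV><FileHeaderInfo>USE</FileHeaderInfo></CSV>
  </InputSerialization>
</SelectObjectContentRequest>`

	var req objectSelectRequest
	if err := xml.Unmarshal([]byte(body), &req); err != nil {
		panic(err)
	}
	fmt.Println(req.Expression, req.InputSerialization.CSV.FileHeaderInfo)
	// Output: SELECT * FROM S3Object USE
}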
@@ -16,7 +16,11 @@
 
 package s3select
 
-import "errors"
+import (
+	"errors"
+
+	"github.com/minio/minio/pkg/s3select/format"
+)
 
 //S3 errors below
 
@@ -35,10 +39,6 @@ var ErrExpressionTooLong = errors.New("The SQL expression is too long: The maxim
 // in the SQL function.
 var ErrIllegalSQLFunctionArgument = errors.New("Illegal argument was used in the SQL function")
 
-// ErrInvalidColumnIndex is an error if you provide a column index which is not
-// valid.
-var ErrInvalidColumnIndex = errors.New("Column index in the SQL expression is invalid")
-
 // ErrInvalidKeyPath is an error if you provide a key in the SQL expression that
 // is invalid.
 var ErrInvalidKeyPath = errors.New("Key path in the SQL expression is invalid")
@@ -63,10 +63,6 @@ var ErrMissingHeaders = errors.New("Some headers in the query are missing from t
 // utilized with the select object query.
 var ErrInvalidCompressionFormat = errors.New("The file is not in a supported compression format. Only GZIP is supported at this time")
 
-// ErrTruncatedInput is an error if the object is not compressed properly and an
-// error occurs during decompression.
-var ErrTruncatedInput = errors.New("Object decompression failed. Check that the object is properly compressed using the format specified in the request")
-
 // ErrInvalidFileHeaderInfo is an error if the argument provided to the
 // FileHeader Argument is incorrect.
 var ErrInvalidFileHeaderInfo = errors.New("The FileHeaderInfo is invalid. Only NONE, USE, and IGNORE are supported")
@@ -83,13 +79,6 @@ var ErrInvalidQuoteFields = errors.New("The QuoteFields is invalid. Only ALWAYS
 // request element is not valid.
 var ErrInvalidRequestParameter = errors.New("The value of a parameter in Request element is invalid. Check the service API documentation and try again")
 
-// ErrCSVParsingError is an error if the CSV presents an error while being
-// parsed.
-var ErrCSVParsingError = errors.New("Encountered an Error parsing the CSV file. Check the file and try again")
-
-// ErrJSONParsingError is an error if while parsing the JSON an error arises.
-var ErrJSONParsingError = errors.New("Encountered an error parsing the JSON file. Check the file and try again")
-
 // ErrExternalEvalException is an error that arises if the query can not be
 // evaluated.
 var ErrExternalEvalException = errors.New("The query cannot be evaluated. Check the file and try again")
@@ -224,10 +213,6 @@ var ErrParseUnsupportedSyntax = errors.New("The SQL expression contains unsuppor
 // operator present in the SQL expression.
 var ErrParseUnknownOperator = errors.New("The SQL expression contains an invalid operator")
 
-// ErrParseInvalidPathComponent is an error that occurs if there is an invalid
-// path component.
-var ErrParseInvalidPathComponent = errors.New("The SQL expression contains an invalid path component")
-
 // ErrParseMissingIdentAfterAt is an error that occurs if the wrong symbol
 // follows the "@" symbol in the SQL expression.
 var ErrParseMissingIdentAfterAt = errors.New("Did not find the expected identifier after the @ symbol in the SQL expression")
@@ -395,20 +380,20 @@ var errorCodeResponse = map[error]string{
 	ErrUnauthorizedAccess:          "UnauthorizedAccess",
 	ErrExpressionTooLong:           "ExpressionTooLong",
 	ErrIllegalSQLFunctionArgument:  "IllegalSqlFunctionArgument",
-	ErrInvalidColumnIndex:          "InvalidColumnIndex",
+	format.ErrInvalidColumnIndex:   "InvalidColumnIndex",
 	ErrInvalidKeyPath:              "InvalidKeyPath",
 	ErrColumnTooLong:               "ColumnTooLong",
 	ErrOverMaxColumn:               "OverMaxColumn",
 	ErrOverMaxRecordSize:           "OverMaxRecordSize",
 	ErrMissingHeaders:              "MissingHeaders",
 	ErrInvalidCompressionFormat:    "InvalidCompressionFormat",
-	ErrTruncatedInput:              "TruncatedInput",
+	format.ErrTruncatedInput:       "TruncatedInput",
 	ErrInvalidFileHeaderInfo:       "InvalidFileHeaderInfo",
 	ErrInvalidJSONType:             "InvalidJsonType",
 	ErrInvalidQuoteFields:          "InvalidQuoteFields",
 	ErrInvalidRequestParameter:     "InvalidRequestParameter",
-	ErrCSVParsingError:             "CSVParsingError",
-	ErrJSONParsingError:            "JSONParsingError",
+	format.ErrCSVParsingError:      "CSVParsingError",
+	format.ErrJSONParsingError:     "JSONParsingError",
 	ErrExternalEvalException:       "ExternalEvalException",
 	ErrInvalidDataType:             "InvalidDataType",
 	ErrUnrecognizedFormatException: "UnrecognizedFormatException",
@@ -443,7 +428,7 @@ var errorCodeResponse = map[error]string{
 	ErrParseUnsupportedAlias:            "ParseUnsupportedAlias",
 	ErrParseUnsupportedSyntax:           "ParseUnsupportedSyntax",
 	ErrParseUnknownOperator:             "ParseUnknownOperator",
-	ErrParseInvalidPathComponent:        "ParseInvalidPathComponent",
+	format.ErrParseInvalidPathComponent: "ParseInvalidPathComponent",
 	ErrParseMissingIdentAfterAt:         "ParseMissingIdentAfterAt",
 	ErrParseUnexpectedOperator:          "ParseUnexpectedOperator",
 	ErrParseUnexpectedTerm:              "ParseUnexpectedTerm",
@@ -19,12 +19,13 @@ package s3select
 import (
 	"strings"
 
+	"github.com/minio/minio/pkg/s3select/format"
 	"github.com/xwb1989/sqlparser"
 )
 
 // stringOps is a function which handles the case in a clause if there is a need
 // to perform a string function
-func stringOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string, columnsMap map[string]int) string {
+func stringOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) string {
 	var value string
 	funcName := myFunc.Name.CompliantName()
 	switch tempArg := myFunc.Exprs[0].(type) {
@@ -34,7 +35,7 @@ func stringOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string,
 	// myReturnVal is actually the tail recursive value being used in the eval func.
 		return applyStrFunc(myReturnVal, funcName)
 	case *sqlparser.ColName:
-		value = applyStrFunc(record[columnsMap[col.Name.CompliantName()]], funcName)
+		value = applyStrFunc(jsonValue(col.Name.CompliantName(), record), funcName)
 	case *sqlparser.SQLVal:
 		value = applyStrFunc(string(col.Val), funcName)
 	}
@@ -43,7 +44,7 @@ func stringOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string,
 }
 
 // coalOps is a function which decomposes a COALESCE func expr into its struct.
-func coalOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string, columnsMap map[string]int) string {
+func coalOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) string {
 	myArgs := make([]string, len(myFunc.Exprs))
 
 	for i := 0; i < len(myFunc.Exprs); i++ {
@@ -54,7 +55,7 @@ func coalOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string, co
 	// myReturnVal is actually the tail recursive value being used in the eval func.
 			return myReturnVal
 		case *sqlparser.ColName:
-			myArgs[i] = record[columnsMap[col.Name.CompliantName()]]
+			myArgs[i] = jsonValue(col.Name.CompliantName(), record)
 		case *sqlparser.SQLVal:
 			myArgs[i] = string(col.Val)
 		}
@@ -64,7 +65,7 @@ func coalOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string, co
 }
 
 // nullOps is a function which decomposes a NullIf func expr into its struct.
-func nullOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string, columnsMap map[string]int) string {
+func nullOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) string {
 	myArgs := make([]string, 2)
 
 	for i := 0; i < len(myFunc.Exprs); i++ {
@@ -74,7 +75,7 @@ func nullOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string, co
 		case *sqlparser.FuncExpr:
 			return myReturnVal
 		case *sqlparser.ColName:
-			myArgs[i] = record[columnsMap[col.Name.CompliantName()]]
+			myArgs[i] = jsonValue(col.Name.CompliantName(), record)
 		case *sqlparser.SQLVal:
 			myArgs[i] = string(col.Val)
 		}
@@ -118,8 +119,8 @@ func processCoalNoIndex(coalStore []string) string {
 }
 
 // evaluateFuncExpr is a function that allows for tail recursive evaluation of
-// nested function expressions.
-func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, myRecord []string, columnsMap map[string]int) string {
+// nested function expressions
+func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, myRecord string) string {
 	if myVal == nil {
 		return myReturnVal
 	}
@@ -140,26 +141,26 @@ func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, myRecord []
 	for i := 0; i < len(mySubFunc); i++ {
 		if supportedString(myVal.Name.CompliantName()) {
 			if mySubFunc != nil {
-				return stringOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord, columnsMap), columnsMap)
+				return stringOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord))
 			}
-			return stringOps(myVal, myRecord, myReturnVal, columnsMap)
+			return stringOps(myVal, myRecord, myReturnVal)
 		} else if strings.ToUpper(myVal.Name.CompliantName()) == "NULLIF" {
 			if mySubFunc != nil {
-				return nullOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord, columnsMap), columnsMap)
+				return nullOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord))
 			}
-			return nullOps(myVal, myRecord, myReturnVal, columnsMap)
+			return nullOps(myVal, myRecord, myReturnVal)
 		} else if strings.ToUpper(myVal.Name.CompliantName()) == "COALESCE" {
 			if mySubFunc != nil {
-				return coalOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord, columnsMap), columnsMap)
+				return coalOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord))
 			}
-			return coalOps(myVal, myRecord, myReturnVal, columnsMap)
+			return coalOps(myVal, myRecord, myReturnVal)
 		}
 	}
 	return ""
 }
 
 // evaluateFuncErr is a function that flags errors in nested functions.
-func (reader *Input) evaluateFuncErr(myVal *sqlparser.FuncExpr) error {
+func evaluateFuncErr(myVal *sqlparser.FuncExpr, reader format.Select) error {
 	if myVal == nil {
 		return nil
 	}
@@ -173,11 +174,11 @@ func (reader *Input) evaluateFuncErr(myVal *sqlparser.FuncExpr) error {
 	case *sqlparser.AliasedExpr:
 		switch col := tempArg.Expr.(type) {
 		case *sqlparser.FuncExpr:
-			if err := reader.evaluateFuncErr(col); err != nil {
+			if err := evaluateFuncErr(col, reader); err != nil {
 				return err
 			}
 		case *sqlparser.ColName:
-			if err := reader.colNameErrs([]string{col.Name.CompliantName()}); err != nil {
+			if err := reader.ColNameErrs([]string{col.Name.CompliantName()}); err != nil {
 				return err
 			}
 		}
@@ -186,11 +187,9 @@ func (reader *Input) evaluateFuncErr(myVal *sqlparser.FuncExpr) error {
 	return nil
 }
 
-// evaluateIsExpr is a function for evaluating expressions of the form "column
-// is ...."
-func evaluateIsExpr(myFunc *sqlparser.IsExpr, row []string, columnNames map[string]int, alias string) (bool, error) {
+// evaluateIsExpr is a function for evaluating expressions of the form "column is ...."
+func evaluateIsExpr(myFunc *sqlparser.IsExpr, row string, alias string) (bool, error) {
 	operator := myFunc.Operator
 	var colName string
 	var myVal string
 	switch myIs := myFunc.Expr.(type) {
 	// case for literal val
@@ -198,14 +197,10 @@ func evaluateIsExpr(myFunc *sqlparser.IsExpr, row []string, columnNames map[stri
 		myVal = string(myIs.Val)
 	// case for nested func val
 	case *sqlparser.FuncExpr:
-		myVal = evaluateFuncExpr(myIs, "", row, columnNames)
+		myVal = evaluateFuncExpr(myIs, "", row)
 	// case for col val
 	case *sqlparser.ColName:
-		colName = cleanCol(myIs.Name.CompliantName(), alias)
-	}
-	// case if it is a col val
-	if colName != "" {
-		myVal = row[columnNames[colName]]
+		myVal = jsonValue(myIs.Name.CompliantName(), row)
 	}
 	// case to evaluate is null
 	if strings.ToLower(operator) == "is null" {
@@ -221,11 +216,11 @@ func evaluateIsExpr(myFunc *sqlparser.IsExpr, row []string, columnNames map[stri
 // supportedString is a function that checks whether the function is a supported
 // string one
 func supportedString(strFunc string) bool {
-	return stringInSlice(strings.ToUpper(strFunc), []string{"TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER"})
+	return format.StringInSlice(strings.ToUpper(strFunc), []string{"TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER"})
 }
 
 // supportedFunc is a function that checks whether the function is a supported
 // S3 one.
 func supportedFunc(strFunc string) bool {
-	return stringInSlice(strings.ToUpper(strFunc), []string{"TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER", "COALESCE", "NULLIF"})
+	return format.StringInSlice(strings.ToUpper(strFunc), []string{"TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER", "COALESCE", "NULLIF"})
 }
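Note that the refactor above narrows record from a []string row to a JSON-encoded string and resolves columns through jsonValue, whose definition does not appear in the hunks shown here. Since the select code now imports github.com/tidwall/gjson (see the import block further down in this diff), it is presumably a thin wrapper along these lines — an assumption, not code from the commit:

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
)

// Assumed shape of the jsonValue helper used above: look up a top-level
// field in a JSON-encoded record and return it as a string. The real
// definition is not part of the hunks shown in this diff.
func jsonValue(col string, record string) string {
	return gjson.Get(record, col).String()
}

func main() {
	record := `{"name":"alice","_1":"42"}`
	fmt.Println(jsonValue("name", record)) // alice
	fmt.Println(jsonValue("_1", record))   // 42
}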
pkg/s3select/format/csv/csv.go (new file, 334 lines)
@@ -0,0 +1,334 @@
/*
 * Minio Cloud Storage, (C) 2018 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package csv

import (
	"compress/bzip2"
	"encoding/csv"
	"encoding/xml"
	"io"
	"strconv"
	"strings"

	gzip "github.com/klauspost/pgzip"
	"github.com/minio/minio/pkg/ioutil"
	"github.com/minio/minio/pkg/s3select/format"
)

// Options options are passed to the underlying encoding/csv reader.
type Options struct {
	// HasHeader when true, will treat the first row as a header row.
	HasHeader bool

	// RecordDelimiter is the string that records are delimited by.
	RecordDelimiter string

	// FieldDelimiter is the string that fields are delimited by.
	FieldDelimiter string

	// Comments is the string the first character of a line of
	// text matches the comment character.
	Comments string

	// Name of the table that is used for querying
	Name string

	// ReadFrom is where the data will be read from.
	ReadFrom io.Reader

	// If true then we need to add gzip or bzip reader.
	// to extract the csv.
	Compressed string

	// SQL expression meant to be evaluated.
	Expression string

	// What the outputted CSV will be delimited by .
	OutputFieldDelimiter string

	// Size of incoming object
	StreamSize int64

	// Whether Header is "USE" or another
	HeaderOpt bool

	// Progress enabled, enable/disable progress messages.
	Progress bool
}

// cinput represents a record producing input from a formatted object.
type cinput struct {
	options         *Options
	reader          *csv.Reader
	firstRow        []string
	header          []string
	minOutputLength int
	stats           struct {
		BytesScanned   int64
		BytesReturned  int64
		BytesProcessed int64
	}
}

// New sets up a new Input, the first row is read when this is run.
// If there is a problem with reading the first row, the error is returned.
// Otherwise, the returned reader can be reliably consumed with Read().
// until Read() return err.
func New(opts *Options) (format.Select, error) {
	myReader := opts.ReadFrom
	var tempBytesScanned int64
	tempBytesScanned = 0
	switch opts.Compressed {
	case "GZIP":
		tempBytesScanned = opts.StreamSize
		var err error
		if myReader, err = gzip.NewReader(opts.ReadFrom); err != nil {
			return nil, format.ErrTruncatedInput
		}
	case "BZIP2":
		tempBytesScanned = opts.StreamSize
		myReader = bzip2.NewReader(opts.ReadFrom)
	}

	// DelimitedReader treats custom record delimiter like `\r\n`,`\r`,`ab` etc and replaces it with `\n`.
	normalizedReader := ioutil.NewDelimitedReader(myReader, []rune(opts.RecordDelimiter))
	reader := &cinput{
		options: opts,
		reader:  csv.NewReader(normalizedReader),
	}
	reader.stats.BytesScanned = tempBytesScanned
	reader.stats.BytesProcessed = 0
	reader.stats.BytesReturned = 0

	reader.firstRow = nil

	reader.reader.FieldsPerRecord = -1
	if reader.options.FieldDelimiter != "" {
		reader.reader.Comma = rune(reader.options.FieldDelimiter[0])
	}

	if reader.options.Comments != "" {
		reader.reader.Comment = rune(reader.options.Comments[0])
	}

	// QuoteCharacter - " (defaulted currently)
	reader.reader.LazyQuotes = true

	if err := reader.readHeader(); err != nil {
		return nil, err
	}

	return reader, nil
}

// Replace the spaces in columnnames with underscores
func cleanHeader(columns []string) []string {
	for i := 0; i < len(columns); i++ {
		columns[i] = strings.Replace(columns[i], " ", "_", -1)
	}
	return columns
}

// readHeader reads the header into the header variable if the header is present
// as the first row of the csv
func (reader *cinput) readHeader() error {
	var readErr error
	if reader.options.HasHeader {
		reader.firstRow, readErr = reader.reader.Read()
		if readErr != nil {
			return format.ErrCSVParsingError
		}
		reader.header = cleanHeader(reader.firstRow)
		reader.firstRow = nil
		reader.minOutputLength = len(reader.header)
	} else {
		reader.firstRow, readErr = reader.reader.Read()
		reader.header = make([]string, len(reader.firstRow))
		for i := 0; i < reader.minOutputLength; i++ {
			reader.header[i] = strconv.Itoa(i)
		}

	}
	return nil
}

// Progress - return true if progress was requested.
func (reader *cinput) Progress() bool {
	return reader.options.Progress
}

// UpdateBytesProcessed - populates the bytes Processed
func (reader *cinput) UpdateBytesProcessed(record map[string]interface{}) {
	// Convert map to slice of values.
	values := []string{}
	for _, value := range record {
		values = append(values, value.(string))
	}

	reader.stats.BytesProcessed += int64(len(values))

}

// Read the file and returns map[string]interface{}
func (reader *cinput) Read() (map[string]interface{}, error) {
	record := make(map[string]interface{})
	dec := reader.readRecord()
	if dec != nil {
		if reader.options.HasHeader {
			columns := reader.header
			for i, value := range dec {
				record[columns[i]] = value
			}
		} else {
			for i, value := range dec {
				record["_"+strconv.Itoa(i)] = value
			}
		}
		return record, nil
	}
	return nil, nil
}

// OutputFieldDelimiter - returns the delimiter specified in input request
func (reader *cinput) OutputFieldDelimiter() string {
	return reader.options.OutputFieldDelimiter
}

// HasHeader - returns true or false depending upon the header.
func (reader *cinput) HasHeader() bool {
	return reader.options.HasHeader
}

// Expression - return the Select Expression for
func (reader *cinput) Expression() string {
	return reader.options.Expression
}

// UpdateBytesReturned - updates the Bytes returned for
func (reader *cinput) UpdateBytesReturned(size int64) {
	reader.stats.BytesReturned += size
}

// Header returns the header of the reader. Either the first row if a header
// set in the options, or c#, where # is the column number, starting with 0.
func (reader *cinput) Header() []string {
	return reader.header
}

// readRecord reads a single record from the stream and it always returns successfully.
// If the record is empty, an empty []string is returned.
// Record expand to match the current row size, adding blank fields as needed.
// Records never return less then the number of fields in the first row.
// Returns nil on EOF
// In the event of a parse error due to an invalid record, it is logged, and
// an empty []string is returned with the number of fields in the first row,
// as if the record were empty.
//
// In general, this is a very tolerant of problems reader.
func (reader *cinput) readRecord() []string {
	var row []string
	var fileErr error

	if reader.firstRow != nil {
		row = reader.firstRow
		reader.firstRow = nil
		return row
	}

	row, fileErr = reader.reader.Read()
	emptysToAppend := reader.minOutputLength - len(row)
	if fileErr == io.EOF || fileErr == io.ErrClosedPipe {
		return nil
	} else if _, ok := fileErr.(*csv.ParseError); ok {
		emptysToAppend = reader.minOutputLength
	}

	if emptysToAppend > 0 {
		for counter := 0; counter < emptysToAppend; counter++ {
			row = append(row, "")
		}
	}

	return row
}

// CreateStatXML is the function which does the marshaling from the stat
// structs into XML so that the progress and stat message can be sent
func (reader *cinput) CreateStatXML() (string, error) {
	if reader.options.Compressed == "NONE" {
		reader.stats.BytesProcessed = reader.options.StreamSize
		reader.stats.BytesScanned = reader.stats.BytesProcessed
	}
	out, err := xml.Marshal(&format.Stats{
		BytesScanned:   reader.stats.BytesScanned,
		BytesProcessed: reader.stats.BytesProcessed,
		BytesReturned:  reader.stats.BytesReturned,
	})
	if err != nil {
		return "", err
	}
	return xml.Header + string(out), nil
}

// CreateProgressXML is the function which does the marshaling from the progress
// structs into XML so that the progress and stat message can be sent
func (reader *cinput) CreateProgressXML() (string, error) {
	if reader.options.HasHeader {
		reader.stats.BytesProcessed += format.ProcessSize(reader.header)
	}
	if reader.options.Compressed == "NONE" {
		reader.stats.BytesScanned = reader.stats.BytesProcessed
	}
	out, err := xml.Marshal(&format.Progress{
		BytesScanned:   reader.stats.BytesScanned,
		BytesProcessed: reader.stats.BytesProcessed,
		BytesReturned:  reader.stats.BytesReturned,
	})
	if err != nil {
		return "", err
	}
	return xml.Header + string(out), nil
}

// Type - return the data format type {
func (reader *cinput) Type() format.Type {
	return format.CSV
}

// ColNameErrs is a function which makes sure that the headers are requested are
// present in the file otherwise it throws an error.
func (reader *cinput) ColNameErrs(columnNames []string) error {
	for i := 0; i < len(columnNames); i++ {
		if columnNames[i] == "" {
			continue
		}
		if !format.IsInt(columnNames[i]) && !reader.options.HeaderOpt {
			return format.ErrInvalidColumnIndex
		}
		if format.IsInt(columnNames[i]) {
			tempInt, _ := strconv.Atoi(columnNames[i])
			if tempInt > len(reader.Header()) || tempInt == 0 {
				return format.ErrInvalidColumnIndex
			}
		} else {
			if reader.options.HeaderOpt && !format.StringInSlice(columnNames[i], reader.Header()) {
				return format.ErrParseInvalidPathComponent
			}
		}
	}
	return nil
}
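A sketch of driving the new CSV reader on its own, assuming the package is imported under an alias to avoid clashing with encoding/csv; the sample data and option values are illustrative:

package main

import (
	"fmt"
	"strings"

	selcsv "github.com/minio/minio/pkg/s3select/format/csv"
)

func main() {
	data := "name,age\nalice,30\nbob,25\n"
	reader, err := selcsv.New(&selcsv.Options{
		HasHeader:       true,
		HeaderOpt:       true, // corresponds to FileHeaderInfo == "USE"
		RecordDelimiter: "\n",
		FieldDelimiter:  ",",
		Compressed:      "NONE",
		Expression:      "SELECT * FROM S3Object",
		ReadFrom:        strings.NewReader(data),
		StreamSize:      int64(len(data)),
	})
	if err != nil {
		panic(err)
	}
	// Read returns one record per call as map[string]interface{}, keyed by
	// header name (or "_N" without a header), and (nil, nil) at end of stream.
	for {
		record, err := reader.Read()
		if err != nil || record == nil {
			break
		}
		fmt.Println(record["name"], record["age"])
	}
}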
pkg/s3select/format/errors.go (new file, 38 lines)
@@ -0,0 +1,38 @@
/*
 * Minio Cloud Storage, (C) 2018 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package format

import "errors"

// ErrTruncatedInput is an error if the object is not compressed properly and an
// error occurs during decompression.
var ErrTruncatedInput = errors.New("Object decompression failed. Check that the object is properly compressed using the format specified in the request")

// ErrCSVParsingError is an error if the CSV presents an error while being
// parsed.
var ErrCSVParsingError = errors.New("Encountered an Error parsing the CSV file. Check the file and try again")

// ErrInvalidColumnIndex is an error if you provide a column index which is not
// valid.
var ErrInvalidColumnIndex = errors.New("Column index in the SQL expression is invalid")

// ErrParseInvalidPathComponent is an error that occurs if there is an invalid
// path component.
var ErrParseInvalidPathComponent = errors.New("The SQL expression contains an invalid path component")

// ErrJSONParsingError is an error if while parsing the JSON an error arises.
var ErrJSONParsingError = errors.New("Encountered an error parsing the JSON file. Check the file and try again")
pkg/s3select/format/helpers.go (new file, 50 lines)
@@ -0,0 +1,50 @@
/*
 * Minio Cloud Storage, (C) 2018 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package format

import "strconv"

// IsInt - returns a true or false, whether a string can
// be represented as an int.
func IsInt(s string) bool {
	_, err := strconv.Atoi(s)
	return err == nil
}

// StringInSlice - this function finds whether a string is in a list
func StringInSlice(x string, list []string) bool {
	for _, y := range list {
		if x == y {
			return true
		}
	}
	return false
}

// ProcessSize - this function processes size so that we can calculate bytes BytesProcessed.
func ProcessSize(myrecord []string) int64 {
	if len(myrecord) > 0 {
		var size int64
		size = int64(len(myrecord)-1) + 1
		for i := range myrecord {
			size += int64(len(myrecord[i]))
		}

		return size
	}
	return 0
}
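For reference, ProcessSize charges one byte per field separator (len-1 of them) plus one for the record delimiter, on top of the field bytes themselves. A quick check:

package main

import (
	"fmt"

	"github.com/minio/minio/pkg/s3select/format"
)

func main() {
	// Fields "foo" and "bar": 3+3 field bytes, 1 separator, 1 delimiter => 8.
	fmt.Println(format.ProcessSize([]string{"foo", "bar"})) // 8
}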
pkg/s3select/format/json/json.go (new file, 200 lines)
@@ -0,0 +1,200 @@
/*
 * Minio Cloud Storage, (C) 2018 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package json

import (
	"compress/bzip2"
	"encoding/json"
	"encoding/xml"
	"io"

	jsoniter "github.com/json-iterator/go"
	gzip "github.com/klauspost/pgzip"
	"github.com/minio/minio/pkg/s3select/format"
)

// Options options are passed to the underlying encoding/json reader.
type Options struct {

	// Name of the table that is used for querying
	Name string

	// ReadFrom is where the data will be read from.
	ReadFrom io.Reader

	// If true then we need to add gzip or bzip reader.
	// to extract the csv.
	Compressed string

	// SQL expression meant to be evaluated.
	Expression string

	// What the outputted will be delimited by .
	RecordDelimiter string

	// Size of incoming object
	StreamSize int64

	// True if Type is DOCUMENTS
	Type bool

	// Progress enabled, enable/disable progress messages.
	Progress bool
}

// jinput represents a record producing input from a formatted file or pipe.
type jinput struct {
	options         *Options
	reader          *jsoniter.Decoder
	firstRow        []string
	header          []string
	minOutputLength int
	stats           struct {
		BytesScanned   int64
		BytesReturned  int64
		BytesProcessed int64
	}
}

// New sets up a new, the first Json is read when this is run.
// If there is a problem with reading the first Json, the error is returned.
// Otherwise, the returned reader can be reliably consumed with jsonRead()
// until jsonRead() returns nil.
func New(opts *Options) (format.Select, error) {
	myReader := opts.ReadFrom
	var tempBytesScanned int64
	tempBytesScanned = 0
	switch opts.Compressed {
	case "GZIP":
		tempBytesScanned = opts.StreamSize
		var err error
		if myReader, err = gzip.NewReader(opts.ReadFrom); err != nil {
			return nil, format.ErrTruncatedInput
		}
	case "BZIP2":
		tempBytesScanned = opts.StreamSize
		myReader = bzip2.NewReader(opts.ReadFrom)
	}

	reader := &jinput{
		options: opts,
		reader:  jsoniter.NewDecoder(myReader),
	}
	reader.stats.BytesScanned = tempBytesScanned
	reader.stats.BytesProcessed = 0
	reader.stats.BytesReturned = 0

	return reader, nil
}

// Progress - return true if progress was requested.
func (reader *jinput) Progress() bool {
	return reader.options.Progress
}

// UpdateBytesProcessed - populates the bytes Processed
func (reader *jinput) UpdateBytesProcessed(record map[string]interface{}) {
	out, _ := json.Marshal(record)
	reader.stats.BytesProcessed += int64(len(out))
}

// Read the file and returns map[string]interface{}
func (reader *jinput) Read() (map[string]interface{}, error) {
	dec := reader.reader
	var record interface{}
	for {
		err := dec.Decode(&record)
		if err == io.EOF || err == io.ErrClosedPipe {
			break
		}
		if err != nil {
			return nil, format.ErrJSONParsingError
		}
		return record.(map[string]interface{}), nil
	}
	return nil, nil
}

// OutputFieldDelimiter - returns the delimiter specified in input request
func (reader *jinput) OutputFieldDelimiter() string {
	return ","
}

// HasHeader - returns true or false depending upon the header.
func (reader *jinput) HasHeader() bool {
	return false
}

// Expression - return the Select Expression for
func (reader *jinput) Expression() string {
	return reader.options.Expression
}

// UpdateBytesReturned - updates the Bytes returned for
func (reader *jinput) UpdateBytesReturned(size int64) {
	reader.stats.BytesReturned += size
}

// Header returns a nil in case of
func (reader *jinput) Header() []string {
	return nil
}

// CreateStatXML is the function which does the marshaling from the stat
// structs into XML so that the progress and stat message can be sent
func (reader *jinput) CreateStatXML() (string, error) {
	if reader.options.Compressed == "NONE" {
		reader.stats.BytesProcessed = reader.options.StreamSize
		reader.stats.BytesScanned = reader.stats.BytesProcessed
	}
	out, err := xml.Marshal(&format.Stats{
		BytesScanned:   reader.stats.BytesScanned,
		BytesProcessed: reader.stats.BytesProcessed,
		BytesReturned:  reader.stats.BytesReturned,
	})
	if err != nil {
		return "", err
	}
	return xml.Header + string(out), nil
}

// CreateProgressXML is the function which does the marshaling from the progress
// structs into XML so that the progress and stat message can be sent
func (reader *jinput) CreateProgressXML() (string, error) {
	if !(reader.options.Compressed != "NONE") {
		reader.stats.BytesScanned = reader.stats.BytesProcessed
	}
	out, err := xml.Marshal(&format.Progress{
		BytesScanned:   reader.stats.BytesScanned,
		BytesProcessed: reader.stats.BytesProcessed,
		BytesReturned:  reader.stats.BytesReturned,
	})
	if err != nil {
		return "", err
	}
	return xml.Header + string(out), nil
}

// Type - return the data format type {
func (reader *jinput) Type() format.Type {
	return format.JSON
}

// ColNameErrs - this is a dummy function for JSON input type.
func (reader *jinput) ColNameErrs(columnNames []string) error {
	return nil
}
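A matching sketch for the JSON reader; jsoniter's streaming decoder consumes concatenated or line-delimited JSON documents, so each Read returns one document as a map (illustrative data; the import alias is assumed):

package main

import (
	"fmt"
	"strings"

	seljson "github.com/minio/minio/pkg/s3select/format/json"
)

func main() {
	data := `{"name":"alice","age":30}
{"name":"bob","age":25}
`
	reader, err := seljson.New(&seljson.Options{
		Compressed: "NONE",
		Expression: "SELECT * FROM S3Object",
		ReadFrom:   strings.NewReader(data),
		StreamSize: int64(len(data)),
		Type:       false, // not DOCUMENT, i.e. line-delimited input (assumption from the field comment)
	})
	if err != nil {
		panic(err)
	}
	for {
		record, err := reader.Read()
		if err != nil || record == nil {
			break
		}
		fmt.Println(record["name"], record["age"])
	}
}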
pkg/s3select/format/select.go (new file, 63 lines)
@@ -0,0 +1,63 @@
/*
 * Minio Cloud Storage, (C) 2018 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package format

import "encoding/xml"

// Select Interface helper methods, implementing features needed for
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
type Select interface {
	Type() Type
	Read() (map[string]interface{}, error)
	Header() []string
	HasHeader() bool
	OutputFieldDelimiter() string
	UpdateBytesProcessed(record map[string]interface{})
	Expression() string
	UpdateBytesReturned(int64)
	CreateStatXML() (string, error)
	CreateProgressXML() (string, error)
	ColNameErrs(columnNames []string) error
	Progress() bool
}

// Progress represents a struct that represents the format for XML of the
// progress messages
type Progress struct {
	XMLName        xml.Name `xml:"Progress" json:"-"`
	BytesScanned   int64    `xml:"BytesScanned"`
	BytesProcessed int64    `xml:"BytesProcessed"`
	BytesReturned  int64    `xml:"BytesReturned"`
}

// Stats represents a struct that represents the format for XML of the stat
// messages
type Stats struct {
	XMLName        xml.Name `xml:"Stats" json:"-"`
	BytesScanned   int64    `xml:"BytesScanned"`
	BytesProcessed int64    `xml:"BytesProcessed"`
	BytesReturned  int64    `xml:"BytesReturned"`
}

// Type different types of support data format types.
type Type string

// Different data format types.
const (
	JSON Type = "json"
	CSV  Type = "csv"
)
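The interface keeps the query engine format-agnostic. A sketch of the consumption loop a caller presumably runs — only methods declared above are used, and the surrounding package is hypothetical:

// Hypothetical helper package, sketching how a caller drives format.Select.
package enginesketch

import (
	"fmt"

	"github.com/minio/minio/pkg/s3select/format"
)

// drain reads every record from any format.Select implementation,
// updating byte statistics, then emits the closing messages.
func drain(f format.Select) error {
	for {
		record, err := f.Read()
		if err != nil {
			return err
		}
		if record == nil { // both readers signal end-of-stream with (nil, nil)
			break
		}
		f.UpdateBytesProcessed(record)
	}
	if f.Progress() {
		progress, err := f.CreateProgressXML()
		if err != nil {
			return err
		}
		fmt.Println(progress)
	}
	stat, err := f.CreateStatXML()
	if err != nil {
		return err
	}
	fmt.Println(stat)
	return nil
}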
@@ -17,94 +17,58 @@
|
||||
package s3select
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/format"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/xwb1989/sqlparser"
|
||||
)
|
||||
|
||||
// MaxExpressionLength - 256KiB
|
||||
const MaxExpressionLength = 256 * 1024
|
||||
|
||||
// This function processes size so that we can calculate bytes BytesProcessed.
|
||||
func processSize(myrecord []string) int64 {
|
||||
if len(myrecord) > 0 {
|
||||
var size int64
|
||||
size = int64(len(myrecord)-1) + 1
|
||||
for i := range myrecord {
|
||||
size += int64(len(myrecord[i]))
|
||||
}
|
||||
|
||||
return size
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// This function finds whether a string is in a list
|
||||
func stringInSlice(x string, list []string) bool {
|
||||
for _, y := range list {
|
||||
if x == y {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// This function returns the index of a string in a list
|
||||
func stringIndex(a string, list []string) int {
|
||||
for i, v := range list {
|
||||
if v == a {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// Returns a true or false, whether a string can be represented as an int.
|
||||
func representsInt(s string) bool {
|
||||
_, err := strconv.Atoi(s)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// The function below processes the where clause into an acutal boolean given a
|
||||
// row
|
||||
func matchesMyWhereClause(row []string, columnNames map[string]int, alias string, whereClause interface{}) (bool, error) {
|
||||
// This particular logic deals with the details of casting, e.g if we have to
|
||||
// cast a column of string numbers into int's for comparison.
|
||||
// matchesMyWhereClause takes map[string]interfaces{} , process the where clause and returns true if the row suffices
|
||||
func matchesMyWhereClause(record map[string]interface{}, alias string, whereClause interface{}) (bool, error) {
|
||||
var conversionColumn string
|
||||
var operator string
|
||||
var operand interface{}
|
||||
if fmt.Sprintf("%v", whereClause) == "false" {
|
||||
return false, nil
|
||||
}
|
||||
out, err := json.Marshal(record)
|
||||
if err != nil {
|
||||
return false, ErrExternalEvalException
|
||||
}
|
||||
switch expr := whereClause.(type) {
|
||||
case *sqlparser.IsExpr:
|
||||
return evaluateIsExpr(expr, row, columnNames, alias)
|
||||
return evaluateIsExpr(expr, string(out), alias)
|
||||
case *sqlparser.RangeCond:
|
||||
operator = expr.Operator
|
||||
if operator != "between" && operator != "not between" {
|
||||
return false, ErrUnsupportedSQLOperation
|
||||
}
|
||||
if operator == "not between" {
|
||||
myResult, err := evaluateBetween(expr, alias, row, columnNames)
|
||||
result, err := evaluateBetween(expr, alias, string(out))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return !myResult, nil
|
||||
return !result, nil
|
||||
}
|
||||
myResult, err := evaluateBetween(expr, alias, row, columnNames)
|
||||
result, err := evaluateBetween(expr, alias, string(out))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return myResult, nil
|
||||
return result, nil
|
||||
case *sqlparser.ComparisonExpr:
|
||||
operator = expr.Operator
|
||||
switch right := expr.Right.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
operand = evaluateFuncExpr(right, "", row, columnNames)
|
||||
operand = evaluateFuncExpr(right, "", string(out))
|
||||
case *sqlparser.SQLVal:
|
||||
var err error
|
||||
operand, err = evaluateParserType(right)
|
||||
@@ -116,29 +80,22 @@ func matchesMyWhereClause(row []string, columnNames map[string]int, alias string
|
||||
myVal = ""
|
||||
switch left := expr.Left.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
myVal = evaluateFuncExpr(left, "", row, columnNames)
|
||||
myVal = evaluateFuncExpr(left, "", string(out))
|
||||
conversionColumn = ""
|
||||
case *sqlparser.ColName:
|
||||
conversionColumn = cleanCol(left.Name.CompliantName(), alias)
|
||||
}
|
||||
if representsInt(conversionColumn) {
|
||||
intCol, err := strconv.Atoi(conversionColumn)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
// Subtract 1 out because the index starts at 1 for Amazon instead of 0.
|
||||
return evaluateOperator(row[intCol-1], operator, operand)
|
||||
conversionColumn = left.Name.CompliantName()
|
||||
}
|
||||
|
||||
if myVal != "" {
|
||||
return evaluateOperator(myVal, operator, operand)
|
||||
}
|
||||
return evaluateOperator(row[columnNames[conversionColumn]], operator, operand)
|
||||
return evaluateOperator(jsonValue(conversionColumn, string(out)), operator, operand)
|
||||
case *sqlparser.AndExpr:
|
||||
var leftVal bool
|
||||
var rightVal bool
|
||||
switch left := expr.Left.(type) {
|
||||
case *sqlparser.ComparisonExpr:
|
||||
temp, err := matchesMyWhereClause(row, columnNames, alias, left)
|
||||
temp, err := matchesMyWhereClause(record, alias, left)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
@@ -146,7 +103,7 @@ func matchesMyWhereClause(row []string, columnNames map[string]int, alias string
|
||||
}
|
||||
switch right := expr.Right.(type) {
|
||||
case *sqlparser.ComparisonExpr:
|
||||
temp, err := matchesMyWhereClause(row, columnNames, alias, right)
|
||||
temp, err := matchesMyWhereClause(record, alias, right)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
@@ -158,18 +115,18 @@ func matchesMyWhereClause(row []string, columnNames map[string]int, alias string
|
||||
var rightVal bool
|
||||
switch left := expr.Left.(type) {
|
||||
case *sqlparser.ComparisonExpr:
|
||||
leftVal, _ = matchesMyWhereClause(row, columnNames, alias, left)
|
||||
leftVal, _ = matchesMyWhereClause(record, alias, left)
|
||||
|
||||
}
|
||||
switch right := expr.Right.(type) {
|
||||
case *sqlparser.ComparisonExpr:
|
||||
rightVal, _ = matchesMyWhereClause(row, columnNames, alias, right)
|
||||
rightVal, _ = matchesMyWhereClause(record, alias, right)
|
||||
}
|
||||
return (rightVal || leftVal), nil
|
||||
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func applyStrFunc(rawArg string, funcName string) string {
|
||||
switch strings.ToUpper(funcName) {
|
||||
case "TRIM":
|
||||
@@ -192,6 +149,135 @@ func applyStrFunc(rawArg string, funcName string) string {
|
||||
|
||||
}
|
||||
|
||||
// evaluateBetween is a function which evaluates a Between Clause.
|
||||
func evaluateBetween(betweenExpr *sqlparser.RangeCond, alias string, record string) (bool, error) {
|
||||
var colToVal interface{}
|
||||
var colFromVal interface{}
|
||||
var conversionColumn string
|
||||
var funcName string
|
||||
switch colTo := betweenExpr.To.(type) {
|
||||
case sqlparser.Expr:
|
||||
switch colToMyVal := colTo.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
colToVal = stringOps(colToMyVal, record, "")
|
||||
case *sqlparser.SQLVal:
|
||||
var err error
|
||||
colToVal, err = evaluateParserType(colToMyVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
}
|
||||
switch colFrom := betweenExpr.From.(type) {
|
||||
case sqlparser.Expr:
|
||||
switch colFromMyVal := colFrom.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
colFromVal = stringOps(colFromMyVal, record, "")
|
||||
case *sqlparser.SQLVal:
|
||||
var err error
|
||||
colFromVal, err = evaluateParserType(colFromMyVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
}
|
||||
var myFuncVal string
|
||||
switch left := betweenExpr.Left.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
myFuncVal = evaluateFuncExpr(left, "", record)
|
||||
conversionColumn = ""
|
||||
case *sqlparser.ColName:
|
||||
conversionColumn = cleanCol(left.Name.CompliantName(), alias)
|
||||
}
|
||||
toGreater, err := evaluateOperator(fmt.Sprintf("%v", colToVal), ">", colFromVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if toGreater {
|
||||
return evalBetweenGreater(conversionColumn, record, funcName, colFromVal, colToVal, myFuncVal)
|
||||
}
|
||||
return evalBetweenLess(conversionColumn, record, funcName, colFromVal, colToVal, myFuncVal)
|
||||
}
|
||||
|
||||
// evalBetweenGreater is a function which evaluates the between given that the
|
||||
// TO is > than the FROM.
|
||||
func evalBetweenGreater(conversionColumn string, record string, funcName string, colFromVal interface{}, colToVal interface{}, myColVal string) (bool, error) {
|
||||
if format.IsInt(conversionColumn) {
|
||||
myVal, err := evaluateOperator(jsonValue("_"+conversionColumn, record), ">=", colFromVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
var myOtherVal bool
|
||||
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(jsonValue("_"+conversionColumn, record)))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return (myVal && myOtherVal), nil
|
||||
}
|
||||
if myColVal != "" {
|
||||
myVal, err := evaluateOperator(myColVal, ">=", colFromVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
var myOtherVal bool
|
||||
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(myColVal))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return (myVal && myOtherVal), nil
|
||||
}
|
||||
myVal, err := evaluateOperator(jsonValue(conversionColumn, record), ">=", colFromVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
var myOtherVal bool
|
||||
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(jsonValue(conversionColumn, record)))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return (myVal && myOtherVal), nil
|
||||
}
|
||||
|
||||
// evalBetweenLess is a function which evaluates the between given that the
|
||||
// FROM is > than the TO.
|
||||
func evalBetweenLess(conversionColumn string, record string, funcName string, colFromVal interface{}, colToVal interface{}, myColVal string) (bool, error) {
|
||||
if format.IsInt(conversionColumn) {
|
||||
// Subtract 1 out because the index starts at 1 for Amazon instead of 0.
|
||||
myVal, err := evaluateOperator(jsonValue("_"+conversionColumn, record), "<=", colFromVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
var myOtherVal bool
|
||||
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(jsonValue("_"+conversionColumn, record)))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return (myVal && myOtherVal), nil
|
||||
}
|
||||
if myColVal != "" {
|
||||
myVal, err := evaluateOperator(myColVal, "<=", colFromVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
var myOtherVal bool
|
||||
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(myColVal))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return (myVal && myOtherVal), nil
|
||||
}
|
||||
myVal, err := evaluateOperator(jsonValue(conversionColumn, record), "<=", colFromVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
var myOtherVal bool
|
||||
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(jsonValue(conversionColumn, record)))
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return (myVal && myOtherVal), nil
|
||||
}
|
||||
|
||||
// This is a really important function it actually evaluates the boolean
|
||||
// statement and therefore actually returns a bool, it functions as the lowest
|
||||
// level of the state machine.
|
||||
@@ -432,151 +518,16 @@ func cleanCol(myCol string, alias string) string {
|
||||
return myCol
|
||||
}
|
||||
|
||||
// evaluateBetween is a function which evaluates a Between Clause.
|
||||
func evaluateBetween(betweenExpr *sqlparser.RangeCond, alias string, record []string, columnNames map[string]int) (bool, error) {
|
||||
var colToVal interface{}
|
||||
var colFromVal interface{}
|
||||
var conversionColumn string
|
||||
var funcName string
|
||||
switch colTo := betweenExpr.To.(type) {
|
||||
case sqlparser.Expr:
|
||||
switch colToMyVal := colTo.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
var temp string
|
||||
temp = stringOps(colToMyVal, record, "", columnNames)
|
||||
colToVal = []byte(temp)
|
||||
case *sqlparser.SQLVal:
|
||||
var err error
|
||||
colToVal, err = evaluateParserType(colToMyVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
}
|
||||
switch colFrom := betweenExpr.From.(type) {
|
||||
case sqlparser.Expr:
|
||||
switch colFromMyVal := colFrom.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
colFromVal = stringOps(colFromMyVal, record, "", columnNames)
|
||||
case *sqlparser.SQLVal:
|
||||
var err error
|
||||
colFromVal, err = evaluateParserType(colFromMyVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
}
|
||||
var myFuncVal string
|
||||
myFuncVal = ""
|
||||
switch left := betweenExpr.Left.(type) {
|
||||
case *sqlparser.FuncExpr:
|
||||
myFuncVal = evaluateFuncExpr(left, "", record, columnNames)
|
||||
conversionColumn = ""
|
||||
case *sqlparser.ColName:
|
||||
conversionColumn = cleanCol(left.Name.CompliantName(), alias)
|
||||
}
|
||||
|
||||
toGreater, err := evaluateOperator(fmt.Sprintf("%v", colToVal), ">", colFromVal)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if toGreater {
|
||||
return evalBetweenGreater(conversionColumn, record, funcName, columnNames, colFromVal, colToVal, myFuncVal)
|
||||
}
|
||||
return evalBetweenLess(conversionColumn, record, funcName, columnNames, colFromVal, colToVal, myFuncVal)
|
||||
}
|
||||
|
||||
// evalBetweenLess is a function which evaluates the BETWEEN clause when the
// FROM value is greater than the TO value.
func evalBetweenLess(conversionColumn string, record []string, funcName string, columnNames map[string]int, colFromVal interface{}, colToVal interface{}, myCoalVal string) (bool, error) {
if representsInt(conversionColumn) {
myIndex, _ := strconv.Atoi(conversionColumn)
// Subtract 1 because the index starts at 1 for Amazon instead of 0.
myVal, err := evaluateOperator(record[myIndex-1], "<=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(record[myIndex-1]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
if myCoalVal != "" {
myVal, err := evaluateOperator(myCoalVal, "<=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(myCoalVal))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
myVal, err := evaluateOperator(record[columnNames[conversionColumn]], "<=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(record[columnNames[conversionColumn]]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}

// evalBetweenGreater is a function which evaluates the BETWEEN clause when the
// TO value is greater than the FROM value.
func evalBetweenGreater(conversionColumn string, record []string, funcName string, columnNames map[string]int, colFromVal interface{}, colToVal interface{}, myCoalVal string) (bool, error) {
if representsInt(conversionColumn) {
myIndex, _ := strconv.Atoi(conversionColumn)
myVal, err := evaluateOperator(record[myIndex-1], ">=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(record[myIndex-1]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
if myCoalVal != "" {
myVal, err := evaluateOperator(myCoalVal, ">=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(myCoalVal))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
myVal, err := evaluateOperator(record[columnNames[conversionColumn]], ">=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(record[columnNames[conversionColumn]]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}

// whereClauseNameErrs is a function which returns an error if there is a column
// in the where clause which does not exist.
func (reader *Input) whereClauseNameErrs(whereClause interface{}, alias string) error {
func whereClauseNameErrs(whereClause interface{}, alias string, f format.Select) error {
var conversionColumn string
switch expr := whereClause.(type) {
// case for checking errors within a clause of the form "col_name is ..."
case *sqlparser.IsExpr:
switch myCol := expr.Expr.(type) {
case *sqlparser.FuncExpr:
if err := reader.evaluateFuncErr(myCol); err != nil {
if err := evaluateFuncErr(myCol, f); err != nil {
return err
}
case *sqlparser.ColName:
@@ -585,7 +536,7 @@ func (reader *Input) whereClauseNameErrs(whereClause interface{}, alias string)
case *sqlparser.RangeCond:
switch left := expr.Left.(type) {
case *sqlparser.FuncExpr:
if err := reader.evaluateFuncErr(left); err != nil {
if err := evaluateFuncErr(left, f); err != nil {
return err
}
case *sqlparser.ColName:
@@ -594,7 +545,7 @@ func (reader *Input) whereClauseNameErrs(whereClause interface{}, alias string)
case *sqlparser.ComparisonExpr:
switch left := expr.Left.(type) {
case *sqlparser.FuncExpr:
if err := reader.evaluateFuncErr(left); err != nil {
if err := evaluateFuncErr(left, f); err != nil {
return err
}
case *sqlparser.ColName:
@@ -603,54 +554,30 @@ func (reader *Input) whereClauseNameErrs(whereClause interface{}, alias string)
case *sqlparser.AndExpr:
switch left := expr.Left.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(left, alias)
return whereClauseNameErrs(left, alias, f)
}
switch right := expr.Right.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(right, alias)
return whereClauseNameErrs(right, alias, f)
}
case *sqlparser.OrExpr:
switch left := expr.Left.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(left, alias)
return whereClauseNameErrs(left, alias, f)
}
switch right := expr.Right.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(right, alias)
return whereClauseNameErrs(right, alias, f)
}
}
if conversionColumn != "" {
return reader.colNameErrs([]string{conversionColumn})
}
return nil
}

// colNameErrs is a function which makes sure that the requested headers are
// present in the file; otherwise it throws an error.
func (reader *Input) colNameErrs(columnNames []string) error {
for i := 0; i < len(columnNames); i++ {
if columnNames[i] == "" {
continue
}
if !representsInt(columnNames[i]) && !reader.options.HeaderOpt {
return ErrInvalidColumnIndex
}
if representsInt(columnNames[i]) {
tempInt, _ := strconv.Atoi(columnNames[i])
if tempInt > len(reader.Header()) || tempInt == 0 {
return ErrInvalidColumnIndex
}
} else {
if reader.options.HeaderOpt && !stringInSlice(columnNames[i], reader.Header()) {
return ErrMissingHeaders
}
}
return f.ColNameErrs([]string{conversionColumn})
}
return nil
}

// aggFuncToStr converts an array of floats into a properly formatted string.
func (reader *Input) aggFuncToStr(aggVals []float64) string {
func aggFuncToStr(aggVals []float64, f format.Select) string {
// Define a number formatting function
numToStr := func(f float64) string {
if f == math.Trunc(f) {
@@ -666,7 +593,7 @@ func (reader *Input) aggFuncToStr(aggVals []float64) string {
}

// Intersperse field delimiter
return strings.Join(vals, reader.options.OutputFieldDelimiter)
return strings.Join(vals, f.OutputFieldDelimiter())
}

// checkForDuplicates ensures we do not have an ambiguous column name.
@@ -714,18 +641,18 @@ func evaluateParserType(col *sqlparser.SQLVal) (interface{}, error) {

// parseErrs is the function which handles all the errors that could occur
// through use of function arguments such as column names in NULLIF.
func (reader *Input) parseErrs(columnNames []string, whereClause interface{}, alias string, myFuncs *SelectFuncs) error {
func parseErrs(columnNames []string, whereClause interface{}, alias string, myFuncs *SelectFuncs, f format.Select) error {
// Below code cleans up column names.
reader.processColumnNames(columnNames, alias)
processColumnNames(columnNames, alias, f)
if columnNames[0] != "*" {
if err := reader.colNameErrs(columnNames); err != nil {
if err := f.ColNameErrs(columnNames); err != nil {
return err
}
}
// Below code ensures the whereClause has no errors.
if whereClause != nil {
tempClause := whereClause
if err := reader.whereClauseNameErrs(tempClause, alias); err != nil {
if err := whereClauseNameErrs(tempClause, alias, f); err != nil {
return err
}
}
@@ -733,9 +660,16 @@ func (reader *Input) parseErrs(columnNames []string, whereClause interface{}, al
if myFuncs.funcExpr[i] == nil {
continue
}
if err := reader.evaluateFuncErr(myFuncs.funcExpr[i]); err != nil {
if err := evaluateFuncErr(myFuncs.funcExpr[i], f); err != nil {
return err
}
}
return nil
}

// jsonValue returns the value corresponding to the key in the JSON row.
// input is the key and row is the JSON string.
func jsonValue(input string, row string) string {
value := gjson.Get(row, input)
return value.String()
}

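Note: jsonValue delegates to the gjson path syntax, so nested keys resolve too. A small standalone usage sketch (the record literal is made up for illustration):

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
)

func main() {
	row := `{"name":"alice","address":{"city":"amherst"}}`
	// Top-level key, as jsonValue uses it above.
	fmt.Println(gjson.Get(row, "name").String()) // alice
	// gjson also resolves dotted paths into nested objects.
	fmt.Println(gjson.Get(row, "address.city").String()) // amherst
}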
@@ -18,18 +18,14 @@ package s3select

import (
"bytes"
"compress/bzip2"
"encoding/csv"
"encoding/xml"
"io"
"strconv"
"net/http"
"strings"
"time"

"net/http"

gzip "github.com/klauspost/pgzip"
"github.com/minio/minio/pkg/ioutil"
"github.com/minio/minio/pkg/s3select/format"
"github.com/minio/minio/pkg/s3select/format/csv"
"github.com/minio/minio/pkg/s3select/format/json"
)

const (
@@ -40,245 +36,16 @@ const (
continuationTime time.Duration = 5 * time.Second
)

// progress represents the XML format of the progress messages sent to the
// client.
type progress struct {
XMLName xml.Name `xml:"Progress" json:"-"`
BytesScanned int64 `xml:"BytesScanned"`
BytesProcessed int64 `xml:"BytesProcessed"`
BytesReturned int64 `xml:"BytesReturned"`
}

// stats represents the XML format of the stat messages sent to the
// client.
type stats struct {
XMLName xml.Name `xml:"Stats" json:"-"`
BytesScanned int64 `xml:"BytesScanned"`
BytesProcessed int64 `xml:"BytesProcessed"`
BytesReturned int64 `xml:"BytesReturned"`
}

// statInfo is a struct that keeps track of the byte counts for a request.
type statInfo struct {
BytesScanned int64
BytesReturned int64
BytesProcessed int64
}

// Input represents a record producing input from a formatted file or pipe.
type Input struct {
options *Options
reader *csv.Reader
firstRow []string
header []string
minOutputLength int
stats *statInfo
}

// Options are passed to the underlying encoding/csv reader.
type Options struct {
// HasHeader when true, will treat the first row as a header row.
HasHeader bool

// RecordDelimiter is the string that records are delimited by.
RecordDelimiter string

// FieldDelimiter is the string that fields are delimited by.
FieldDelimiter string

// Comments holds the comment character; lines whose first character
// matches it are skipped.
Comments string

// Name of the table that is used for querying
Name string

// ReadFrom is where the data will be read from.
ReadFrom io.Reader

// Compressed, when set to "GZIP" or "BZIP2", wraps ReadFrom with the
// matching decompressing reader to extract the csv.
Compressed string

// SQL expression meant to be evaluated.
Expression string

// OutputFieldDelimiter is the delimiter used for the output CSV.
OutputFieldDelimiter string

// Size of incoming object
StreamSize int64

// HeaderOpt is true when FileHeaderInfo is "USE".
HeaderOpt bool

// Progress enables or disables periodic progress messages.
Progress bool
}

// NewInput sets up a new Input, the first row is read when this is run.
// If there is a problem with reading the first row, the error is returned.
// Otherwise, the returned reader can be reliably consumed with ReadRecord()
// until ReadRecord() returns nil.
func NewInput(opts *Options) (*Input, error) {
myReader := opts.ReadFrom
var tempBytesScanned int64
tempBytesScanned = 0
switch opts.Compressed {
case "GZIP":
tempBytesScanned = opts.StreamSize
var err error
if myReader, err = gzip.NewReader(opts.ReadFrom); err != nil {
return nil, ErrTruncatedInput
}
case "BZIP2":
tempBytesScanned = opts.StreamSize
myReader = bzip2.NewReader(opts.ReadFrom)
}

// DelimitedReader treats custom record delimiters like `\r\n`,`\r`,`ab` etc and replaces them with `\n`.
normalizedReader := ioutil.NewDelimitedReader(myReader, []rune(opts.RecordDelimiter))
progress := &statInfo{
BytesScanned: tempBytesScanned,
BytesProcessed: 0,
BytesReturned: 0,
}

reader := &Input{
options: opts,
reader: csv.NewReader(normalizedReader),
stats: progress,
}
reader.firstRow = nil

reader.reader.FieldsPerRecord = -1
if reader.options.FieldDelimiter != "" {
reader.reader.Comma = rune(reader.options.FieldDelimiter[0])
}

if reader.options.Comments != "" {
reader.reader.Comment = rune(reader.options.Comments[0])
}

// QuoteCharacter - " (defaulted currently)
reader.reader.LazyQuotes = true

if err := reader.readHeader(); err != nil {
return nil, err
}

return reader, nil
}

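Note: a minimal in-package sketch of driving this reader over an in-memory CSV, matching the Options usage in the tests further down; the data and field values are illustrative:

data := "id,name\n1,alice\n2,bob\n"
opts := &Options{
	HasHeader:            true,
	RecordDelimiter:      "\n",
	FieldDelimiter:       ",",
	Name:                 "S3Object",
	ReadFrom:             bytes.NewReader([]byte(data)),
	OutputFieldDelimiter: ",",
	StreamSize:           int64(len(data)),
	HeaderOpt:            true,
}
in, err := NewInput(opts)
if err != nil {
	// handle ErrTruncatedInput, ErrCSVParsingError, ...
}
for row := in.ReadRecord(); row != nil; row = in.ReadRecord() {
	fmt.Println(row) // [1 alice], then [2 bob]
}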
// ReadRecord reads a single record from the underlying reader. It always
// returns successfully.
// If the record is empty, an empty []string is returned.
// Records expand to match the current row size, adding blank fields as needed.
// Records never return fewer than the number of fields in the first row.
// Returns nil on EOF.
// In the event of a parse error due to an invalid record, it is logged, and
// an empty []string is returned with the number of fields in the first row,
// as if the record were empty.
//
// In general, this is a very fault-tolerant reader.
func (reader *Input) ReadRecord() []string {
var row []string
var fileErr error

if reader.firstRow != nil {
row = reader.firstRow
reader.firstRow = nil
return row
}

row, fileErr = reader.reader.Read()
emptysToAppend := reader.minOutputLength - len(row)
if fileErr == io.EOF || fileErr == io.ErrClosedPipe {
return nil
} else if _, ok := fileErr.(*csv.ParseError); ok {
emptysToAppend = reader.minOutputLength
}

if emptysToAppend > 0 {
for counter := 0; counter < emptysToAppend; counter++ {
row = append(row, "")
}
}

return row
}

// readHeader reads the header into the header variable if the header is present
// as the first row of the csv
func (reader *Input) readHeader() error {
var readErr error
if reader.options.HasHeader {
reader.firstRow, readErr = reader.reader.Read()
if readErr != nil {
return ErrCSVParsingError
}
reader.header = cleanHeader(reader.firstRow)
reader.firstRow = nil
reader.minOutputLength = len(reader.header)
} else {
reader.firstRow, readErr = reader.reader.Read()
reader.header = make([]string, len(reader.firstRow))
for i := 0; i < len(reader.firstRow); i++ {
reader.header[i] = strconv.Itoa(i)
}

}
return nil
}

// cleanHeader replaces the spaces in column names with underscores.
func cleanHeader(columns []string) []string {
for i := 0; i < len(columns); i++ {
columns[i] = strings.Replace(columns[i], " ", "_", -1)
}
return columns
}

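Note: for example, cleanHeader turns a header row like `first name,last name` into query-safe column names:

cols := cleanHeader([]string{"first name", "last name"})
fmt.Println(cols) // [first_name last_name]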
// createStatXML is the function which does the marshaling from the stat
// structs into XML so that the progress and stat message can be sent
func (reader *Input) createStatXML() (string, error) {
if reader.options.Compressed == "NONE" {
reader.stats.BytesProcessed = reader.options.StreamSize
reader.stats.BytesScanned = reader.stats.BytesProcessed
}
out, err := xml.Marshal(&stats{
BytesScanned: reader.stats.BytesScanned,
BytesProcessed: reader.stats.BytesProcessed,
BytesReturned: reader.stats.BytesReturned,
})
if err != nil {
return "", err
}
return xml.Header + string(out), nil
}

// createProgressXML is the function which does the marshaling from the progress structs into XML so that the progress and stat message can be sent
func (reader *Input) createProgressXML() (string, error) {
if reader.options.HasHeader {
reader.stats.BytesProcessed += processSize(reader.header)
}
if reader.options.Compressed == "NONE" {
reader.stats.BytesScanned = reader.stats.BytesProcessed
}
out, err := xml.Marshal(&progress{
BytesScanned: reader.stats.BytesScanned,
BytesProcessed: reader.stats.BytesProcessed,
BytesReturned: reader.stats.BytesReturned,
})
if err != nil {
return "", err
}
return xml.Header + string(out), nil
}

// Header returns the header of the reader. Either the first row if a header
// is set in the options, or c#, where # is the column number, starting with 0.
func (reader *Input) Header() []string {
return reader.header
}

// ParseSelectTokens tokenizes the select query into the required columns,
// alias, limit value, where clause, aggregate function names, function
// expressions, and error.
type ParseSelectTokens struct {
reqCols []string
alias string
myLimit int64
whereClause interface{}
aggFunctionNames []string
myFuncs *SelectFuncs
myErr error
}

// Row is a struct for keeping track of key aspects of a row.
@@ -287,10 +54,58 @@ type Row struct {
err error
}

// cleanExpr replaces "" and '' with `` for the select parser.
func cleanExpr(expr string) string {
r := strings.NewReplacer("\"", "`", "'", "`")
return r.Replace(expr)
}

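Note: the replacement means both quoting styles reach sqlparser as identifier backticks, e.g.:

fmt.Println(cleanExpr(`SELECT "first name" FROM S3Object WHERE name = 'alice'`))
// SELECT `first name` FROM S3Object WHERE name = `alice`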
// New - initialize new select format
func New(gr io.Reader, size int64, req ObjectSelectRequest) (s3s format.Select, err error) {
// Initializing options for CSV
if req.InputSerialization.CSV != nil {
if req.OutputSerialization.CSV.FieldDelimiter == "" {
req.OutputSerialization.CSV.FieldDelimiter = ","
}
if req.InputSerialization.CSV.FileHeaderInfo == "" {
req.InputSerialization.CSV.FileHeaderInfo = CSVFileHeaderInfoNone
}
if req.InputSerialization.CSV.RecordDelimiter == "" {
req.InputSerialization.CSV.RecordDelimiter = "\n"
}
s3s, err = csv.New(&csv.Options{
HasHeader: req.InputSerialization.CSV.FileHeaderInfo != CSVFileHeaderInfoNone,
RecordDelimiter: req.InputSerialization.CSV.RecordDelimiter,
FieldDelimiter: req.InputSerialization.CSV.FieldDelimiter,
Comments: req.InputSerialization.CSV.Comments,
Name: "S3Object", // Default table name for all objects
ReadFrom: gr,
Compressed: string(req.InputSerialization.CompressionType),
Expression: cleanExpr(req.Expression),
OutputFieldDelimiter: req.OutputSerialization.CSV.FieldDelimiter,
StreamSize: size,
HeaderOpt: req.InputSerialization.CSV.FileHeaderInfo == CSVFileHeaderInfoUse,
Progress: req.RequestProgress.Enabled,
})
} else if req.InputSerialization.JSON != nil {
// Initializing options for JSON
s3s, err = json.New(&json.Options{
Name: "S3Object", // Default table name for all objects
ReadFrom: gr,
Compressed: string(req.InputSerialization.CompressionType),
Expression: cleanExpr(req.Expression),
StreamSize: size,
Type: req.InputSerialization.JSON.Type == JSONTypeDocument,
Progress: req.RequestProgress.Enabled,
})
}
return s3s, err
}

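Note: a sketch of how a handler is expected to drive this API — decode the request body, construct the format with New, then stream with Execute. The XML literal and variable names here are illustrative, not taken from this change:

var req ObjectSelectRequest
body := []byte(`<SelectObjectContentRequest>
  <Expression>SELECT * FROM S3Object</Expression>
  <ExpressionType>SQL</ExpressionType>
  <InputSerialization><CSV><FileHeaderInfo>USE</FileHeaderInfo></CSV></InputSerialization>
  <OutputSerialization><CSV></CSV></OutputSerialization>
</SelectObjectContentRequest>`)
if err := xml.Unmarshal(body, &req); err != nil {
	// handle malformed request
}
s3s, err := New(objReader, objSize, req) // objReader/objSize come from the object layer
if err != nil {
	// handle serialization errors
}
if err := Execute(w, s3s); err != nil { // w is the http.ResponseWriter
	// the stream has already started; log and abort
}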
// Execute is the function where all the blocking occurs. It writes to the HTTP
// response writer in a streaming fashion so that the client can actively use
// the results before the query is finally finished executing.
func (reader *Input) Execute(writer io.Writer) error {
func Execute(writer io.Writer, f format.Select) error {
myRow := make(chan *Row)
curBuf := bytes.NewBuffer(make([]byte, 1000000))
curBuf.Reset()
@@ -298,12 +113,14 @@ func (reader *Input) Execute(writer io.Writer) error {
continuationTimer := time.NewTimer(continuationTime)
defer progressTicker.Stop()
defer continuationTimer.Stop()
go reader.runSelectParser(reader.options.Expression, myRow)

go runSelectParser(f, myRow)

for {
select {
case row, ok := <-myRow:
if ok && row.err != nil {
errorMessage := reader.writeErrorMessage(row.err, curBuf)
errorMessage := writeErrorMessage(row.err, curBuf)
_, err := errorMessage.WriteTo(writer)
flusher, okFlush := writer.(http.Flusher)
if okFlush {
@@ -316,7 +133,7 @@ func (reader *Input) Execute(writer io.Writer) error {
close(myRow)
return nil
} else if ok {
message := reader.writeRecordMessage(row.record, curBuf)
message := writeRecordMessage(row.record, curBuf)
_, err := message.WriteTo(writer)
flusher, okFlush := writer.(http.Flusher)
if okFlush {
@@ -326,17 +143,17 @@ func (reader *Input) Execute(writer io.Writer) error {
return err
}
curBuf.Reset()
reader.stats.BytesReturned += int64(len(row.record))
f.UpdateBytesReturned(int64(len(row.record)))
if !continuationTimer.Stop() {
<-continuationTimer.C
}
continuationTimer.Reset(continuationTime)
} else if !ok {
statPayload, err := reader.createStatXML()
statPayload, err := f.CreateStatXML()
if err != nil {
return err
}
statMessage := reader.writeStatMessage(statPayload, curBuf)
statMessage := writeStatMessage(statPayload, curBuf)
_, err = statMessage.WriteTo(writer)
flusher, ok := writer.(http.Flusher)
if ok {
@@ -346,7 +163,7 @@ func (reader *Input) Execute(writer io.Writer) error {
return err
}
curBuf.Reset()
message := reader.writeEndMessage(curBuf)
message := writeEndMessage(curBuf)
_, err = message.WriteTo(writer)
flusher, ok = writer.(http.Flusher)
if ok {
@@ -360,12 +177,12 @@ func (reader *Input) Execute(writer io.Writer) error {

case <-progressTicker.C:
// Send progress messages only if requested by client.
if reader.options.Progress {
progressPayload, err := reader.createProgressXML()
if f.Progress() {
progressPayload, err := f.CreateProgressXML()
if err != nil {
return err
}
progressMessage := reader.writeProgressMessage(progressPayload, curBuf)
progressMessage := writeProgressMessage(progressPayload, curBuf)
_, err = progressMessage.WriteTo(writer)
flusher, ok := writer.(http.Flusher)
if ok {
@@ -377,7 +194,7 @@ func (reader *Input) Execute(writer io.Writer) error {
curBuf.Reset()
}
case <-continuationTimer.C:
message := reader.writeContinuationMessage(curBuf)
message := writeContinuationMessage(curBuf)
_, err := message.WriteTo(writer)
flusher, ok := writer.(http.Flusher)
if ok {

@@ -282,7 +282,7 @@ func writeProgressHeader() []byte {

// writeRecordMessage is the function which constructs the binary message for a
// record message to be sent.
func (csvOutput *Input) writeRecordMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer {
func writeRecordMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer {
// The below are the specifications of the header for a "record" event
// 11 -event type - 7 - 7 "Records"
// 13 -content-type -7 -24 "application/octet-stream"
@@ -310,7 +310,7 @@ func (csvOutput *Input) writeRecordMessage(payload string, currentMessage *bytes

// writeContinuationMessage is the function which constructs the binary message
// for a continuation message to be sent.
func (csvOutput *Input) writeContinuationMessage(currentMessage *bytes.Buffer) *bytes.Buffer {
func writeContinuationMessage(currentMessage *bytes.Buffer) *bytes.Buffer {
// 11 -event type - 7 - 4 "Cont"
// 13 -message-type -7 5 "event"
// This is predefined from AMZ protocol found here:
@@ -333,7 +333,7 @@ func (csvOutput *Input) writeContinuationMessage(currentMessage *bytes.Buffer) *

// writeEndMessage is the function which constructs the binary message
// for an end message to be sent.
func (csvOutput *Input) writeEndMessage(currentMessage *bytes.Buffer) *bytes.Buffer {
func writeEndMessage(currentMessage *bytes.Buffer) *bytes.Buffer {
// 11 -event type - 7 - 3 "End"
// 13 -message-type -7 5 "event"
// This is predefined from AMZ protocol found here:
@@ -356,7 +356,7 @@ func (csvOutput *Input) writeEndMessage(currentMessage *bytes.Buffer) *bytes.Buf

// writeStatMessage is the function which constructs the binary message for a
// stat message to be sent.
func (csvOutput *Input) writeStatMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer {
func writeStatMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer {
// 11 -event type - 7 - 5 "Stat" 20
// 13 -content-type -7 -8 "text/xml" 25
// 13 -message-type -7 5 "event" 22
@@ -384,7 +384,7 @@ func (csvOutput *Input) writeStatMessage(payload string, currentMessage *bytes.B

// writeProgressMessage is the function which constructs the binary message for
// a progress message to be sent.
func (csvOutput *Input) writeProgressMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer {
func writeProgressMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer {
// The below are the specifications of the header for a "Progress" event
// 11 -event type - 7 - 8 "Progress" 23
// 13 -content-type -7 -8 "text/xml" 25
@@ -413,7 +413,7 @@ func (csvOutput *Input) writeProgressMessage(payload string, currentMessage *byt

// writeErrorMessage is the function which constructs the binary message for
// an error message to be sent.
func (csvOutput *Input) writeErrorMessage(errorMessage error, currentMessage *bytes.Buffer) *bytes.Buffer {
func writeErrorMessage(errorMessage error, currentMessage *bytes.Buffer) *bytes.Buffer {

// The below are the specifications of the header for an "error" event
// 11 -error-code - 7 - DEFINED "DEFINED"

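Note: all of these writers frame their payloads in the AWS event-stream binary format: a prelude of total message length and headers length, a CRC32 of the prelude, then headers, payload, and a trailing CRC32 of the whole message. A minimal standalone sketch of that framing math, written from the published spec rather than this package's implementation:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"hash/crc32"
)

// frame wraps headers+payload per the AWS event-stream wire format:
// 4-byte total length, 4-byte headers length, 4-byte prelude CRC,
// headers, payload, then a CRC over everything that came before.
func frame(headers, payload []byte) []byte {
	var buf bytes.Buffer
	total := uint32(12 + len(headers) + len(payload) + 4)
	binary.Write(&buf, binary.BigEndian, total)
	binary.Write(&buf, binary.BigEndian, uint32(len(headers)))
	binary.Write(&buf, binary.BigEndian, crc32.ChecksumIEEE(buf.Bytes()))
	buf.Write(headers)
	buf.Write(payload)
	binary.Write(&buf, binary.BigEndian, crc32.ChecksumIEEE(buf.Bytes()))
	return buf.Bytes()
}

func main() {
	msg := frame(nil, []byte("hello"))
	fmt.Println(len(msg)) // 12 + 0 + 5 + 4 = 21
}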
@@ -17,10 +17,13 @@
package s3select

import (
"encoding/json"
"math"
"sort"
"strconv"
"strings"

"github.com/minio/minio/pkg/s3select/format"
"github.com/xwb1989/sqlparser"
)

@@ -33,8 +36,8 @@ type SelectFuncs struct {

// RunSqlParser allows us to easily bundle all the functions from above and run
// them in the appropriate order.
func (reader *Input) runSelectParser(selectExpression string, myRow chan *Row) {
reqCols, alias, myLimit, whereClause, aggFunctionNames, myFuncs, myErr := reader.ParseSelect(selectExpression)
func runSelectParser(f format.Select, myRow chan *Row) {
reqCols, alias, myLimit, whereClause, aggFunctionNames, myFuncs, myErr := ParseSelect(f)
if myErr != nil {
rowStruct := &Row{
err: myErr,
@@ -42,23 +45,26 @@ func (reader *Input) runSelectParser(selectExpression string, myRow chan *Row) {
myRow <- rowStruct
return
}
reader.processSelectReq(reqCols, alias, whereClause, myLimit, aggFunctionNames, myRow, myFuncs)
processSelectReq(reqCols, alias, whereClause, myLimit, aggFunctionNames, myRow, myFuncs, f)

}

// ParseSelect parses the SELECT expression, and effectively tokenizes it into
// its separate parts. It returns the requested column names, alias, limit of
// records, and the where clause.
func (reader *Input) ParseSelect(sqlInput string) ([]string, string, int64, interface{}, []string, *SelectFuncs, error) {
func ParseSelect(f format.Select) ([]string, string, int64, interface{}, []string, *SelectFuncs, error) {
// return columnNames, alias, limitOfRecords, whereClause, coalStore, nil
stmt, err := sqlparser.Parse(sqlInput)
var whereClause interface{}
var alias string
var limit int64
myFuncs := &SelectFuncs{}

stmt, err := sqlparser.Parse(cleanExpr(f.Expression()))
// TODO Maybe can parse their errors a bit to return some more of the s3 errors
if err != nil {
return nil, "", 0, nil, nil, nil, ErrLexerInvalidChar
}

var whereClause interface{}
var alias string
var limit int64
myFuncs := &SelectFuncs{}
switch stmt := stmt.(type) {
case *sqlparser.Select:
// evaluates the where clause
@@ -146,7 +152,7 @@ func (reader *Input) ParseSelect(sqlInput string) ([]string, string, int64, inte
if stmt.OrderBy != nil {
return nil, "", 0, nil, nil, nil, ErrParseUnsupportedToken
}
if err := reader.parseErrs(columnNames, whereClause, alias, myFuncs); err != nil {
if err := parseErrs(columnNames, whereClause, alias, myFuncs, f); err != nil {
return nil, "", 0, nil, nil, nil, err
}
return columnNames, alias, limit, whereClause, functionNames, myFuncs, nil
@@ -157,13 +163,13 @@ func (reader *Input) ParseSelect(sqlInput string) ([]string, string, int64, inte
// This is the main function. It goes row by row, and for records that satisfy
// the where clause it prints the appropriate row given the requested
// columns.
func (reader *Input) processSelectReq(reqColNames []string, alias string, whereClause interface{}, limitOfRecords int64, functionNames []string, myRow chan *Row, myFunc *SelectFuncs) {
func processSelectReq(reqColNames []string, alias string, whereClause interface{}, limitOfRecords int64, functionNames []string, myRow chan *Row, myFunc *SelectFuncs, f format.Select) {
counter := -1
var columns []string
filtrCount := 0
functionFlag := false
// myAggVals is used to store our aggregation values if we need to store them.
myAggVals := make([]float64, len(reqColNames))
var columns []string
// lowercaseColumnsMap is used in accordance with hasDuplicates so that we can
// raise the error "Ambiguous" if a case-insensitive column is provided and we
// have multiple matches.
@@ -174,23 +180,35 @@ func (reader *Input) processSelectReq(reqColNames []string, alias string, whereC
if limitOfRecords == 0 {
limitOfRecords = math.MaxInt64
}

for {
record := reader.ReadRecord()
reader.stats.BytesProcessed += processSize(record)
record, err := f.Read()
if err != nil {
rowStruct := &Row{
err: err,
}
myRow <- rowStruct
return
}
if record == nil {
if functionFlag {
rowStruct := &Row{
record: reader.aggFuncToStr(myAggVals) + "\n",
record: aggFuncToStr(myAggVals, f) + "\n",
}
myRow <- rowStruct
}
close(myRow)
return
}
if counter == -1 && reader.options.HeaderOpt && len(reader.header) > 0 {
columns = reader.Header()

out, _ := json.Marshal(record)
f.UpdateBytesProcessed(record)

if counter == -1 && f.HasHeader() && len(f.Header()) > 0 {
columns = f.Header()
myErr := checkForDuplicates(columns, columnsMap, hasDuplicates, lowercaseColumnsMap)
if format.IsInt(reqColNames[0]) {
myErr = ErrMissingHeaders
}
if myErr != nil {
rowStruct := &Row{
err: myErr,
@@ -198,17 +216,21 @@ func (reader *Input) processSelectReq(reqColNames []string, alias string, whereC
myRow <- rowStruct
return
}
} else if counter == -1 && len(reader.header) > 0 {
columns = reader.Header()
} else if counter == -1 && len(f.Header()) > 0 {
columns = f.Header()
for i := 0; i < len(columns); i++ {
columnsMap["_"+strconv.Itoa(i)] = i
}

}
// When we have reached our limit, on what the user specified as the number
// of rows they wanted, we terminate our interpreter.
// Return in case the number of records reaches the LIMIT defined in the select query
if int64(filtrCount) == limitOfRecords && limitOfRecords != 0 {
close(myRow)
return
}

// The call to the where clause ensures that the rows we print match our where clause.
condition, myErr := matchesMyWhereClause(record, columnsMap, alias, whereClause)
condition, myErr := matchesMyWhereClause(record, alias, whereClause)
if myErr != nil {
rowStruct := &Row{
err: myErr,
@@ -219,25 +241,33 @@ func (reader *Input) processSelectReq(reqColNames []string, alias string, whereC
if condition {
// if it's an asterisk we just print everything in the row
if reqColNames[0] == "*" && functionNames[0] == "" {
rowStruct := &Row{
record: reader.printAsterix(record) + "\n",
var row *Row
switch f.Type() {
case format.CSV:
row = &Row{
record: strings.Join(convertToSlice(columnsMap, record, string(out)), f.OutputFieldDelimiter()) + "\n",
}
case format.JSON:
row = &Row{
record: string(out) + "\n",
}
}
myRow <- rowStruct
myRow <- row

} else if alias != "" {
// This deals with the case of a request for a column with an index
// e.g A_1.
if representsInt(reqColNames[0]) {
if format.IsInt(reqColNames[0]) {
// This checks whether any aggregation function was called as now we
// no longer will go through printing each row, and only print at the
// end
// no longer will go through printing each row, and only print at the end
if len(functionNames) > 0 && functionNames[0] != "" {
functionFlag = true
aggregationFunctions(counter, filtrCount, myAggVals, columnsMap, reqColNames, functionNames, record)
aggregationFunctions(counter, filtrCount, myAggVals, reqColNames, functionNames, string(out))
} else {
// The code below finds the appropriate columns of the row given the
// indices provided in the SQL request and utilizes the map to
// retrieve the correct part of the row.
myQueryRow, myErr := reader.processColNameIndex(record, reqColNames, columns)
myQueryRow, myErr := processColNameIndex(string(out), reqColNames, columns, f)
if myErr != nil {
rowStruct := &Row{
err: myErr,
@@ -255,12 +285,12 @@ func (reader *Input) processSelectReq(reqColNames []string, alias string, whereC
// form of actual names rather than indices.
if len(functionNames) > 0 && functionNames[0] != "" {
functionFlag = true
aggregationFunctions(counter, filtrCount, myAggVals, columnsMap, reqColNames, functionNames, record)
aggregationFunctions(counter, filtrCount, myAggVals, reqColNames, functionNames, string(out))
} else {
// This code prints the appropriate part of the row given the filter
// and select request, if the select request was based on column
// names rather than indices.
myQueryRow, myErr := reader.processColNameLiteral(record, reqColNames, columns, columnsMap, myFunc)
myQueryRow, myErr := processColNameLiteral(string(out), reqColNames, myFunc, f)
if myErr != nil {
rowStruct := &Row{
err: myErr,
@@ -281,75 +311,73 @@ func (reader *Input) processSelectReq(reqColNames []string, alias string, whereC
}
}

// printAsterix helps to print out the entire row if an asterisk is used.
func (reader *Input) printAsterix(record []string) string {
return strings.Join(record, reader.options.OutputFieldDelimiter)
}

// processColumnNames is a function which allows for cleaning of column names.
func (reader *Input) processColumnNames(reqColNames []string, alias string) error {
for i := 0; i < len(reqColNames); i++ {
// The code below basically cleans the column name of its alias and other
// syntax, so that we can extract its pure name.
reqColNames[i] = cleanCol(reqColNames[i], alias)
func processColumnNames(reqColNames []string, alias string, f format.Select) error {
switch f.Type() {
case format.CSV:
for i := 0; i < len(reqColNames); i++ {
// The code below basically cleans the column name of its alias and other
// syntax, so that we can extract its pure name.
reqColNames[i] = cleanCol(reqColNames[i], alias)
}
case format.JSON:
// JSON doesn't have columns, so no cleaning is required.
}

return nil
}

// processColNameIndex is the function which creates the row for an index based
// query.
func (reader *Input) processColNameIndex(record []string, reqColNames []string, columns []string) (string, error) {
func processColNameIndex(record string, reqColNames []string, columns []string, f format.Select) (string, error) {
row := make([]string, len(reqColNames))
for i := 0; i < len(reqColNames); i++ {
// COALESCE AND NULLIF do not support index based access.
if reqColNames[0] == "0" {
return "", ErrInvalidColumnIndex
return "", format.ErrInvalidColumnIndex
}
// Subtract 1 because AWS Indexing is not 0 based, it starts at 1.
mytempindex, err := strconv.Atoi(reqColNames[i])
if mytempindex > len(columns) {
return "", format.ErrInvalidColumnIndex
}

if err != nil {
return "", ErrMissingHeaders
}
mytempindex = mytempindex - 1
if mytempindex > len(columns) {
return "", ErrInvalidColumnIndex
}
row[i] = record[mytempindex]
// Subtract 1 because AWS Indexing is not 0 based, it starts at 1 generating the key like "_1".
row[i] = jsonValue(string("_"+strconv.Itoa(mytempindex-1)), record)
}
rowStr := strings.Join(row, reader.options.OutputFieldDelimiter)
if len(rowStr) > 1000000 {
rowStr := strings.Join(row, f.OutputFieldDelimiter())
if len(rowStr) > MaxCharsPerRecord {
return "", ErrOverMaxRecordSize
}

return rowStr, nil
}

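Note: a worked example of the index path, assuming a CSV row that was marshaled to the internal `_N`-keyed JSON form used above — `SELECT s._2 FROM S3Object s` asks for AWS index 2, which maps to the 0-based key "_1":

record := `{"_0":"alice","_1":"amherst","_2":"usa"}`
// reqColNames[i] == "2"  ->  mytempindex == 2  ->  key "_1"
fmt.Println(jsonValue("_"+strconv.Itoa(2-1), record)) // amherst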
// processColNameLiteral is the function which creates the row for a name based
// query.
func (reader *Input) processColNameLiteral(record []string, reqColNames []string, columns []string, columnsMap map[string]int, myFunc *SelectFuncs) (string, error) {
func processColNameLiteral(record string, reqColNames []string, myFunc *SelectFuncs, f format.Select) (string, error) {
row := make([]string, len(reqColNames))
for i := 0; i < len(reqColNames); i++ {
// this is the case to deal with COALESCE.
if reqColNames[i] == "" && isValidFunc(myFunc.index, i) {
row[i] = evaluateFuncExpr(myFunc.funcExpr[i], "", record, columnsMap)
row[i] = evaluateFuncExpr(myFunc.funcExpr[i], "", record)
continue
}
myTempIndex, notFound := columnsMap[trimQuotes(reqColNames[i])]
if !notFound {
return "", ErrMissingHeaders
}
row[i] = record[myTempIndex]
row[i] = jsonValue(reqColNames[i], record)
}
rowStr := strings.Join(row, reader.options.OutputFieldDelimiter)
if len(rowStr) > 1000000 {
rowStr := strings.Join(row, f.OutputFieldDelimiter())
if len(rowStr) > MaxCharsPerRecord {
return "", ErrOverMaxRecordSize
}
return rowStr, nil
}

// aggregationFunctions performs the actual aggregation methods on the
// given row, it uses an array defined for the main parsing function
// to keep track of values.
func aggregationFunctions(counter int, filtrCount int, myAggVals []float64, columnsMap map[string]int, storeReqCols []string, storeFunctions []string, record []string) error {
// aggregationFunctions is a function which performs the actual aggregation
// methods on the given row, it uses an array defined in the main parsing
// function to keep track of values.
func aggregationFunctions(counter int, filtrCount int, myAggVals []float64, storeReqCols []string, storeFunctions []string, record string) error {
for i := 0; i < len(storeFunctions); i++ {
if storeFunctions[i] == "" {
i++
@@ -358,15 +386,13 @@ func aggregationFunctions(counter int, filtrCount int, myAggVals []float64, colu
} else {
// If column names are provided as an index it'll use this if statement instead of the else.
var convAggFloat float64
if representsInt(storeReqCols[i]) {
colIndex, _ := strconv.Atoi(storeReqCols[i])
// colIndex is 1-based
convAggFloat, _ = strconv.ParseFloat(record[colIndex-1], 64)
if format.IsInt(storeReqCols[i]) {
myIndex, _ := strconv.Atoi(storeReqCols[i])
convAggFloat, _ = strconv.ParseFloat(jsonValue(string("_"+strconv.Itoa(myIndex)), record), 64)

} else {
// case that the columns are in the form of named columns rather than indices.
convAggFloat, _ = strconv.ParseFloat(record[columnsMap[trimQuotes(storeReqCols[i])]], 64)

convAggFloat, _ = strconv.ParseFloat(jsonValue(storeReqCols[i], record), 64)
}
// This if statement is for calculating the min.
if storeFunctions[i] == "min" {
@@ -404,3 +430,25 @@ func aggregationFunctions(counter int, filtrCount int, myAggVals []float64, colu
}
return nil
}

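Note: the hunk elides the per-function update bodies, but the avg case in particular has to fold each new value into a running mean using the count of rows seen so far. A sketch of the update rule these branches are expected to implement (illustrative — the actual branch bodies are not shown in this diff):

// updateAgg folds v into the running aggregate agg after n prior rows.
func updateAgg(fn string, agg float64, v float64, n int) float64 {
	switch fn {
	case "min":
		if v < agg {
			return v
		}
	case "max":
		if v > agg {
			return v
		}
	case "sum":
		return agg + v
	case "count":
		return agg + 1
	case "avg":
		// running mean: weight the old mean by the rows already seen
		return (agg*float64(n) + v) / float64(n+1)
	}
	return agg
}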
// convertToSlice takes the map[string]interface{} and converts it to []string.
func convertToSlice(columnsMap map[string]int, record map[string]interface{}, marshalledRecord string) []string {
var result []string
type kv struct {
Key string
Value int
}
var ss []kv
for k, v := range columnsMap {
ss = append(ss, kv{k, v})
}
sort.Slice(ss, func(i, j int) bool {
return ss[i].Value < ss[j].Value
})
for _, kv := range ss {
if _, ok := record[kv.Key]; ok {
result = append(result, jsonValue(kv.Key, marshalledRecord))
}
}
return result
}

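Note: sorting by the stored column index keeps the output fields in file order regardless of Go's randomized map iteration; for example (values illustrative):

columnsMap := map[string]int{"name": 0, "city": 1}
rec := map[string]interface{}{"name": "alice", "city": "amherst"}
out := `{"name":"alice","city":"amherst"}`
fmt.Println(convertToSlice(columnsMap, rec, out)) // [alice amherst]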
@@ -17,10 +17,11 @@
package s3select

import (
"bytes"
"fmt"
"reflect"
"testing"

"github.com/minio/minio/pkg/s3select/format"
)

// Unit Test for the checkForDuplicates function.
@@ -35,6 +36,7 @@ func TestCheckForDuplicates(t *testing.T) {
{[]string{"name", "id", "last_name", "last_name"}, make(map[string]int), make(map[string]bool), make(map[string]int), ErrAmbiguousFieldName},
{[]string{"name", "id", "last_name", "another_name"}, make(map[string]int), make(map[string]bool), make(map[string]int), nil},
}

for _, table := range tables {
err := checkForDuplicates(table.myReq, table.myHeaders, table.myDup, table.myLow)
if err != table.myErr {
@@ -43,106 +45,14 @@ func TestCheckForDuplicates(t *testing.T) {
}
}

// Test for the function which processes column names to make sure that they
// are compatible with spaces.
func TestMyProcessing(t *testing.T) {
options := &Options{
HasHeader: false,
RecordDelimiter: "\n",
FieldDelimiter: ",",
Comments: "",
Name: "S3Object", // Default table name for all objects
ReadFrom: bytes.NewReader([]byte("Here , is, a, string + \n + random,random,stuff,stuff ")),
Compressed: "",
Expression: "",
OutputFieldDelimiter: ",",
StreamSize: 20,
}
s3s, err := NewInput(options)
if err != nil {
t.Error(err)
}
tables := []struct {
myReq []string
myHeaders map[string]int
myDup map[string]bool
myLow map[string]int
myOpts *Options
input *Input
length int
testOutput string
myErr error
}{
{[]string{"name", "id", "last_name", "CAST"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, 4, "CAST", nil},
{[]string{"name", "id", "last_name", "another_name"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, 4, "another_name", nil},
{[]string{"name", "id", "last_name", "another_name"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, 4, "another_name", nil},
{[]string{"name", "id", "random_name", "fame_name", "another_col"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, 5, "fame_name", nil},
}
for _, table := range tables {
err = checkForDuplicates(table.myReq, table.myHeaders, table.myDup, table.myLow)
if err != table.myErr {
t.Error()
}
if len(table.myReq) != table.length {
t.Errorf("UnexpectedError")
}
if table.myReq[3] != table.testOutput {
t.Error()
}
}
}

// TestMyRowIndexResults is a unit test which makes sure that the rows that are
// being printed are appropriate to the query being requested.
func TestMyRowIndexResults(t *testing.T) {
options := &Options{
HasHeader: false,
RecordDelimiter: "\n",
FieldDelimiter: ",",
Comments: "",
Name: "S3Object", // Default table name for all objects
ReadFrom: bytes.NewReader([]byte("Here , is, a, string + \n + random,random,stuff,stuff ")),
Compressed: "",
Expression: "",
OutputFieldDelimiter: ",",
StreamSize: 20,
}
s3s, err := NewInput(options)
if err != nil {
t.Error(err)
}
tables := []struct {
myReq []string
myHeaders map[string]int
myDup map[string]bool
myLow map[string]int
myOpts *Options
input *Input
myRecord []string
myTarget string
myAsterix string
columns []string
err error
}{
{[]string{"1", "2"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, []string{"target", "random", "hello", "stuff"}, "target,random", "target,random,hello,stuff", []string{"1", "2", "3", "4"}, nil},
{[]string{"2", "3", "4"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, []string{"random", "hullo", "thing", "stuff"}, "hullo,thing,stuff", "random,hullo,thing,stuff", []string{"1", "2", "3", "4"}, nil},
{[]string{"3", "2"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, []string{"random", "hullo", "thing", "stuff"}, "thing,hullo", "random,hullo,thing,stuff", []string{"1", "2", "3", "4"}, nil},
{[]string{"11", "1"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, []string{"random", "hullo", "thing", "stuff"}, "", "random,hullo,thing,stuff", []string{"1", "2", "3", "4"}, ErrInvalidColumnIndex},
}
for _, table := range tables {
checkForDuplicates(table.columns, table.myHeaders, table.myDup, table.myLow)
myRow, err := s3s.processColNameIndex(table.myRecord, table.myReq, table.columns)
if err != table.err {
t.Error()
}
if myRow != table.myTarget {
t.Error()
}
myRow = table.input.printAsterix(table.myRecord)
if myRow != table.myAsterix {
t.Error()
// This function returns the index of a string in a list
func stringIndex(a string, list []string) int {
for i, v := range list {
if v == a {
return i
}
}
return -1
}

// TestMyHelperFunctions is a unit test which tests some small helper string
@@ -159,7 +69,7 @@ func TestMyHelperFunctions(t *testing.T) {
{"test3", []string{"test1", "test2", "test3", "test4", "test5"}, 2, true},
}
for _, table := range tables {
if stringInSlice(table.myReq, table.myList) != table.expected {
if format.StringInSlice(table.myReq, table.myList) != table.expected {
t.Error()
}
if stringIndex(table.myReq, table.myList) != table.myIndex {
@@ -233,82 +143,6 @@ func TestMyConversion(t *testing.T) {
}
}

// Unit Tests for Parser.
func TestMyParser(t *testing.T) {
tables := []struct {
myQuery string
err error
reqCols []string
alias string
myLimit int
aggFuncs []string
header []string
}{
{"SELECT * FROM S3OBJECT", nil, []string{"*"}, "S3OBJECT", 0, make([]string, 1), []string{"name1", "name2", "name3", "name4"}},
{"SELECT * FROM S3OBJECT AS A", nil, []string{"*"}, "A", 0, make([]string, 1), []string{"name1", "name2", "name3", "name4"}},
{"SELECT col_name FROM S3OBJECT AS A", nil, []string{"col_name"}, "A", 0, make([]string, 1), []string{"col_name", "name2", "name3", "name4"}},
{"SELECT col_name,col_other FROM S3OBJECT AS A LIMIT 5", nil, []string{"col_name", "col_other"}, "A", 5, make([]string, 2), []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT col_name,col_other FROM S3OBJECT AS A WHERE col_name = 'Name' LIMIT 5", nil, []string{"col_name", "col_other"}, "A", 5, make([]string, 2), []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT col_name,col_other FROM S3OBJECT AS A WHERE col_name = 'Name LIMIT 5", ErrLexerInvalidChar, nil, "", 0, nil, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT count(*) FROM S3OBJECT AS A WHERE col_name = 'Name' LIMIT 5", nil, []string{"*"}, "A", 5, []string{"count"}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT sum(col_name),sum(col_other) FROM S3OBJECT AS A WHERE col_name = 'Name' LIMIT 5", nil, []string{"col_name", "col_other"}, "A", 5, []string{"sum", "sum"}, []string{"col_name", "col_other"}},
{"SELECT A.col_name FROM S3OBJECT AS A", nil, []string{"col_name"}, "A", 0, make([]string, 1), []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT A.`col name` FROM S3OBJECT AS A", nil, []string{"col_name"}, "A", 0, make([]string, 1), []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT A._col_name FROM S3OBJECT AS A", nil, []string{"col_name"}, "A", 0, make([]string, 1), []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT A._col_name FROM S3OBJECT AS A WHERE randomname > 5", ErrMissingHeaders, nil, "", 0, nil, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT A._col_name FROM S3OBJECT AS A WHERE A._11 > 5", ErrInvalidColumnIndex, nil, "", 0, nil, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT COALESCE(col_name,col_other) FROM S3OBJECT AS A WHERE A._3 > 5", nil, []string{""}, "A", 0, []string{""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT COALESCE(col_name,col_other),COALESCE(col_name,col_other) FROM S3OBJECT AS A WHERE A._3 > 5", nil, []string{"", ""}, "A", 0, []string{"", ""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT COALESCE(col_name,col_other) ,col_name , COALESCE(col_name,col_other) FROM S3OBJECT AS A WHERE col_name > 5", nil, []string{"", "col_name", ""}, "A", 0, []string{"", "", ""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT NULLIF(col_name,col_other) ,col_name , COALESCE(col_name,col_other) FROM S3OBJECT AS A WHERE col_name > 5", nil, []string{"", "col_name", ""}, "A", 0, []string{"", "", ""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT NULLIF(col_name,col_other) FROM S3OBJECT AS A WHERE col_name > 5", nil, []string{""}, "A", 0, []string{""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT NULLIF(randomname,col_other) FROM S3OBJECT AS A WHERE col_name > 5", ErrMissingHeaders, nil, "", 0, nil, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT col_name FROM S3OBJECT AS A WHERE COALESCE(random,5) > 5", ErrMissingHeaders, nil, "", 0, nil, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT col_name FROM S3OBJECT AS A WHERE NULLIF(random,5) > 5", ErrMissingHeaders, nil, "", 0, nil, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT col_name FROM S3OBJECT AS A WHERE LOWER(col_name) BETWEEN 5 AND 7", nil, []string{"col_name"}, "A", 0, []string{""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT UPPER(col_name) FROM S3OBJECT AS A WHERE LOWER(col_name) BETWEEN 5 AND 7", nil, []string{""}, "A", 0, []string{""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT UPPER(*) FROM S3OBJECT AS A WHERE LOWER(col_name) BETWEEN 5 AND 7", ErrParseUnsupportedCallWithStar, nil, "", 0, nil, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT NULLIF(col_name,col_name) FROM S3OBJECT AS A WHERE NULLIF(LOWER(col_name),col_name) BETWEEN 5 AND 7", nil, []string{""}, "A", 0, []string{""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT COALESCE(col_name,col_name) FROM S3OBJECT AS A WHERE NULLIF(LOWER(col_name),col_name) BETWEEN 5 AND 7", nil, []string{""}, "A", 0, []string{""}, []string{"col_name", "col_other", "name3", "name4"}},
}
for _, table := range tables {
options := &Options{
HasHeader: false,
RecordDelimiter: "\n",
FieldDelimiter: ",",
Comments: "",
Name: "S3Object", // Default table name for all objects
ReadFrom: bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
Compressed: "",
Expression: "",
OutputFieldDelimiter: ",",
StreamSize: 20,
HeaderOpt: true,
}
s3s, err := NewInput(options)
if err != nil {
t.Error(err)
}
s3s.header = table.header
reqCols, alias, myLimit, _, aggFunctionNames, _, err := s3s.ParseSelect(table.myQuery)
if table.err != err {
t.Error()
}
if !reflect.DeepEqual(reqCols, table.reqCols) {
t.Error()
}
if alias != table.alias {
t.Error()
}
if myLimit != int64(table.myLimit) {
t.Error()
}
if !reflect.DeepEqual(table.aggFuncs, aggFunctionNames) {
t.Error()
}
}
}

// Unit tests for the main function that performs aggreggation.
|
||||
func TestMyAggregationFunc(t *testing.T) {
|
||||
columnsMap := make(map[string]int)
|
||||
@@ -321,21 +155,22 @@ func TestMyAggregationFunc(t *testing.T) {
|
||||
columnsMap map[string]int
|
||||
storeReqCols []string
|
||||
storeFunctions []string
|
||||
record []string
|
||||
record string
|
||||
err error
|
||||
expectedVal float64
|
||||
}{
|
||||
-		{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"count"}, []string{"1", "2"}, nil, 11},
-		{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"min"}, []string{"1", "2"}, nil, 1},
-		{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"max"}, []string{"1", "2"}, nil, 10},
-		{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"sum"}, []string{"1", "2"}, nil, 11},
-		{1, 1, []float64{10}, columnsMap, []string{"Col1"}, []string{"avg"}, []string{"1", "2"}, nil, 5.500},
-		{10, 5, []float64{0.000}, columnsMap, []string{"Col1"}, []string{"random"}, []string{"1", "2"}, ErrParseNonUnaryAgregateFunctionCall, 0},
-		{0, 5, []float64{0}, columnsMap, []string{"0"}, []string{"count"}, []string{"1", "2"}, nil, 1},
-		{10, 5, []float64{10}, columnsMap, []string{"1"}, []string{"min"}, []string{"1", "12"}, nil, 1},
+		{10, 5, []float64{10, 11, 12, 13, 14}, columnsMap, []string{"Col1"}, []string{"count"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 11},
+		{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"min"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 1},
+		{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"max"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 10},
+		{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"sum"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 11},
+		{1, 1, []float64{10}, columnsMap, []string{"Col1"}, []string{"avg"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 5.500},
+		{10, 5, []float64{0.0000}, columnsMap, []string{"Col1"}, []string{"random"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", ErrParseNonUnaryAgregateFunctionCall, 0},
+		{0, 5, []float64{0}, columnsMap, []string{"0"}, []string{"count"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 1},
+		{10, 5, []float64{10}, columnsMap, []string{"1"}, []string{"min"}, "{\"_1\":\"1\",\"_2\":\"2\"}", nil, 1},
	}
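	// Each row exercises one aggregation (count, min, max, sum, avg) against a
	// single record; an unrecognized name such as "random" should surface
	// ErrParseNonUnaryAgregateFunctionCall.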

	for _, table := range tables {
-		err := aggregationFunctions(table.counter, table.filtrCount, table.myAggVals, table.columnsMap, table.storeReqCols, table.storeFunctions, table.record)
+		err := aggregationFunctions(table.counter, table.filtrCount, table.myAggVals, table.storeReqCols, table.storeFunctions, table.record)
		if table.err != err {
			t.Error()
		}
@@ -346,156 +181,6 @@ func TestMyAggregationFunc(t *testing.T) {
	}
}

// Unit tests for the function which converts a float array to a string.
func TestToStringAgg(t *testing.T) {
	options := &Options{
		HasHeader:            false,
		RecordDelimiter:      "\n",
		FieldDelimiter:       ",",
		Comments:             "",
		Name:                 "S3Object", // Default table name for all objects
		ReadFrom:             bytes.NewReader([]byte("Here , is, a, string + \n + random,random,stuff,stuff ")),
		Compressed:           "",
		Expression:           "",
		OutputFieldDelimiter: ",",
		StreamSize:           20,
		HeaderOpt:            true,
	}
	s3s, err := NewInput(options)
	if err != nil {
		t.Error(err)
	}
	tables := []struct {
		myAggVal []float64
		expected string
	}{
		{[]float64{10, 11, 12, 13, 14}, "10,11,12,13,14"},
		{[]float64{10, 11.3, 12, 13, 14}, "10,11.300000,12,13,14"},
		{[]float64{10.235, 11.3, 12, 13, 14}, "10.235000,11.300000,12,13,14"},
		{[]float64{10.235, 11.3, 12.123, 13.456, 14.789}, "10.235000,11.300000,12.123000,13.456000,14.789000"},
		{[]float64{10}, "10"},
	}
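	// Note the formatting rule exercised above: integral values print bare,
	// while fractional values are rendered with six decimal places.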
	for _, table := range tables {
		val := s3s.aggFuncToStr(table.myAggVal)
		if val != table.expected {
			t.Error()
		}
	}
}

// TestMyRowColLiteralResults is a unit test which ensures that the printed
// rows match the query being requested.
func TestMyRowColLiteralResults(t *testing.T) {
	options := &Options{
		HasHeader:            false,
		RecordDelimiter:      "\n",
		FieldDelimiter:       ",",
		Comments:             "",
		Name:                 "S3Object", // Default table name for all objects
		ReadFrom:             bytes.NewReader([]byte("Here , is, a, string + \n + random,random,stuff,stuff ")),
		Compressed:           "",
		Expression:           "",
		OutputFieldDelimiter: ",",
		StreamSize:           20,
		HeaderOpt:            true,
	}
	s3s, err := NewInput(options)
	if err != nil {
		t.Error(err)
	}
	tables := []struct {
		myReq     []string
		myHeaders map[string]int
		myDup     map[string]bool
		myLow     map[string]int
		myOpts    *Options
		tempList  []string
		input     *Input
		myRecord  []string
		myTarget  string
		columns   []string
		err       error
	}{
		{[]string{"draft", "year"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, []string{"draft", "year"}, s3s, []string{"target", "random", "hello", "stuff"}, "target,random", []string{"draft", "year", "random", "another"}, nil},
		{[]string{"year", "draft"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, []string{"year", "draft"}, s3s, []string{"draft", "2012", "thing", "stuff"}, "2012,draft", []string{"draft", "year", "random", "another"}, nil},
		{[]string{"yearrandomstuff", "draft"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, []string{"yearrandomstuff", "draft"}, s3s, []string{"draft", "2012", "thing", "stuff"}, "", []string{"draft", "year", "random", "another"}, ErrMissingHeaders},
		{[]string{"draft", "randomstuff"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, []string{"yearrandomstuff", "draft"}, s3s, []string{"draft", "2012", "thing", "stuff"}, "", []string{"draft", "year", "random", "another"}, ErrMissingHeaders},
	}
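	// checkForDuplicates indexes the column names first; processColNameLiteral
	// then assembles the output row from the requested columns, returning
	// ErrMissingHeaders when a requested name matches no header.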
	for _, table := range tables {
		checkForDuplicates(table.columns, table.myHeaders, table.myDup, table.myLow)
		myRow, err := table.input.processColNameLiteral(table.myRecord, table.myReq, table.tempList, table.myHeaders, nil)
		if err != table.err {
			t.Error()
		}
		if myRow != table.myTarget {
			t.Error()
		}
	}
}

// TestMyWhereEval provides unit tests for the function which evaluates the
// WHERE clause.
func TestMyWhereEval(t *testing.T) {
	columnsMap := make(map[string]int)
	columnsMap["Col1"] = 0
	columnsMap["Col2"] = 1
	tables := []struct {
		myQuery  string
		record   []string
		err      error
		expected bool
		header   []string
	}{
{"SELECT * FROM S3OBJECT", []string{"record_1,record_2,record_3,record_4"}, nil, true, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT WHERE Col1 < -1", []string{"0", "1"}, nil, false, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT WHERE Col1 < -1 OR Col2 > 15", []string{"151", "12"}, nil, false, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT WHERE Col1 > -1 AND Col2 > 15", []string{"151", "12"}, nil, false, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT WHERE Col1 > 1.00", []string{"151.0000", "12"}, nil, true, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT WHERE Col1 > 100", []string{"random", "12"}, nil, false, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT WHERE Col1 BETWEEN 100 AND 0", []string{"151", "12"}, nil, false, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT WHERE Col1 BETWEEN 100.0 AND 0.0", []string{"151", "12"}, nil, false, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT AS A WHERE A.1 BETWEEN 160 AND 150", []string{"151", "12"}, nil, true, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT AS A WHERE A._1 BETWEEN 160 AND 0", []string{"151", "12"}, nil, true, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT AS A WHERE A._1 BETWEEN 0 AND 160", []string{"151", "12"}, nil, true, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT A._1 LIKE 'r%'", []string{"record_1,record_2,record_3,record_4"}, nil, true, []string{"Col1", "Col2"}},
|
||||
{"SELECT s._2 FROM S3Object s WHERE s._2 = 'Steven'", []string{"record_1", "Steven", "Steven", "record_4"}, nil, true, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT AS A WHERE Col1 BETWEEN 0 AND 160", []string{"151", "12"}, nil, true, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT AS A WHERE Col1 BETWEEN 160 AND 0", []string{"151", "12"}, nil, true, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT AS A WHERE UPPER(Col1) BETWEEN 160 AND 0", []string{"151", "12"}, nil, true, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT AS A WHERE UPPER(Col1) = 'RANDOM'", []string{"random", "12"}, nil, true, []string{"Col1", "Col2"}},
|
||||
{"SELECT * FROM S3OBJECT AS A WHERE LOWER(UPPER(Col1) = 'random'", []string{"random", "12"}, nil, true, []string{"Col1", "Col2"}},
|
||||
}
|
||||
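	// The rows above cover comparisons, AND/OR, BETWEEN, LIKE, and nested
	// UPPER/LOWER calls; non-numeric values such as "random" are expected to
	// fail numeric comparisons.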
	for _, table := range tables {
		options := &Options{
			HasHeader:            false,
			RecordDelimiter:      "\n",
			FieldDelimiter:       ",",
			Comments:             "",
			Name:                 "S3Object", // Default table name for all objects
			ReadFrom:             bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
			Compressed:           "",
			Expression:           "",
			OutputFieldDelimiter: ",",
			StreamSize:           20,
			HeaderOpt:            true,
		}
		s3s, err := NewInput(options)
		if err != nil {
			t.Error(err)
		}
		s3s.header = table.header
		_, alias, _, whereClause, _, _, _ := s3s.ParseSelect(table.myQuery)
		myVal, err := matchesMyWhereClause(table.record, columnsMap, alias, whereClause)
		if table.err != err {
			t.Error()
		}
		if myVal != table.expected {
			t.Error()
		}
	}
}

// TestMyStringComparator is a unit test which ensures that the appropriate
// values are being compared for strings.
func TestMyStringComparator(t *testing.T) {
@@ -594,231 +279,13 @@ func TestMySizeFunction(t *testing.T) {
{[]string{"test1", "test2", "test3", "test4", "test5"}, 30},
|
||||
}
|
||||
for _, table := range tables {
|
||||
if processSize(table.myRecord) != table.expected {
|
||||
if format.ProcessSize(table.myRecord) != table.expected {
|
||||
t.Error()
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
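
// Note: the expected 30 bytes presumably count five 5-byte fields plus the
// field delimiters and a record terminator.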

// TestInterpreter provides unit tests for the main interpreter function.
func TestInterpreter(t *testing.T) {
	tables := []struct {
		myQuery string
		myChan  chan *Row
		err     error
		header  []string
	}{
{"Select random from S3OBJECT", make(chan *Row), ErrMissingHeaders, []string{"name1", "name2", "name3", "name4"}},
|
||||
{"Select * from S3OBJECT as A WHERE name2 > 5.00", make(chan *Row), nil, []string{"name1", "name2", "name3", "name4"}},
|
||||
{"Select * from S3OBJECT", make(chan *Row), nil, []string{"name1", "name2", "name3", "name4"}},
|
||||
{"Select A_1 from S3OBJECT as A", make(chan *Row), nil, []string{"1", "2", "3", "4"}},
|
||||
{"Select count(*) from S3OBJECT", make(chan *Row), nil, []string{"name1", "name2", "name3", "name4"}},
|
||||
{"Select * from S3OBJECT WHERE name1 > 5.00", make(chan *Row), nil, []string{"name1", "name2", "name3", "name4"}},
|
||||
}
|
||||
	for _, table := range tables {
		options := &Options{
			HasHeader:            false,
			RecordDelimiter:      "\n",
			FieldDelimiter:       ",",
			Comments:             "",
			Name:                 "S3Object", // Default table name for all objects
			ReadFrom:             bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
			Compressed:           "",
			Expression:           "",
			OutputFieldDelimiter: ",",
			StreamSize:           20,
			HeaderOpt:            true,
		}
		s3s, err := NewInput(options)
		if err != nil {
			t.Error(err)
		}
		s3s.header = table.header
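		// processSelectReq streams result rows (or an error) over table.myChan;
		// the select statement below consumes a single message per test case.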
		reqCols, alias, myLimit, whereClause, aggFunctionNames, _, err := s3s.ParseSelect(table.myQuery)
		if err != table.err {
			t.Fatal()
		}
		if err == nil {
			go s3s.processSelectReq(reqCols, alias, whereClause, myLimit, aggFunctionNames, table.myChan, nil)
			select {
			case row, ok := <-table.myChan:
				if ok && len(row.record) > 0 {
				} else if ok && row.err != nil {
					if row.err != table.err {
						t.Error()
					}
					close(table.myChan)
				} else if !ok {
				}
			}
		}
	}
}

// TestMyXMLFunction provides unit tests for the XML-creating functions.
func TestMyXMLFunction(t *testing.T) {
	options := &Options{
		HasHeader:            false,
		RecordDelimiter:      "\n",
		FieldDelimiter:       ",",
		Comments:             "",
		Name:                 "S3Object", // Default table name for all objects
		ReadFrom:             bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
		Compressed:           "",
		Expression:           "",
		OutputFieldDelimiter: ",",
		StreamSize:           20,
		HeaderOpt:            true,
	}
	s3s, err := NewInput(options)
	if err != nil {
		t.Error(err)
	}
	tables := []struct {
		expectedStat     int
		expectedProgress int
	}{
		{150, 156},
	}
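	// The stat and progress XML payloads are deterministic for a fixed input,
	// so the test can assert on their serialized lengths alone.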
	for _, table := range tables {
		myVal, _ := s3s.createStatXML()
		myOtherVal, _ := s3s.createProgressXML()
		if len(myVal) != table.expectedStat {
			t.Error()
		}
		if len(myOtherVal) != table.expectedProgress {
			fmt.Println(len(myOtherVal))
			t.Error()
		}
	}
}

// TestMyProtocolFunction provides unit tests for several of the functions
// which write the binary protocol.
func TestMyProtocolFunction(t *testing.T) {
	options := &Options{
		HasHeader:            false,
		RecordDelimiter:      "\n",
		FieldDelimiter:       ",",
		Comments:             "",
		Name:                 "S3Object", // Default table name for all objects
		ReadFrom:             bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
		Compressed:           "",
		Expression:           "",
		OutputFieldDelimiter: ",",
		StreamSize:           20,
		HeaderOpt:            true,
	}
	s3s, err := NewInput(options)
	if err != nil {
		t.Error(err)
	}
	tables := []struct {
		payloadMsg     string
		expectedRecord int
		expectedEnd    int
	}{
		{"random payload", 115, 56},
	}
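	// Record, End, and Continuation messages use fixed framing, so their
	// encoded lengths are stable for a given payload; the continuation
	// message is expected to always be 57 bytes.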
	for _, table := range tables {
		var currentMessage = &bytes.Buffer{}
		if len(s3s.writeRecordMessage(table.payloadMsg, currentMessage).Bytes()) != table.expectedRecord {
			t.Error()
		}
		currentMessage.Reset()
		if len(s3s.writeEndMessage(currentMessage).Bytes()) != table.expectedEnd {
			t.Error()
		}
		currentMessage.Reset()
		if len(s3s.writeContinuationMessage(currentMessage).Bytes()) != 57 {
			t.Error()
		}
		currentMessage.Reset()
	}
}

// TestMyInfoProtocolFunctions provides unit tests for the stat and progress
// messages of the protocol.
func TestMyInfoProtocolFunctions(t *testing.T) {
	options := &Options{
		HasHeader:            true,
		RecordDelimiter:      "\n",
		FieldDelimiter:       ",",
		Comments:             "",
		Name:                 "S3Object", // Default table name for all objects
		ReadFrom:             bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
		Compressed:           "",
		Expression:           "",
		OutputFieldDelimiter: ",",
		StreamSize:           20,
	}
	s3s, err := NewInput(options)
	if err != nil {
		t.Error(err)
	}
	myVal, _ := s3s.createStatXML()
	myOtherVal, _ := s3s.createProgressXML()

	tables := []struct {
		payloadStatMsg     string
		payloadProgressMsg string
		expectedStat       int
		expectedProgress   int
	}{
		{myVal, myOtherVal, 233, 243},
	}
	for _, table := range tables {
		var currBuf = &bytes.Buffer{}
		if len(s3s.writeStatMessage(table.payloadStatMsg, currBuf).Bytes()) != table.expectedStat {
			t.Error()
		}
		currBuf.Reset()
		if len(s3s.writeProgressMessage(table.payloadProgressMsg, currBuf).Bytes()) != table.expectedProgress {
			t.Error()
		}
	}
}

// TestMyErrorProtocolFunctions provides unit tests for the error message type
// of the protocol.
func TestMyErrorProtocolFunctions(t *testing.T) {
	options := &Options{
		HasHeader:            false,
		RecordDelimiter:      "\n",
		FieldDelimiter:       ",",
		Comments:             "",
		Name:                 "S3Object", // Default table name for all objects
		ReadFrom:             bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
		Compressed:           "",
		Expression:           "",
		OutputFieldDelimiter: ",",
		StreamSize:           20,
		HeaderOpt:            true,
	}
	s3s, err := NewInput(options)
	if err != nil {
		t.Error(err)
	}
	tables := []struct {
		err           error
		expectedError int
	}{
		{ErrInvalidCast, 248},
		{ErrTruncatedInput, 200},
		{ErrUnsupportedSyntax, 114},
		{ErrCSVParsingError, 157},
	}
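	// Each error constant carries its own error code and description, so every
	// entry maps to a distinct encoded message length.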
	for _, table := range tables {
		var currentMessage = &bytes.Buffer{}
		if len(s3s.writeErrorMessage(table.err, currentMessage).Bytes()) != table.expectedError {
			t.Error()
		}
	}
}

func TestMatch(t *testing.T) {
	testCases := []struct {
		pattern string
@@ -1004,51 +471,6 @@ func TestMatch(t *testing.T) {
	}
}

// TestMyValids is a unit test which ensures that the appropriate values are
// being returned from the isValid... functions.
func TestMyValids(t *testing.T) {
	tables := []struct {
		myQuery    string
		indexList  []int
		myIndex    int
		myValIndex bool
		header     []string
		err        error
	}{
		{"SELECT UPPER(NULLIF(draft_year,random_name))", []int{3, 5, 6, 7, 8, 9}, 3, true, []string{"draft_year", "random_name"}, nil},
		{"SELECT UPPER(NULLIF(draft_year,xandom_name))", []int{3, 5, 6, 7, 8, 9}, 3, true, []string{"draft_year", "random_name"}, ErrMissingHeaders},
	}
	for _, table := range tables {
		options := &Options{
			HasHeader:            false,
			RecordDelimiter:      "\n",
			FieldDelimiter:       ",",
			Comments:             "",
			Name:                 "S3Object", // Default table name for all objects
			ReadFrom:             bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
			Compressed:           "",
			Expression:           "",
			OutputFieldDelimiter: ",",
			StreamSize:           20,
			HeaderOpt:            true,
		}
		s3s, err := NewInput(options)
		if err != nil {
			t.Error(err)
		}
		s3s.header = table.header
		_, _, _, _, _, _, err = s3s.ParseSelect(table.myQuery)
		if err != table.err {
			t.Fatal()
		}
		myVal := isValidFunc(table.indexList, table.myIndex)
		if myVal != table.myValIndex {
			t.Error()
		}
	}
}

// TestMyFuncProcessing is a unit test which ensures that the appropriate
// values are being returned from the Processing... functions.
func TestMyFuncProcessing(t *testing.T) {