SQL select query for CSV/JSON (#6648)

select * and select by column name have been implemented for CSV.
select * is implemented for JSON.
Ashish Kumar Sinha
2018-10-23 00:42:22 +05:30
committed by kannappanr
parent acf46cc3b5
commit c0b4bf0a3e
111 changed files with 12888 additions and 1398 deletions

pkg/s3select/datatypes.go (new file, 110 lines)

@@ -0,0 +1,110 @@
/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package s3select
import (
"encoding/xml"
)
// CSVFileHeaderInfo - can be either USE, IGNORE or NONE; defines what to do with
// the first row
type CSVFileHeaderInfo string
// Constants for file header info.
const (
CSVFileHeaderInfoNone CSVFileHeaderInfo = "NONE"
CSVFileHeaderInfoIgnore CSVFileHeaderInfo = "IGNORE"
CSVFileHeaderInfoUse CSVFileHeaderInfo = "USE"
)
// The maximum number of characters per record is set to 1 MB.
const (
MaxCharsPerRecord = 1000000
)
// SelectCompressionType - compression type for the select API (NONE, GZIP, BZIP2)
type SelectCompressionType string
// JSONType determines json input serialization type.
type JSONType string
// Constants for compression types under select API.
const (
SelectCompressionNONE SelectCompressionType = "NONE"
SelectCompressionGZIP SelectCompressionType = "GZIP"
SelectCompressionBZIP SelectCompressionType = "BZIP2"
)
// CSVQuoteFields - Can be either Always or AsNeeded
type CSVQuoteFields string
// Constants for csv quote styles.
const (
CSVQuoteFieldsAlways CSVQuoteFields = "Always"
CSVQuoteFieldsAsNeeded CSVQuoteFields = "AsNeeded"
)
// QueryExpressionType - Currently can only be SQL
type QueryExpressionType string
// Constants for expression type.
const (
QueryExpressionTypeSQL QueryExpressionType = "SQL"
)
// Constants for JSONTypes.
const (
JSONTypeDocument JSONType = "DOCUMENT"
JSONLinesType JSONType = "LINES"
)
// ObjectSelectRequest - represents the input select body
type ObjectSelectRequest struct {
XMLName xml.Name `xml:"SelectObjectContentRequest" json:"-"`
Expression string
ExpressionType QueryExpressionType
InputSerialization struct {
CompressionType SelectCompressionType
Parquet *struct{}
CSV *struct {
FileHeaderInfo CSVFileHeaderInfo
RecordDelimiter string
FieldDelimiter string
QuoteCharacter string
QuoteEscapeCharacter string
Comments string
}
JSON *struct {
Type JSONType
}
}
OutputSerialization struct {
CSV *struct {
QuoteFields CSVQuoteFields
RecordDelimiter string
FieldDelimiter string
QuoteCharacter string
QuoteEscapeCharacter string
}
JSON *struct {
RecordDelimiter string
}
}
RequestProgress struct {
Enabled bool
}
}
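For illustration (not part of this commit), the sketch below shows how a SelectObjectContentRequest body maps onto the struct above; the trimmed local type and the request values are assumptions made for the example.

package main

import (
	"encoding/xml"
	"fmt"
)

// selectRequest is a trimmed, hypothetical mirror of ObjectSelectRequest.
type selectRequest struct {
	XMLName            xml.Name `xml:"SelectObjectContentRequest"`
	Expression         string
	ExpressionType     string
	InputSerialization struct {
		CompressionType string
		CSV             *struct {
			FileHeaderInfo string
		}
	}
}

func main() {
	body := `<SelectObjectContentRequest>
  <Expression>SELECT s._1 FROM S3Object s</Expression>
  <ExpressionType>SQL</ExpressionType>
  <InputSerialization>
    <CompressionType>NONE</CompressionType>
    <CSV><FileHeaderInfo>NONE</FileHeaderInfo></CSV>
  </InputSerialization>
</SelectObjectContentRequest>`
	var req selectRequest
	if err := xml.Unmarshal([]byte(body), &req); err != nil {
		panic(err)
	}
	// Prints: SELECT s._1 FROM S3Object s SQL NONE
	fmt.Println(req.Expression, req.ExpressionType, req.InputSerialization.CSV.FileHeaderInfo)
}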

@@ -16,7 +16,11 @@
package s3select
import "errors"
import (
"errors"
"github.com/minio/minio/pkg/s3select/format"
)
// S3 errors below
@@ -35,10 +39,6 @@ var ErrExpressionTooLong = errors.New("The SQL expression is too long: The maxim
// in the SQL function.
var ErrIllegalSQLFunctionArgument = errors.New("Illegal argument was used in the SQL function")
// ErrInvalidColumnIndex is an error if you provide a column index which is not
// valid.
var ErrInvalidColumnIndex = errors.New("Column index in the SQL expression is invalid")
// ErrInvalidKeyPath is an error if you provide a key in the SQL expression that
// is invalid.
var ErrInvalidKeyPath = errors.New("Key path in the SQL expression is invalid")
@@ -63,10 +63,6 @@ var ErrMissingHeaders = errors.New("Some headers in the query are missing from t
// utilized with the select object query.
var ErrInvalidCompressionFormat = errors.New("The file is not in a supported compression format. Only GZIP is supported at this time")
// ErrTruncatedInput is an error if the object is not compressed properly and an
// error occurs during decompression.
var ErrTruncatedInput = errors.New("Object decompression failed. Check that the object is properly compressed using the format specified in the request")
// ErrInvalidFileHeaderInfo is an error if the argument provided to the
// FileHeader Argument is incorrect.
var ErrInvalidFileHeaderInfo = errors.New("The FileHeaderInfo is invalid. Only NONE, USE, and IGNORE are supported")
@@ -83,13 +79,6 @@ var ErrInvalidQuoteFields = errors.New("The QuoteFields is invalid. Only ALWAYS
// request element is not valid.
var ErrInvalidRequestParameter = errors.New("The value of a parameter in Request element is invalid. Check the service API documentation and try again")
// ErrCSVParsingError is an error if the CSV presents an error while being
// parsed.
var ErrCSVParsingError = errors.New("Encountered an Error parsing the CSV file. Check the file and try again")
// ErrJSONParsingError is an error if while parsing the JSON an error arises.
var ErrJSONParsingError = errors.New("Encountered an error parsing the JSON file. Check the file and try again")
// ErrExternalEvalException is an error that arises if the query can not be
// evaluated.
var ErrExternalEvalException = errors.New("The query cannot be evaluated. Check the file and try again")
@@ -224,10 +213,6 @@ var ErrParseUnsupportedSyntax = errors.New("The SQL expression contains unsuppor
// operator present in the SQL expression.
var ErrParseUnknownOperator = errors.New("The SQL expression contains an invalid operator")
// ErrParseInvalidPathComponent is an error that occurs if there is an invalid
// path component.
var ErrParseInvalidPathComponent = errors.New("The SQL expression contains an invalid path component")
// ErrParseMissingIdentAfterAt is an error that occurs if the wrong symbol
// follows the "@" symbol in the SQL expression.
var ErrParseMissingIdentAfterAt = errors.New("Did not find the expected identifier after the @ symbol in the SQL expression")
@@ -395,20 +380,20 @@ var errorCodeResponse = map[error]string{
ErrUnauthorizedAccess: "UnauthorizedAccess",
ErrExpressionTooLong: "ExpressionTooLong",
ErrIllegalSQLFunctionArgument: "IllegalSqlFunctionArgument",
ErrInvalidColumnIndex: "InvalidColumnIndex",
format.ErrInvalidColumnIndex: "InvalidColumnIndex",
ErrInvalidKeyPath: "InvalidKeyPath",
ErrColumnTooLong: "ColumnTooLong",
ErrOverMaxColumn: "OverMaxColumn",
ErrOverMaxRecordSize: "OverMaxRecordSize",
ErrMissingHeaders: "MissingHeaders",
ErrInvalidCompressionFormat: "InvalidCompressionFormat",
ErrTruncatedInput: "TruncatedInput",
format.ErrTruncatedInput: "TruncatedInput",
ErrInvalidFileHeaderInfo: "InvalidFileHeaderInfo",
ErrInvalidJSONType: "InvalidJsonType",
ErrInvalidQuoteFields: "InvalidQuoteFields",
ErrInvalidRequestParameter: "InvalidRequestParameter",
ErrCSVParsingError: "CSVParsingError",
ErrJSONParsingError: "JSONParsingError",
format.ErrCSVParsingError: "CSVParsingError",
format.ErrJSONParsingError: "JSONParsingError",
ErrExternalEvalException: "ExternalEvalException",
ErrInvalidDataType: "InvalidDataType",
ErrUnrecognizedFormatException: "UnrecognizedFormatException",
@@ -443,7 +428,7 @@ var errorCodeResponse = map[error]string{
ErrParseUnsupportedAlias: "ParseUnsupportedAlias",
ErrParseUnsupportedSyntax: "ParseUnsupportedSyntax",
ErrParseUnknownOperator: "ParseUnknownOperator",
ErrParseInvalidPathComponent: "ParseInvalidPathComponent",
format.ErrParseInvalidPathComponent: "ParseInvalidPathComponent",
ErrParseMissingIdentAfterAt: "ParseMissingIdentAfterAt",
ErrParseUnexpectedOperator: "ParseUnexpectedOperator",
ErrParseUnexpectedTerm: "ParseUnexpectedTerm",
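With the shared errors moved into the format package, the handler keeps resolving API error codes through the same map. A minimal sketch, assuming errorCodeResponse is in scope; the fallback code is hypothetical and not taken from this commit:

// errorCode looks up the S3 error code string for a select error.
func errorCode(err error) string {
	if code, ok := errorCodeResponse[err]; ok {
		return code
	}
	return "InternalError" // hypothetical fallback
}

For example, errorCode(format.ErrCSVParsingError) yields "CSVParsingError".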

@@ -19,12 +19,13 @@ package s3select
import (
"strings"
"github.com/minio/minio/pkg/s3select/format"
"github.com/xwb1989/sqlparser"
)
// stringOps is a function which handles the case in a clause where a string
// function needs to be applied
func stringOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string, columnsMap map[string]int) string {
func stringOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) string {
var value string
funcName := myFunc.Name.CompliantName()
switch tempArg := myFunc.Exprs[0].(type) {
@@ -34,7 +35,7 @@ func stringOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string,
// myReturnVal is actually the tail recursive value being used in the eval func.
return applyStrFunc(myReturnVal, funcName)
case *sqlparser.ColName:
value = applyStrFunc(record[columnsMap[col.Name.CompliantName()]], funcName)
value = applyStrFunc(jsonValue(col.Name.CompliantName(), record), funcName)
case *sqlparser.SQLVal:
value = applyStrFunc(string(col.Val), funcName)
}
@@ -43,7 +44,7 @@ func stringOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string,
}
// coalOps is a function which decomposes a COALESCE func expr into its struct.
func coalOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string, columnsMap map[string]int) string {
func coalOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) string {
myArgs := make([]string, len(myFunc.Exprs))
for i := 0; i < len(myFunc.Exprs); i++ {
@@ -54,7 +55,7 @@ func coalOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string, co
// myReturnVal is actually the tail recursive value being used in the eval func.
return myReturnVal
case *sqlparser.ColName:
myArgs[i] = record[columnsMap[col.Name.CompliantName()]]
myArgs[i] = jsonValue(col.Name.CompliantName(), record)
case *sqlparser.SQLVal:
myArgs[i] = string(col.Val)
}
@@ -64,7 +65,7 @@ func coalOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string, co
}
// nullOps is a function which decomposes a NullIf func expr into its struct.
func nullOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string, columnsMap map[string]int) string {
func nullOps(myFunc *sqlparser.FuncExpr, record string, myReturnVal string) string {
myArgs := make([]string, 2)
for i := 0; i < len(myFunc.Exprs); i++ {
@@ -74,7 +75,7 @@ func nullOps(myFunc *sqlparser.FuncExpr, record []string, myReturnVal string, co
case *sqlparser.FuncExpr:
return myReturnVal
case *sqlparser.ColName:
myArgs[i] = record[columnsMap[col.Name.CompliantName()]]
myArgs[i] = jsonValue(col.Name.CompliantName(), record)
case *sqlparser.SQLVal:
myArgs[i] = string(col.Val)
}
@@ -118,8 +119,8 @@ func processCoalNoIndex(coalStore []string) string {
}
// evaluateFuncExpr is a function that allows for tail recursive evaluation of
// nested function expressions.
func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, myRecord []string, columnsMap map[string]int) string {
// nested function expressions
func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, myRecord string) string {
if myVal == nil {
return myReturnVal
}
@@ -140,26 +141,26 @@ func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, myRecord []
for i := 0; i < len(mySubFunc); i++ {
if supportedString(myVal.Name.CompliantName()) {
if mySubFunc != nil {
return stringOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord, columnsMap), columnsMap)
return stringOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord))
}
return stringOps(myVal, myRecord, myReturnVal, columnsMap)
return stringOps(myVal, myRecord, myReturnVal)
} else if strings.ToUpper(myVal.Name.CompliantName()) == "NULLIF" {
if mySubFunc != nil {
return nullOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord, columnsMap), columnsMap)
return nullOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord))
}
return nullOps(myVal, myRecord, myReturnVal, columnsMap)
return nullOps(myVal, myRecord, myReturnVal)
} else if strings.ToUpper(myVal.Name.CompliantName()) == "COALESCE" {
if mySubFunc != nil {
return coalOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord, columnsMap), columnsMap)
return coalOps(myVal, myRecord, evaluateFuncExpr(mySubFunc[i], myReturnVal, myRecord))
}
return coalOps(myVal, myRecord, myReturnVal, columnsMap)
return coalOps(myVal, myRecord, myReturnVal)
}
}
return ""
}
// evaluateFuncErr is a function that flags errors in nested functions.
func (reader *Input) evaluateFuncErr(myVal *sqlparser.FuncExpr) error {
func evaluateFuncErr(myVal *sqlparser.FuncExpr, reader format.Select) error {
if myVal == nil {
return nil
}
@@ -173,11 +174,11 @@ func (reader *Input) evaluateFuncErr(myVal *sqlparser.FuncExpr) error {
case *sqlparser.AliasedExpr:
switch col := tempArg.Expr.(type) {
case *sqlparser.FuncExpr:
if err := reader.evaluateFuncErr(col); err != nil {
if err := evaluateFuncErr(col, reader); err != nil {
return err
}
case *sqlparser.ColName:
if err := reader.colNameErrs([]string{col.Name.CompliantName()}); err != nil {
if err := reader.ColNameErrs([]string{col.Name.CompliantName()}); err != nil {
return err
}
}
@@ -186,11 +187,9 @@ func (reader *Input) evaluateFuncErr(myVal *sqlparser.FuncExpr) error {
return nil
}
// evaluateIsExpr is a function for evaluating expressions of the form "column
// is ...."
func evaluateIsExpr(myFunc *sqlparser.IsExpr, row []string, columnNames map[string]int, alias string) (bool, error) {
// evaluateIsExpr is a function for evaluating expressions of the form "column is ...."
func evaluateIsExpr(myFunc *sqlparser.IsExpr, row string, alias string) (bool, error) {
operator := myFunc.Operator
var colName string
var myVal string
switch myIs := myFunc.Expr.(type) {
// case for literal val
@@ -198,14 +197,10 @@ func evaluateIsExpr(myFunc *sqlparser.IsExpr, row []string, columnNames map[stri
myVal = string(myIs.Val)
// case for nested func val
case *sqlparser.FuncExpr:
myVal = evaluateFuncExpr(myIs, "", row, columnNames)
myVal = evaluateFuncExpr(myIs, "", row)
// case for col val
case *sqlparser.ColName:
colName = cleanCol(myIs.Name.CompliantName(), alias)
}
// case if it is a col val
if colName != "" {
myVal = row[columnNames[colName]]
myVal = jsonValue(myIs.Name.CompliantName(), row)
}
// case to evaluate is null
if strings.ToLower(operator) == "is null" {
@@ -221,11 +216,11 @@ func evaluateIsExpr(myFunc *sqlparser.IsExpr, row []string, columnNames map[stri
// supportedString is a function that checks whether the function is a supported
// string one
func supportedString(strFunc string) bool {
return stringInSlice(strings.ToUpper(strFunc), []string{"TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER"})
return format.StringInSlice(strings.ToUpper(strFunc), []string{"TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER"})
}
// supportedFunc is a function that checks whether the function is a supported
// S3 one.
func supportedFunc(strFunc string) bool {
return stringInSlice(strings.ToUpper(strFunc), []string{"TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER", "COALESCE", "NULLIF"})
return format.StringInSlice(strings.ToUpper(strFunc), []string{"TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER", "COALESCE", "NULLIF"})
}
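To see how a record flows through these helpers once column lookups go through JSON, here is a sketch assuming package scope (the query and record values are illustrative):

func exampleUpper() (string, error) {
	stmt, err := sqlparser.Parse("SELECT UPPER(name) FROM S3Object")
	if err != nil {
		return "", err
	}
	sel := stmt.(*sqlparser.Select)
	fn := sel.SelectExprs[0].(*sqlparser.AliasedExpr).Expr.(*sqlparser.FuncExpr)
	// supportedString("UPPER") is true, so stringOps applies the function to
	// the value fetched via jsonValue; expected result here is "MINIO".
	return evaluateFuncExpr(fn, "", `{"name":"minio"}`), nil
}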

@@ -0,0 +1,334 @@
/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package csv
import (
"compress/bzip2"
"encoding/csv"
"encoding/xml"
"io"
"strconv"
"strings"
gzip "github.com/klauspost/pgzip"
"github.com/minio/minio/pkg/ioutil"
"github.com/minio/minio/pkg/s3select/format"
)
// Options are passed to the underlying encoding/csv reader.
type Options struct {
// HasHeader when true, will treat the first row as a header row.
HasHeader bool
// RecordDelimiter is the string that records are delimited by.
RecordDelimiter string
// FieldDelimiter is the string that fields are delimited by.
FieldDelimiter string
// Comments is the string whose first character is treated as the
// comment character; lines starting with it are skipped.
Comments string
// Name of the table that is used for querying
Name string
// ReadFrom is where the data will be read from.
ReadFrom io.Reader
// Compressed, when set to GZIP or BZIP2, adds the matching
// decompression reader to extract the csv.
Compressed string
// SQL expression meant to be evaluated.
Expression string
// OutputFieldDelimiter is what the output CSV will be delimited by.
OutputFieldDelimiter string
// Size of incoming object
StreamSize int64
// HeaderOpt is true when FileHeaderInfo is "USE"
HeaderOpt bool
// Progress enabled, enable/disable progress messages.
Progress bool
}
// cinput represents a record producing input from a formatted object.
type cinput struct {
options *Options
reader *csv.Reader
firstRow []string
header []string
minOutputLength int
stats struct {
BytesScanned int64
BytesReturned int64
BytesProcessed int64
}
}
// New sets up a new reader; the first row is read when this is run.
// If there is a problem with reading the first row, the error is returned.
// Otherwise, the returned reader can be reliably consumed with Read()
// until Read() returns a nil record.
func New(opts *Options) (format.Select, error) {
myReader := opts.ReadFrom
var tempBytesScanned int64
tempBytesScanned = 0
switch opts.Compressed {
case "GZIP":
tempBytesScanned = opts.StreamSize
var err error
if myReader, err = gzip.NewReader(opts.ReadFrom); err != nil {
return nil, format.ErrTruncatedInput
}
case "BZIP2":
tempBytesScanned = opts.StreamSize
myReader = bzip2.NewReader(opts.ReadFrom)
}
// DelimitedReader treats custom record delimiter like `\r\n`,`\r`,`ab` etc and replaces it with `\n`.
normalizedReader := ioutil.NewDelimitedReader(myReader, []rune(opts.RecordDelimiter))
reader := &cinput{
options: opts,
reader: csv.NewReader(normalizedReader),
}
reader.stats.BytesScanned = tempBytesScanned
reader.stats.BytesProcessed = 0
reader.stats.BytesReturned = 0
reader.firstRow = nil
reader.reader.FieldsPerRecord = -1
if reader.options.FieldDelimiter != "" {
reader.reader.Comma = rune(reader.options.FieldDelimiter[0])
}
if reader.options.Comments != "" {
reader.reader.Comment = rune(reader.options.Comments[0])
}
// QuoteCharacter - " (defaulted currently)
reader.reader.LazyQuotes = true
if err := reader.readHeader(); err != nil {
return nil, err
}
return reader, nil
}
// cleanHeader replaces the spaces in column names with underscores
func cleanHeader(columns []string) []string {
for i := 0; i < len(columns); i++ {
columns[i] = strings.Replace(columns[i], " ", "_", -1)
}
return columns
}
// readHeader reads the header into the header variable if the header is present
// as the first row of the csv
func (reader *cinput) readHeader() error {
var readErr error
if reader.options.HasHeader {
reader.firstRow, readErr = reader.reader.Read()
if readErr != nil {
return format.ErrCSVParsingError
}
reader.header = cleanHeader(reader.firstRow)
reader.firstRow = nil
reader.minOutputLength = len(reader.header)
} else {
reader.firstRow, readErr = reader.reader.Read()
if readErr != nil && readErr != io.EOF {
return format.ErrCSVParsingError
}
reader.header = make([]string, len(reader.firstRow))
for i := 0; i < len(reader.firstRow); i++ {
reader.header[i] = strconv.Itoa(i)
}
reader.minOutputLength = len(reader.header)
}
return nil
}
// Progress - return true if progress was requested.
func (reader *cinput) Progress() bool {
return reader.options.Progress
}
// UpdateBytesProcessed - updates the number of bytes processed
func (reader *cinput) UpdateBytesProcessed(record map[string]interface{}) {
// Convert map to slice of values.
values := []string{}
for _, value := range record {
values = append(values, value.(string))
}
reader.stats.BytesProcessed += int64(len(values))
}
// Read reads the file and returns a record as map[string]interface{}
func (reader *cinput) Read() (map[string]interface{}, error) {
record := make(map[string]interface{})
dec := reader.readRecord()
if dec != nil {
if reader.options.HasHeader {
columns := reader.header
for i, value := range dec {
record[columns[i]] = value
}
} else {
for i, value := range dec {
record["_"+strconv.Itoa(i)] = value
}
}
return record, nil
}
return nil, nil
}
// OutputFieldDelimiter - returns the delimiter specified in input request
func (reader *cinput) OutputFieldDelimiter() string {
return reader.options.OutputFieldDelimiter
}
// HasHeader - returns true or false depending upon the header.
func (reader *cinput) HasHeader() bool {
return reader.options.HasHeader
}
// Expression - returns the Select expression for the request
func (reader *cinput) Expression() string {
return reader.options.Expression
}
// UpdateBytesReturned - updates the number of bytes returned
func (reader *cinput) UpdateBytesReturned(size int64) {
reader.stats.BytesReturned += size
}
// Header returns the header of the reader. Either the first row if a header
// is set in the options, or the column number as a string, starting with 0.
func (reader *cinput) Header() []string {
return reader.header
}
// readRecord reads a single record from the stream; it always returns successfully.
// If the record is empty, an empty []string is returned.
// Records expand to match the current row size, adding blank fields as needed.
// Records never contain fewer fields than the first row.
// Returns nil on EOF.
// In the event of a parse error due to an invalid record, it is logged, and
// an empty []string is returned with the number of fields in the first row,
// as if the record were empty.
//
// In general, this is a reader that is very tolerant of problems.
func (reader *cinput) readRecord() []string {
var row []string
var fileErr error
if reader.firstRow != nil {
row = reader.firstRow
reader.firstRow = nil
return row
}
row, fileErr = reader.reader.Read()
emptysToAppend := reader.minOutputLength - len(row)
if fileErr == io.EOF || fileErr == io.ErrClosedPipe {
return nil
} else if _, ok := fileErr.(*csv.ParseError); ok {
emptysToAppend = reader.minOutputLength
}
if emptysToAppend > 0 {
for counter := 0; counter < emptysToAppend; counter++ {
row = append(row, "")
}
}
return row
}
// CreateStatXML is the function which does the marshaling from the stat
// struct into XML so that the stat message can be sent
func (reader *cinput) CreateStatXML() (string, error) {
if reader.options.Compressed == "NONE" {
reader.stats.BytesProcessed = reader.options.StreamSize
reader.stats.BytesScanned = reader.stats.BytesProcessed
}
out, err := xml.Marshal(&format.Stats{
BytesScanned: reader.stats.BytesScanned,
BytesProcessed: reader.stats.BytesProcessed,
BytesReturned: reader.stats.BytesReturned,
})
if err != nil {
return "", err
}
return xml.Header + string(out), nil
}
// CreateProgressXML is the function which does the marshaling from the
// progress struct into XML so that the progress message can be sent
func (reader *cinput) CreateProgressXML() (string, error) {
if reader.options.HasHeader {
reader.stats.BytesProcessed += format.ProcessSize(reader.header)
}
if reader.options.Compressed == "NONE" {
reader.stats.BytesScanned = reader.stats.BytesProcessed
}
out, err := xml.Marshal(&format.Progress{
BytesScanned: reader.stats.BytesScanned,
BytesProcessed: reader.stats.BytesProcessed,
BytesReturned: reader.stats.BytesReturned,
})
if err != nil {
return "", err
}
return xml.Header + string(out), nil
}
// Type - returns the data format type
func (reader *cinput) Type() format.Type {
return format.CSV
}
// ColNameErrs is a function which makes sure that the requested headers are
// present in the file; otherwise it returns an error.
func (reader *cinput) ColNameErrs(columnNames []string) error {
for i := 0; i < len(columnNames); i++ {
if columnNames[i] == "" {
continue
}
if !format.IsInt(columnNames[i]) && !reader.options.HeaderOpt {
return format.ErrInvalidColumnIndex
}
if format.IsInt(columnNames[i]) {
tempInt, _ := strconv.Atoi(columnNames[i])
if tempInt > len(reader.Header()) || tempInt == 0 {
return format.ErrInvalidColumnIndex
}
} else {
if reader.options.HeaderOpt && !format.StringInSlice(columnNames[i], reader.Header()) {
return format.ErrParseInvalidPathComponent
}
}
}
return nil
}
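A usage sketch (not part of the commit): constructing the CSV reader over an in-memory object and draining it through the interface. All option values here are illustrative.

package main

import (
	"fmt"
	"strings"

	"github.com/minio/minio/pkg/s3select/format/csv"
)

func main() {
	data := "name,age\nalice,30\nbob,25\n"
	opts := &csv.Options{
		HasHeader:            true,
		RecordDelimiter:      "\n",
		FieldDelimiter:       ",",
		Compressed:           "NONE",
		Expression:           "SELECT * FROM S3Object",
		OutputFieldDelimiter: ",",
		StreamSize:           int64(len(data)),
		HeaderOpt:            true,
		ReadFrom:             strings.NewReader(data),
	}
	s, err := csv.New(opts) // s satisfies format.Select
	if err != nil {
		panic(err)
	}
	for {
		record, err := s.Read()
		if err != nil || record == nil {
			break
		}
		fmt.Println(record["name"], record["age"]) // header names become map keys
	}
}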

@@ -0,0 +1,38 @@
/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package format
import "errors"
// ErrTruncatedInput is an error if the object is not compressed properly and an
// error occurs during decompression.
var ErrTruncatedInput = errors.New("Object decompression failed. Check that the object is properly compressed using the format specified in the request")
// ErrCSVParsingError is an error if the CSV presents an error while being
// parsed.
var ErrCSVParsingError = errors.New("Encountered an Error parsing the CSV file. Check the file and try again")
// ErrInvalidColumnIndex is an error if you provide a column index which is not
// valid.
var ErrInvalidColumnIndex = errors.New("Column index in the SQL expression is invalid")
// ErrParseInvalidPathComponent is an error that occurs if there is an invalid
// path component.
var ErrParseInvalidPathComponent = errors.New("The SQL expression contains an invalid path component")
// ErrJSONParsingError is an error if while parsing the JSON an error arises.
var ErrJSONParsingError = errors.New("Encountered an error parsing the JSON file. Check the file and try again")

@@ -0,0 +1,50 @@
/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package format
import "strconv"
// IsInt - returns whether a string can be represented as an int.
func IsInt(s string) bool {
_, err := strconv.Atoi(s)
return err == nil
}
// StringInSlice - returns whether a string is in a list
func StringInSlice(x string, list []string) bool {
for _, y := range list {
if x == y {
return true
}
}
return false
}
// ProcessSize - computes the size of a record so that BytesProcessed can be calculated.
func ProcessSize(myrecord []string) int64 {
if len(myrecord) > 0 {
var size int64
size = int64(len(myrecord)-1) + 1
for i := range myrecord {
size += int64(len(myrecord[i]))
}
return size
}
return 0
}
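The size adds up the field bytes, one byte per field delimiter (len-1 of them) and one record delimiter. A quick worked example, assuming the format package is imported:

size := format.ProcessSize([]string{"alice", "30"})
// 5 ("alice") + 2 ("30") + 1 field delimiter + 1 record delimiter
fmt.Println(size) // 9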

@@ -0,0 +1,200 @@
/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package json
import (
"compress/bzip2"
"encoding/json"
"encoding/xml"
"io"
jsoniter "github.com/json-iterator/go"
gzip "github.com/klauspost/pgzip"
"github.com/minio/minio/pkg/s3select/format"
)
// Options are passed to the underlying encoding/json reader.
type Options struct {
// Name of the table that is used for querying
Name string
// ReadFrom is where the data will be read from.
ReadFrom io.Reader
// Compressed, when set to GZIP or BZIP2, adds the matching
// decompression reader to extract the json.
Compressed string
// SQL expression meant to be evaluated.
Expression string
// RecordDelimiter is what the output records will be delimited by.
RecordDelimiter string
// Size of incoming object
StreamSize int64
// Type is true if the JSON type is DOCUMENT
Type bool
// Progress enabled, enable/disable progress messages.
Progress bool
}
// jinput represents a record producing input from a formatted file or pipe.
type jinput struct {
options *Options
reader *jsoniter.Decoder
firstRow []string
header []string
minOutputLength int
stats struct {
BytesScanned int64
BytesReturned int64
BytesProcessed int64
}
}
// New sets up a new JSON reader. The returned reader can be reliably
// consumed with Read() until Read() returns nil.
func New(opts *Options) (format.Select, error) {
myReader := opts.ReadFrom
var tempBytesScanned int64
tempBytesScanned = 0
switch opts.Compressed {
case "GZIP":
tempBytesScanned = opts.StreamSize
var err error
if myReader, err = gzip.NewReader(opts.ReadFrom); err != nil {
return nil, format.ErrTruncatedInput
}
case "BZIP2":
tempBytesScanned = opts.StreamSize
myReader = bzip2.NewReader(opts.ReadFrom)
}
reader := &jinput{
options: opts,
reader: jsoniter.NewDecoder(myReader),
}
reader.stats.BytesScanned = tempBytesScanned
reader.stats.BytesProcessed = 0
reader.stats.BytesReturned = 0
return reader, nil
}
// Progress - return true if progress was requested.
func (reader *jinput) Progress() bool {
return reader.options.Progress
}
// UpdateBytesProcessed - updates the number of bytes processed
func (reader *jinput) UpdateBytesProcessed(record map[string]interface{}) {
out, _ := json.Marshal(record)
reader.stats.BytesProcessed += int64(len(out))
}
// Read reads the file and returns a record as map[string]interface{}
func (reader *jinput) Read() (map[string]interface{}, error) {
dec := reader.reader
var record interface{}
for {
err := dec.Decode(&record)
if err == io.EOF || err == io.ErrClosedPipe {
break
}
if err != nil {
return nil, format.ErrJSONParsingError
}
return record.(map[string]interface{}), nil
}
return nil, nil
}
// OutputFieldDelimiter - returns the delimiter specified in input request
func (reader *jinput) OutputFieldDelimiter() string {
return ","
}
// HasHeader - returns true or false depending upon the header.
func (reader *jinput) HasHeader() bool {
return false
}
// Expression - returns the Select expression for the request
func (reader *jinput) Expression() string {
return reader.options.Expression
}
// UpdateBytesReturned - updates the number of bytes returned
func (reader *jinput) UpdateBytesReturned(size int64) {
reader.stats.BytesReturned += size
}
// Header returns nil; JSON input carries no header row.
func (reader *jinput) Header() []string {
return nil
}
// CreateStatXML is the function which does the marshaling from the stat
// struct into XML so that the stat message can be sent
func (reader *jinput) CreateStatXML() (string, error) {
if reader.options.Compressed == "NONE" {
reader.stats.BytesProcessed = reader.options.StreamSize
reader.stats.BytesScanned = reader.stats.BytesProcessed
}
out, err := xml.Marshal(&format.Stats{
BytesScanned: reader.stats.BytesScanned,
BytesProcessed: reader.stats.BytesProcessed,
BytesReturned: reader.stats.BytesReturned,
})
if err != nil {
return "", err
}
return xml.Header + string(out), nil
}
// CreateProgressXML is the function which does the marshaling from the
// progress struct into XML so that the progress message can be sent
func (reader *jinput) CreateProgressXML() (string, error) {
if reader.options.Compressed == "NONE" {
reader.stats.BytesScanned = reader.stats.BytesProcessed
}
out, err := xml.Marshal(&format.Progress{
BytesScanned: reader.stats.BytesScanned,
BytesProcessed: reader.stats.BytesProcessed,
BytesReturned: reader.stats.BytesReturned,
})
if err != nil {
return "", err
}
return xml.Header + string(out), nil
}
// Type - returns the data format type
func (reader *jinput) Type() format.Type {
return format.JSON
}
// ColNameErrs - this is a dummy function for JSON input type.
func (reader *jinput) ColNameErrs(columnNames []string) error {
return nil
}
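A matching usage sketch for the JSON reader over newline-delimited records (values are illustrative; Type is left false for LINES input):

package main

import (
	"fmt"
	"strings"

	"github.com/minio/minio/pkg/s3select/format/json"
)

func main() {
	data := `{"name":"alice","age":30}
{"name":"bob","age":25}`
	opts := &json.Options{
		Compressed: "NONE",
		Expression: "SELECT * FROM S3Object",
		StreamSize: int64(len(data)),
		ReadFrom:   strings.NewReader(data),
	}
	s, err := json.New(opts)
	if err != nil {
		panic(err)
	}
	for {
		record, err := s.Read()
		if err != nil || record == nil {
			break
		}
		fmt.Println(record["name"]) // alice, then bob
	}
}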

@@ -0,0 +1,63 @@
/*
* Minio Cloud Storage, (C) 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package format
import "encoding/xml"
// Select is an interface with the helper methods needed for implementing
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html
type Select interface {
Type() Type
Read() (map[string]interface{}, error)
Header() []string
HasHeader() bool
OutputFieldDelimiter() string
UpdateBytesProcessed(record map[string]interface{})
Expression() string
UpdateBytesReturned(int64)
CreateStatXML() (string, error)
CreateProgressXML() (string, error)
ColNameErrs(columnNames []string) error
Progress() bool
}
// Progress represents the XML format of the progress messages
type Progress struct {
XMLName xml.Name `xml:"Progress" json:"-"`
BytesScanned int64 `xml:"BytesScanned"`
BytesProcessed int64 `xml:"BytesProcessed"`
BytesReturned int64 `xml:"BytesReturned"`
}
// Stats represents the XML format of the stat messages
type Stats struct {
XMLName xml.Name `xml:"Stats" json:"-"`
BytesScanned int64 `xml:"BytesScanned"`
BytesProcessed int64 `xml:"BytesProcessed"`
BytesReturned int64 `xml:"BytesReturned"`
}
// Type denotes the supported data format types.
type Type string
// Different data format types.
const (
JSON Type = "json"
CSV Type = "csv"
)
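Because both readers construct to format.Select, the rest of the pipeline can stay format-agnostic. A hypothetical helper (the name newFormatReader is not from this commit), assuming these imports in the caller's file:

import (
	"github.com/minio/minio/pkg/s3select/format"
	"github.com/minio/minio/pkg/s3select/format/csv"
	"github.com/minio/minio/pkg/s3select/format/json"
)

// newFormatReader picks a constructor; callers downstream only see the
// format.Select interface.
func newFormatReader(isJSON bool, csvOpts *csv.Options, jsonOpts *json.Options) (format.Select, error) {
	if isJSON {
		return json.New(jsonOpts)
	}
	return csv.New(csvOpts)
}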

@@ -17,94 +17,58 @@
package s3select
import (
"encoding/json"
"fmt"
"math"
"reflect"
"strconv"
"strings"
"github.com/minio/minio/pkg/s3select/format"
"github.com/tidwall/gjson"
"github.com/xwb1989/sqlparser"
)
// MaxExpressionLength - 256KiB
const MaxExpressionLength = 256 * 1024
// This function processes the record size so that we can calculate BytesProcessed.
func processSize(myrecord []string) int64 {
if len(myrecord) > 0 {
var size int64
size = int64(len(myrecord)-1) + 1
for i := range myrecord {
size += int64(len(myrecord[i]))
}
return size
}
return 0
}
// This function finds whether a string is in a list
func stringInSlice(x string, list []string) bool {
for _, y := range list {
if x == y {
return true
}
}
return false
}
// This function returns the index of a string in a list
func stringIndex(a string, list []string) int {
for i, v := range list {
if v == a {
return i
}
}
return -1
}
// Returns a true or false, whether a string can be represented as an int.
func representsInt(s string) bool {
_, err := strconv.Atoi(s)
return err == nil
}
// The function below processes the where clause into an actual boolean given a
// row
func matchesMyWhereClause(row []string, columnNames map[string]int, alias string, whereClause interface{}) (bool, error) {
// This particular logic deals with the details of casting, e.g. if we have to
// cast a column of string numbers into ints for comparison.
// matchesMyWhereClause takes a map[string]interface{}, processes the where clause and returns true if the record satisfies it
func matchesMyWhereClause(record map[string]interface{}, alias string, whereClause interface{}) (bool, error) {
var conversionColumn string
var operator string
var operand interface{}
if fmt.Sprintf("%v", whereClause) == "false" {
return false, nil
}
out, err := json.Marshal(record)
if err != nil {
return false, ErrExternalEvalException
}
switch expr := whereClause.(type) {
case *sqlparser.IsExpr:
return evaluateIsExpr(expr, row, columnNames, alias)
return evaluateIsExpr(expr, string(out), alias)
case *sqlparser.RangeCond:
operator = expr.Operator
if operator != "between" && operator != "not between" {
return false, ErrUnsupportedSQLOperation
}
if operator == "not between" {
myResult, err := evaluateBetween(expr, alias, row, columnNames)
result, err := evaluateBetween(expr, alias, string(out))
if err != nil {
return false, err
}
return !myResult, nil
return !result, nil
}
myResult, err := evaluateBetween(expr, alias, row, columnNames)
result, err := evaluateBetween(expr, alias, string(out))
if err != nil {
return false, err
}
return myResult, nil
return result, nil
case *sqlparser.ComparisonExpr:
operator = expr.Operator
switch right := expr.Right.(type) {
case *sqlparser.FuncExpr:
operand = evaluateFuncExpr(right, "", row, columnNames)
operand = evaluateFuncExpr(right, "", string(out))
case *sqlparser.SQLVal:
var err error
operand, err = evaluateParserType(right)
@@ -116,29 +80,22 @@ func matchesMyWhereClause(row []string, columnNames map[string]int, alias string
myVal = ""
switch left := expr.Left.(type) {
case *sqlparser.FuncExpr:
myVal = evaluateFuncExpr(left, "", row, columnNames)
myVal = evaluateFuncExpr(left, "", string(out))
conversionColumn = ""
case *sqlparser.ColName:
conversionColumn = cleanCol(left.Name.CompliantName(), alias)
}
if representsInt(conversionColumn) {
intCol, err := strconv.Atoi(conversionColumn)
if err != nil {
return false, err
}
// Subtract 1 out because the index starts at 1 for Amazon instead of 0.
return evaluateOperator(row[intCol-1], operator, operand)
conversionColumn = left.Name.CompliantName()
}
if myVal != "" {
return evaluateOperator(myVal, operator, operand)
}
return evaluateOperator(row[columnNames[conversionColumn]], operator, operand)
return evaluateOperator(jsonValue(conversionColumn, string(out)), operator, operand)
case *sqlparser.AndExpr:
var leftVal bool
var rightVal bool
switch left := expr.Left.(type) {
case *sqlparser.ComparisonExpr:
temp, err := matchesMyWhereClause(row, columnNames, alias, left)
temp, err := matchesMyWhereClause(record, alias, left)
if err != nil {
return false, err
}
@@ -146,7 +103,7 @@ func matchesMyWhereClause(row []string, columnNames map[string]int, alias string
}
switch right := expr.Right.(type) {
case *sqlparser.ComparisonExpr:
temp, err := matchesMyWhereClause(row, columnNames, alias, right)
temp, err := matchesMyWhereClause(record, alias, right)
if err != nil {
return false, err
}
@@ -158,18 +115,18 @@ func matchesMyWhereClause(row []string, columnNames map[string]int, alias string
var rightVal bool
switch left := expr.Left.(type) {
case *sqlparser.ComparisonExpr:
leftVal, _ = matchesMyWhereClause(row, columnNames, alias, left)
leftVal, _ = matchesMyWhereClause(record, alias, left)
}
switch right := expr.Right.(type) {
case *sqlparser.ComparisonExpr:
rightVal, _ = matchesMyWhereClause(row, columnNames, alias, right)
rightVal, _ = matchesMyWhereClause(record, alias, right)
}
return (rightVal || leftVal), nil
}
return true, nil
}
func applyStrFunc(rawArg string, funcName string) string {
switch strings.ToUpper(funcName) {
case "TRIM":
@@ -192,6 +149,135 @@ func applyStrFunc(rawArg string, funcName string) string {
}
// evaluateBetween is a function which evaluates a Between Clause.
func evaluateBetween(betweenExpr *sqlparser.RangeCond, alias string, record string) (bool, error) {
var colToVal interface{}
var colFromVal interface{}
var conversionColumn string
var funcName string
switch colTo := betweenExpr.To.(type) {
case sqlparser.Expr:
switch colToMyVal := colTo.(type) {
case *sqlparser.FuncExpr:
colToVal = stringOps(colToMyVal, record, "")
case *sqlparser.SQLVal:
var err error
colToVal, err = evaluateParserType(colToMyVal)
if err != nil {
return false, err
}
}
}
switch colFrom := betweenExpr.From.(type) {
case sqlparser.Expr:
switch colFromMyVal := colFrom.(type) {
case *sqlparser.FuncExpr:
colFromVal = stringOps(colFromMyVal, record, "")
case *sqlparser.SQLVal:
var err error
colFromVal, err = evaluateParserType(colFromMyVal)
if err != nil {
return false, err
}
}
}
var myFuncVal string
switch left := betweenExpr.Left.(type) {
case *sqlparser.FuncExpr:
myFuncVal = evaluateFuncExpr(left, "", record)
conversionColumn = ""
case *sqlparser.ColName:
conversionColumn = cleanCol(left.Name.CompliantName(), alias)
}
toGreater, err := evaluateOperator(fmt.Sprintf("%v", colToVal), ">", colFromVal)
if err != nil {
return false, err
}
if toGreater {
return evalBetweenGreater(conversionColumn, record, funcName, colFromVal, colToVal, myFuncVal)
}
return evalBetweenLess(conversionColumn, record, funcName, colFromVal, colToVal, myFuncVal)
}
// evalBetweenGreater is a function which evaluates the between clause given
// that the TO is greater than the FROM.
func evalBetweenGreater(conversionColumn string, record string, funcName string, colFromVal interface{}, colToVal interface{}, myColVal string) (bool, error) {
if format.IsInt(conversionColumn) {
myVal, err := evaluateOperator(jsonValue("_"+conversionColumn, record), ">=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(jsonValue("_"+conversionColumn, record)))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
if myColVal != "" {
myVal, err := evaluateOperator(myColVal, ">=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(myColVal))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
myVal, err := evaluateOperator(jsonValue(conversionColumn, record), ">=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(jsonValue(conversionColumn, record)))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
// evalBetweenLess is a function which evaluates the between clause given
// that the FROM is greater than the TO.
func evalBetweenLess(conversionColumn string, record string, funcName string, colFromVal interface{}, colToVal interface{}, myColVal string) (bool, error) {
if format.IsInt(conversionColumn) {
// Column indexes are prefixed with "_" to look them up in the JSON record.
myVal, err := evaluateOperator(jsonValue("_"+conversionColumn, record), "<=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(jsonValue("_"+conversionColumn, record)))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
if myColVal != "" {
myVal, err := evaluateOperator(myColVal, "<=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(myColVal))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
myVal, err := evaluateOperator(jsonValue(conversionColumn, record), "<=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(jsonValue(conversionColumn, record)))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
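How a BETWEEN clause reaches these helpers, as a sketch assuming package scope (the row value is illustrative, and the call is expected to return true under these assumptions):

func exampleBetween() (bool, error) {
	stmt, err := sqlparser.Parse("SELECT * FROM S3Object WHERE age BETWEEN 20 AND 40")
	if err != nil {
		return false, err
	}
	rangeCond := stmt.(*sqlparser.Select).Where.Expr.(*sqlparser.RangeCond)
	// TO (40) > FROM (20), so evaluateBetween dispatches to evalBetweenGreater.
	return evaluateBetween(rangeCond, "S3Object", `{"age":"30"}`)
}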
// This is a really important function: it actually evaluates the boolean
// statement and therefore returns a bool; it functions as the lowest
// level of the state machine.
@@ -432,151 +518,16 @@ func cleanCol(myCol string, alias string) string {
return myCol
}
// evaluateBetween is a function which evaluates a Between Clause.
func evaluateBetween(betweenExpr *sqlparser.RangeCond, alias string, record []string, columnNames map[string]int) (bool, error) {
var colToVal interface{}
var colFromVal interface{}
var conversionColumn string
var funcName string
switch colTo := betweenExpr.To.(type) {
case sqlparser.Expr:
switch colToMyVal := colTo.(type) {
case *sqlparser.FuncExpr:
var temp string
temp = stringOps(colToMyVal, record, "", columnNames)
colToVal = []byte(temp)
case *sqlparser.SQLVal:
var err error
colToVal, err = evaluateParserType(colToMyVal)
if err != nil {
return false, err
}
}
}
switch colFrom := betweenExpr.From.(type) {
case sqlparser.Expr:
switch colFromMyVal := colFrom.(type) {
case *sqlparser.FuncExpr:
colFromVal = stringOps(colFromMyVal, record, "", columnNames)
case *sqlparser.SQLVal:
var err error
colFromVal, err = evaluateParserType(colFromMyVal)
if err != nil {
return false, err
}
}
}
var myFuncVal string
myFuncVal = ""
switch left := betweenExpr.Left.(type) {
case *sqlparser.FuncExpr:
myFuncVal = evaluateFuncExpr(left, "", record, columnNames)
conversionColumn = ""
case *sqlparser.ColName:
conversionColumn = cleanCol(left.Name.CompliantName(), alias)
}
toGreater, err := evaluateOperator(fmt.Sprintf("%v", colToVal), ">", colFromVal)
if err != nil {
return false, err
}
if toGreater {
return evalBetweenGreater(conversionColumn, record, funcName, columnNames, colFromVal, colToVal, myFuncVal)
}
return evalBetweenLess(conversionColumn, record, funcName, columnNames, colFromVal, colToVal, myFuncVal)
}
// evalBetweenLess is a function which evaluates the between clause given
// that the FROM is greater than the TO.
func evalBetweenLess(conversionColumn string, record []string, funcName string, columnNames map[string]int, colFromVal interface{}, colToVal interface{}, myCoalVal string) (bool, error) {
if representsInt(conversionColumn) {
myIndex, _ := strconv.Atoi(conversionColumn)
// Subtract 1 out because the index starts at 1 for Amazon instead of 0.
myVal, err := evaluateOperator(record[myIndex-1], "<=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(record[myIndex-1]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
if myCoalVal != "" {
myVal, err := evaluateOperator(myCoalVal, "<=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(myCoalVal))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
myVal, err := evaluateOperator(record[columnNames[conversionColumn]], "<=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), "<=", checkStringType(record[columnNames[conversionColumn]]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
// evalBetweenGreater is a function which evaluates the between clause given
// that the TO is greater than the FROM.
func evalBetweenGreater(conversionColumn string, record []string, funcName string, columnNames map[string]int, colFromVal interface{}, colToVal interface{}, myCoalVal string) (bool, error) {
if representsInt(conversionColumn) {
myIndex, _ := strconv.Atoi(conversionColumn)
myVal, err := evaluateOperator(record[myIndex-1], ">=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(record[myIndex-1]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
if myCoalVal != "" {
myVal, err := evaluateOperator(myCoalVal, ">=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(myCoalVal))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
myVal, err := evaluateOperator(record[columnNames[conversionColumn]], ">=", colFromVal)
if err != nil {
return false, err
}
var myOtherVal bool
myOtherVal, err = evaluateOperator(fmt.Sprintf("%v", colToVal), ">=", checkStringType(record[columnNames[conversionColumn]]))
if err != nil {
return false, err
}
return (myVal && myOtherVal), nil
}
// whereClauseNameErrs is a function which returns an error if there is a column
// in the where clause which does not exist.
func (reader *Input) whereClauseNameErrs(whereClause interface{}, alias string) error {
func whereClauseNameErrs(whereClause interface{}, alias string, f format.Select) error {
var conversionColumn string
switch expr := whereClause.(type) {
// case for checking errors within a clause of the form "col_name is ..."
case *sqlparser.IsExpr:
switch myCol := expr.Expr.(type) {
case *sqlparser.FuncExpr:
if err := reader.evaluateFuncErr(myCol); err != nil {
if err := evaluateFuncErr(myCol, f); err != nil {
return err
}
case *sqlparser.ColName:
@@ -585,7 +536,7 @@ func (reader *Input) whereClauseNameErrs(whereClause interface{}, alias string)
case *sqlparser.RangeCond:
switch left := expr.Left.(type) {
case *sqlparser.FuncExpr:
if err := reader.evaluateFuncErr(left); err != nil {
if err := evaluateFuncErr(left, f); err != nil {
return err
}
case *sqlparser.ColName:
@@ -594,7 +545,7 @@ func (reader *Input) whereClauseNameErrs(whereClause interface{}, alias string)
case *sqlparser.ComparisonExpr:
switch left := expr.Left.(type) {
case *sqlparser.FuncExpr:
if err := reader.evaluateFuncErr(left); err != nil {
if err := evaluateFuncErr(left, f); err != nil {
return err
}
case *sqlparser.ColName:
@@ -603,54 +554,30 @@ func (reader *Input) whereClauseNameErrs(whereClause interface{}, alias string)
case *sqlparser.AndExpr:
switch left := expr.Left.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(left, alias)
return whereClauseNameErrs(left, alias, f)
}
switch right := expr.Right.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(right, alias)
return whereClauseNameErrs(right, alias, f)
}
case *sqlparser.OrExpr:
switch left := expr.Left.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(left, alias)
return whereClauseNameErrs(left, alias, f)
}
switch right := expr.Right.(type) {
case *sqlparser.ComparisonExpr:
return reader.whereClauseNameErrs(right, alias)
return whereClauseNameErrs(right, alias, f)
}
}
if conversionColumn != "" {
return reader.colNameErrs([]string{conversionColumn})
}
return nil
}
// colNameErrs is a function which makes sure that the requested headers are
// present in the file; otherwise it throws an error.
func (reader *Input) colNameErrs(columnNames []string) error {
for i := 0; i < len(columnNames); i++ {
if columnNames[i] == "" {
continue
}
if !representsInt(columnNames[i]) && !reader.options.HeaderOpt {
return ErrInvalidColumnIndex
}
if representsInt(columnNames[i]) {
tempInt, _ := strconv.Atoi(columnNames[i])
if tempInt > len(reader.Header()) || tempInt == 0 {
return ErrInvalidColumnIndex
}
} else {
if reader.options.HeaderOpt && !stringInSlice(columnNames[i], reader.Header()) {
return ErrMissingHeaders
}
}
return f.ColNameErrs([]string{conversionColumn})
}
return nil
}
// aggFuncToStr converts an array of floats into a properly formatted string.
func (reader *Input) aggFuncToStr(aggVals []float64) string {
func aggFuncToStr(aggVals []float64, f format.Select) string {
// Define a number formatting function
numToStr := func(f float64) string {
if f == math.Trunc(f) {
@@ -666,7 +593,7 @@ func (reader *Input) aggFuncToStr(aggVals []float64) string {
}
// Intersperse field delimiter
return strings.Join(vals, reader.options.OutputFieldDelimiter)
return strings.Join(vals, f.OutputFieldDelimiter())
}
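A quick worked example of the formatting above, assuming f is a format.Select whose OutputFieldDelimiter() is ",":

out := aggFuncToStr([]float64{3, 2.5, 10}, f)
// out == "3,2.5,10": math.Trunc spots whole floats, so 3.0 prints as "3".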
// checkForDuplicates ensures we do not have an ambiguous column name.
@@ -714,18 +641,18 @@ func evaluateParserType(col *sqlparser.SQLVal) (interface{}, error) {
// parseErrs is the function which handles all the errors that could occur
// through use of function arguments such as column names in NULLIF
func (reader *Input) parseErrs(columnNames []string, whereClause interface{}, alias string, myFuncs *SelectFuncs) error {
func parseErrs(columnNames []string, whereClause interface{}, alias string, myFuncs *SelectFuncs, f format.Select) error {
// Below code cleans up column names.
reader.processColumnNames(columnNames, alias)
processColumnNames(columnNames, alias, f)
if columnNames[0] != "*" {
if err := reader.colNameErrs(columnNames); err != nil {
if err := f.ColNameErrs(columnNames); err != nil {
return err
}
}
// Below code ensures the whereClause has no errors.
if whereClause != nil {
tempClause := whereClause
if err := reader.whereClauseNameErrs(tempClause, alias); err != nil {
if err := whereClauseNameErrs(tempClause, alias, f); err != nil {
return err
}
}
@@ -733,9 +660,16 @@ func (reader *Input) parseErrs(columnNames []string, whereClause interface{}, al
if myFuncs.funcExpr[i] == nil {
continue
}
if err := reader.evaluateFuncErr(myFuncs.funcExpr[i]); err != nil {
if err := evaluateFuncErr(myFuncs.funcExpr[i], f); err != nil {
return err
}
}
return nil
}
// jsonValue returns the value corresponding to the key in the JSON row.
// input is the key and row is the JSON string.
func jsonValue(input string, row string) string {
value := gjson.Get(row, input)
return value.String()
}
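Since jsonValue delegates to gjson.Get, lookups follow gjson path syntax. A small sketch:

row := `{"name":"alice","address":{"city":"Chennai"}}`
fmt.Println(jsonValue("name", row))         // alice
fmt.Println(jsonValue("address.city", row)) // Chennai (gjson treats dots as path separators)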

@@ -18,18 +18,14 @@ package s3select
import (
"bytes"
"compress/bzip2"
"encoding/csv"
"encoding/xml"
"io"
"strconv"
"net/http"
"strings"
"time"
"net/http"
gzip "github.com/klauspost/pgzip"
"github.com/minio/minio/pkg/ioutil"
"github.com/minio/minio/pkg/s3select/format"
"github.com/minio/minio/pkg/s3select/format/csv"
"github.com/minio/minio/pkg/s3select/format/json"
)
const (
@@ -40,245 +36,16 @@ const (
continuationTime time.Duration = 5 * time.Second
)
// progress represents the XML format of the progress messages
type progress struct {
XMLName xml.Name `xml:"Progress" json:"-"`
BytesScanned int64 `xml:"BytesScanned"`
BytesProcessed int64 `xml:"BytesProcessed"`
BytesReturned int64 `xml:"BytesReturned"`
}
// stats represents the XML format of the stat messages
type stats struct {
XMLName xml.Name `xml:"Stats" json:"-"`
BytesScanned int64 `xml:"BytesScanned"`
BytesProcessed int64 `xml:"BytesProcessed"`
BytesReturned int64 `xml:"BytesReturned"`
}
// statInfo is a struct that represents the stat information
type statInfo struct {
BytesScanned int64
BytesReturned int64
BytesProcessed int64
}
// Input represents a record producing input from a formatted file or pipe.
type Input struct {
options *Options
reader *csv.Reader
firstRow []string
header []string
minOutputLength int
stats *statInfo
}
// Options are passed to the underlying encoding/csv reader.
type Options struct {
// HasHeader when true, will treat the first row as a header row.
HasHeader bool
// RecordDelimiter is the string that records are delimited by.
RecordDelimiter string
// FieldDelimiter is the string that fields are delimited by.
FieldDelimiter string
// Comments is the string whose first character is treated as the
// comment character; lines starting with it are skipped.
Comments string
// Name of the table that is used for querying
Name string
// ReadFrom is where the data will be read from.
ReadFrom io.Reader
// Compressed, when set to GZIP or BZIP2, adds the matching
// decompression reader to extract the csv.
Compressed string
// SQL expression meant to be evaluated.
Expression string
// OutputFieldDelimiter is what the output CSV will be delimited by.
OutputFieldDelimiter string
// Size of incoming object
StreamSize int64
// HeaderOpt is true when FileHeaderInfo is "USE"
HeaderOpt bool
// Progress enabled, enable/disable progress messages.
Progress bool
}
// NewInput sets up a new Input; the first row is read when this is run.
// If there is a problem with reading the first row, the error is returned.
// Otherwise, the returned reader can be reliably consumed with ReadRecord()
// until ReadRecord() returns nil.
func NewInput(opts *Options) (*Input, error) {
myReader := opts.ReadFrom
var tempBytesScanned int64
tempBytesScanned = 0
switch opts.Compressed {
case "GZIP":
tempBytesScanned = opts.StreamSize
var err error
if myReader, err = gzip.NewReader(opts.ReadFrom); err != nil {
return nil, ErrTruncatedInput
}
case "BZIP2":
tempBytesScanned = opts.StreamSize
myReader = bzip2.NewReader(opts.ReadFrom)
}
// DelimitedReader treats custom record delimiter like `\r\n`,`\r`,`ab` etc and replaces it with `\n`.
normalizedReader := ioutil.NewDelimitedReader(myReader, []rune(opts.RecordDelimiter))
progress := &statInfo{
BytesScanned: tempBytesScanned,
BytesProcessed: 0,
BytesReturned: 0,
}
reader := &Input{
options: opts,
reader: csv.NewReader(normalizedReader),
stats: progress,
}
reader.firstRow = nil
reader.reader.FieldsPerRecord = -1
if reader.options.FieldDelimiter != "" {
reader.reader.Comma = rune(reader.options.FieldDelimiter[0])
}
if reader.options.Comments != "" {
reader.reader.Comment = rune(reader.options.Comments[0])
}
// QuoteCharacter - " (defaulted currently)
reader.reader.LazyQuotes = true
if err := reader.readHeader(); err != nil {
return nil, err
}
return reader, nil
}
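As a side note on the compression handling above: the switch simply wraps ReadFrom in a decompressing reader before any CSV parsing happens. Below is a minimal, self-contained sketch of the same idea using only the standard library (the commit itself uses klauspost/pgzip for gzip; wrapCompressed is a hypothetical helper name, not part of this package):

package main

import (
	"bytes"
	"compress/bzip2"
	"compress/gzip"
	"fmt"
	"io"
	"io/ioutil"
)

// wrapCompressed wraps r in a decompressing reader chosen by the
// declared compression type; "NONE" or empty passes r through.
func wrapCompressed(r io.Reader, compressionType string) (io.Reader, error) {
	switch compressionType {
	case "GZIP":
		return gzip.NewReader(r)
	case "BZIP2":
		return bzip2.NewReader(r), nil
	}
	return r, nil
}

func main() {
	// Build a small gzip-compressed CSV payload in memory.
	var buf bytes.Buffer
	gw := gzip.NewWriter(&buf)
	gw.Write([]byte("id,name\n1,alice\n"))
	gw.Close()

	r, err := wrapCompressed(&buf, "GZIP")
	if err != nil {
		panic(err)
	}
	out, _ := ioutil.ReadAll(r)
	fmt.Printf("%s", out) // prints the original CSV
}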
// ReadRecord reads a single record from the underlying reader and always
// returns successfully. If the record is empty, an empty []string is
// returned. Records expand to match the current row size, adding blank
// fields as needed; a record never has fewer fields than the first row.
// Returns nil on EOF.
// In the event of a parse error due to an invalid record, the error is
// ignored and an empty []string with the number of fields of the first row
// is returned, as if the record were empty.
//
// In general, this is a reader that is very tolerant of problems.
func (reader *Input) ReadRecord() []string {
var row []string
var fileErr error
if reader.firstRow != nil {
row = reader.firstRow
reader.firstRow = nil
return row
}
row, fileErr = reader.reader.Read()
emptysToAppend := reader.minOutputLength - len(row)
if fileErr == io.EOF || fileErr == io.ErrClosedPipe {
return nil
} else if _, ok := fileErr.(*csv.ParseError); ok {
emptysToAppend = reader.minOutputLength
}
if emptysToAppend > 0 {
for counter := 0; counter < emptysToAppend; counter++ {
row = append(row, "")
}
}
return row
}
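The padding step is the subtle part of ReadRecord: short or unparseable rows are widened so downstream column lookups never index out of range. A minimal sketch of just that step (padRow is a hypothetical helper, not part of this package):

package main

import "fmt"

// padRow mirrors ReadRecord's tolerance: rows shorter than the first
// row are padded with blank fields up to minOutputLength.
func padRow(row []string, minOutputLength int) []string {
	for len(row) < minOutputLength {
		row = append(row, "")
	}
	return row
}

func main() {
	fmt.Printf("%q\n", padRow([]string{"a", "b"}, 4)) // ["a" "b" "" ""]
}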
// readHeader reads the header into the header variable if the header is present
// as the first row of the csv
func (reader *Input) readHeader() error {
var readErr error
if reader.options.HasHeader {
reader.firstRow, readErr = reader.reader.Read()
if readErr != nil {
return ErrCSVParsingError
}
reader.header = cleanHeader(reader.firstRow)
reader.firstRow = nil
reader.minOutputLength = len(reader.header)
} else {
reader.firstRow, readErr = reader.reader.Read()
if readErr != nil && readErr != io.EOF {
return ErrCSVParsingError
}
reader.header = make([]string, len(reader.firstRow))
reader.minOutputLength = len(reader.header)
for i := 0; i < reader.minOutputLength; i++ {
reader.header[i] = strconv.Itoa(i)
}
}
return nil
}
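When there is no header row, the column names are simply the stringified field positions. A quick self-contained sketch of the synthesized header for a headerless three-field row:

package main

import (
	"fmt"
	"strconv"
)

func main() {
	firstRow := []string{"5", "is", "a"}
	header := make([]string, len(firstRow))
	for i := range header {
		header[i] = strconv.Itoa(i)
	}
	fmt.Println(header) // [0 1 2]
}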
// cleanHeader replaces the spaces in column names with underscores
func cleanHeader(columns []string) []string {
for i := 0; i < len(columns); i++ {
columns[i] = strings.Replace(columns[i], " ", "_", -1)
}
return columns
}
// createStatXML is the function which does the marshaling from the stat
// structs into XML so that the stat message can be sent
func (reader *Input) createStatXML() (string, error) {
if reader.options.Compressed == "NONE" {
reader.stats.BytesProcessed = reader.options.StreamSize
reader.stats.BytesScanned = reader.stats.BytesProcessed
}
out, err := xml.Marshal(&stats{
BytesScanned: reader.stats.BytesScanned,
BytesProcessed: reader.stats.BytesProcessed,
BytesReturned: reader.stats.BytesReturned,
})
if err != nil {
return "", err
}
return xml.Header + string(out), nil
}
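For reference, marshaling the stats struct with the tags defined above produces a payload of the following shape. This sketch re-declares the struct so it runs standalone; the byte counts are made up:

package main

import (
	"encoding/xml"
	"fmt"
)

type stats struct {
	XMLName        xml.Name `xml:"Stats" json:"-"`
	BytesScanned   int64    `xml:"BytesScanned"`
	BytesProcessed int64    `xml:"BytesProcessed"`
	BytesReturned  int64    `xml:"BytesReturned"`
}

func main() {
	out, _ := xml.Marshal(&stats{BytesScanned: 24, BytesProcessed: 24, BytesReturned: 10})
	fmt.Println(xml.Header + string(out))
	// <?xml version="1.0" encoding="UTF-8"?>
	// <Stats><BytesScanned>24</BytesScanned><BytesProcessed>24</BytesProcessed><BytesReturned>10</BytesReturned></Stats>
}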
// createProgressXML is the function which does the marshaling from the
// progress structs into XML so that the progress message can be sent
func (reader *Input) createProgressXML() (string, error) {
if reader.options.HasHeader {
reader.stats.BytesProcessed += processSize(reader.header)
}
if reader.options.Compressed == "NONE" {
reader.stats.BytesScanned = reader.stats.BytesProcessed
}
out, err := xml.Marshal(&progress{
BytesScanned: reader.stats.BytesScanned,
BytesProcessed: reader.stats.BytesProcessed,
BytesReturned: reader.stats.BytesReturned,
})
if err != nil {
return "", err
}
return xml.Header + string(out), nil
}
// Header returns the header of the reader: either the first row, if a header
// is set in the options, or the column number as a string, starting with "0".
func (reader *Input) Header() []string {
return reader.header
}

// ParseSelectTokens tokenizes the select query into the required columns,
// alias, limit value, where clause, aggregate function names, myFuncs, and
// any error.
type ParseSelectTokens struct {
reqCols []string
alias string
myLimit int64
whereClause interface{}
aggFunctionNames []string
myFuncs *SelectFuncs
myErr error
}
// Row is a Struct for keeping track of key aspects of a row.
@@ -287,10 +54,58 @@ type Row struct {
err error
}
// cleanExpr replaces "" and '' quoting with `` backticks for the select parser
func cleanExpr(expr string) string {
r := strings.NewReplacer("\"", "`", "'", "`")
return r.Replace(expr)
}
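A quick usage sketch of cleanExpr: sqlparser expects backtick-quoted identifiers, so both double and single quotes are normalized before parsing (note that single-quoted string literals are rewritten as well):

package main

import (
	"fmt"
	"strings"
)

func cleanExpr(expr string) string {
	r := strings.NewReplacer("\"", "`", "'", "`")
	return r.Replace(expr)
}

func main() {
	fmt.Println(cleanExpr(`SELECT "col name" FROM S3Object WHERE s = 'x'`))
	// SELECT `col name` FROM S3Object WHERE s = `x`
}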
// New - initialize new select format
func New(gr io.Reader, size int64, req ObjectSelectRequest) (s3s format.Select, err error) {
// Initializing options for CSV
if req.InputSerialization.CSV != nil {
if req.OutputSerialization.CSV.FieldDelimiter == "" {
req.OutputSerialization.CSV.FieldDelimiter = ","
}
if req.InputSerialization.CSV.FileHeaderInfo == "" {
req.InputSerialization.CSV.FileHeaderInfo = CSVFileHeaderInfoNone
}
if req.InputSerialization.CSV.RecordDelimiter == "" {
req.InputSerialization.CSV.RecordDelimiter = "\n"
}
s3s, err = csv.New(&csv.Options{
HasHeader: req.InputSerialization.CSV.FileHeaderInfo != CSVFileHeaderInfoNone,
RecordDelimiter: req.InputSerialization.CSV.RecordDelimiter,
FieldDelimiter: req.InputSerialization.CSV.FieldDelimiter,
Comments: req.InputSerialization.CSV.Comments,
Name: "S3Object", // Default table name for all objects
ReadFrom: gr,
Compressed: string(req.InputSerialization.CompressionType),
Expression: cleanExpr(req.Expression),
OutputFieldDelimiter: req.OutputSerialization.CSV.FieldDelimiter,
StreamSize: size,
HeaderOpt: req.InputSerialization.CSV.FileHeaderInfo == CSVFileHeaderInfoUse,
Progress: req.RequestProgress.Enabled,
})
} else if req.InputSerialization.JSON != nil {
// Initializing options for JSON
s3s, err = json.New(&json.Options{
Name: "S3Object", // Default table name for all objects
ReadFrom: gr,
Compressed: string(req.InputSerialization.CompressionType),
Expression: cleanExpr(req.Expression),
StreamSize: size,
Type: req.InputSerialization.JSON.Type == JSONTypeDocument,
Progress: req.RequestProgress.Enabled,
})
}
return s3s, err
}
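A hedged end-to-end sketch of the new entry points, assuming the package is importable at its path in this commit: unmarshal a SelectObjectContentRequest body, build the format.Select with New, and stream with Execute. Execute emits binary AWS event-stream frames (records, stats, end), so the stdout here is not plain text:

package main

import (
	"encoding/xml"
	"os"
	"strings"

	"github.com/minio/minio/pkg/s3select"
)

const requestXML = `<SelectObjectContentRequest>
  <Expression>SELECT * FROM S3Object</Expression>
  <ExpressionType>SQL</ExpressionType>
  <InputSerialization>
    <CompressionType>NONE</CompressionType>
    <CSV><FileHeaderInfo>USE</FileHeaderInfo></CSV>
  </InputSerialization>
  <OutputSerialization><CSV></CSV></OutputSerialization>
  <RequestProgress><Enabled>false</Enabled></RequestProgress>
</SelectObjectContentRequest>`

func main() {
	var req s3select.ObjectSelectRequest
	if err := xml.Unmarshal([]byte(requestXML), &req); err != nil {
		panic(err)
	}
	data := "id,name\n1,alice\n2,bob\n"
	s3s, err := s3select.New(strings.NewReader(data), int64(len(data)), req)
	if err != nil {
		panic(err)
	}
	if err := s3select.Execute(os.Stdout, s3s); err != nil {
		panic(err)
	}
}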
// Execute is the function where all the blocking occurs. It writes to the
// HTTP response writer in a streaming fashion so that the client can
// actively use the results before the query finishes executing.
func (reader *Input) Execute(writer io.Writer) error {
func Execute(writer io.Writer, f format.Select) error {
myRow := make(chan *Row)
curBuf := bytes.NewBuffer(make([]byte, 1000000))
curBuf.Reset()
@@ -298,12 +113,14 @@ func (reader *Input) Execute(writer io.Writer) error {
continuationTimer := time.NewTimer(continuationTime)
defer progressTicker.Stop()
defer continuationTimer.Stop()
go reader.runSelectParser(reader.options.Expression, myRow)
go runSelectParser(f, myRow)
for {
select {
case row, ok := <-myRow:
if ok && row.err != nil {
errorMessage := reader.writeErrorMessage(row.err, curBuf)
errorMessage := writeErrorMessage(row.err, curBuf)
_, err := errorMessage.WriteTo(writer)
flusher, okFlush := writer.(http.Flusher)
if okFlush {
@@ -316,7 +133,7 @@ func (reader *Input) Execute(writer io.Writer) error {
close(myRow)
return nil
} else if ok {
message := reader.writeRecordMessage(row.record, curBuf)
message := writeRecordMessage(row.record, curBuf)
_, err := message.WriteTo(writer)
flusher, okFlush := writer.(http.Flusher)
if okFlush {
@@ -326,17 +143,17 @@ func (reader *Input) Execute(writer io.Writer) error {
return err
}
curBuf.Reset()
reader.stats.BytesReturned += int64(len(row.record))
f.UpdateBytesReturned(int64(len(row.record)))
if !continuationTimer.Stop() {
<-continuationTimer.C
}
continuationTimer.Reset(continuationTime)
} else if !ok {
statPayload, err := reader.createStatXML()
statPayload, err := f.CreateStatXML()
if err != nil {
return err
}
statMessage := reader.writeStatMessage(statPayload, curBuf)
statMessage := writeStatMessage(statPayload, curBuf)
_, err = statMessage.WriteTo(writer)
flusher, ok := writer.(http.Flusher)
if ok {
@@ -346,7 +163,7 @@ func (reader *Input) Execute(writer io.Writer) error {
return err
}
curBuf.Reset()
message := reader.writeEndMessage(curBuf)
message := writeEndMessage(curBuf)
_, err = message.WriteTo(writer)
flusher, ok = writer.(http.Flusher)
if ok {
@@ -360,12 +177,12 @@ func (reader *Input) Execute(writer io.Writer) error {
case <-progressTicker.C:
// Send progress messages only if requested by client.
if reader.options.Progress {
progressPayload, err := reader.createProgressXML()
if f.Progress() {
progressPayload, err := f.CreateProgressXML()
if err != nil {
return err
}
progressMessage := reader.writeProgressMessage(progressPayload, curBuf)
progressMessage := writeProgressMessage(progressPayload, curBuf)
_, err = progressMessage.WriteTo(writer)
flusher, ok := writer.(http.Flusher)
if ok {
@@ -377,7 +194,7 @@ func (reader *Input) Execute(writer io.Writer) error {
curBuf.Reset()
}
case <-continuationTimer.C:
message := reader.writeContinuationMessage(curBuf)
message := writeContinuationMessage(curBuf)
_, err := message.WriteTo(writer)
flusher, ok := writer.(http.Flusher)
if ok {

View File

@@ -282,7 +282,7 @@ func writeProgressHeader() []byte {
// writeRecordMessage is the function which constructs the binary message for a
// record message to be sent.
func (csvOutput *Input) writeRecordMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer {
func writeRecordMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer {
// The below are the specifications of the header for a "record" event
// 11 -event type - 7 - 7 "Records"
// 13 -content-type -7 -24 "application/octet-stream"
@@ -310,7 +310,7 @@ func (csvOutput *Input) writeRecordMessage(payload string, currentMessage *bytes
// writeContinuationMessage is the function which constructs the binary message
// for a continuation message to be sent.
func (csvOutput *Input) writeContinuationMessage(currentMessage *bytes.Buffer) *bytes.Buffer {
func writeContinuationMessage(currentMessage *bytes.Buffer) *bytes.Buffer {
// 11 -event type - 7 - 4 "Cont"
// 13 -message-type -7 5 "event"
// This is predefined from AMZ protocol found here:
@@ -333,7 +333,7 @@ func (csvOutput *Input) writeContinuationMessage(currentMessage *bytes.Buffer) *
// writeEndMessage is the function which constructs the binary message
// for a end message to be sent.
func (csvOutput *Input) writeEndMessage(currentMessage *bytes.Buffer) *bytes.Buffer {
func writeEndMessage(currentMessage *bytes.Buffer) *bytes.Buffer {
// 11 -event type - 7 - 3 "End"
// 13 -message-type -7 5 "event"
// This is predefined from AMZ protocol found here:
@@ -356,7 +356,7 @@ func (csvOutput *Input) writeEndMessage(currentMessage *bytes.Buffer) *bytes.Buf
// writeStatMessage is the function which constructs the binary message for a
// stat message to be sent.
func (csvOutput *Input) writeStatMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer {
func writeStatMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer {
// 11 -event type - 7 - 5 "Stat" 20
// 13 -content-type -7 -8 "text/xml" 25
// 13 -message-type -7 5 "event" 22
@@ -384,7 +384,7 @@ func (csvOutput *Input) writeStatMessage(payload string, currentMessage *bytes.B
// writeProgressMessage is the function which constructs the binary message for
// a progress message to be sent.
func (csvOutput *Input) writeProgressMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer {
func writeProgressMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer {
// The below are the specifications of the header for a "Progress" event
// 11 -event type - 7 - 8 "Progress" 23
// 13 -content-type -7 -8 "text/xml" 25
@@ -413,7 +413,7 @@ func (csvOutput *Input) writeProgressMessage(payload string, currentMessage *byt
// writeErrorMessage is the function which constructs the binary message for
// an error message to be sent.
func (csvOutput *Input) writeErrorMessage(errorMessage error, currentMessage *bytes.Buffer) *bytes.Buffer {
func writeErrorMessage(errorMessage error, currentMessage *bytes.Buffer) *bytes.Buffer {
// The below are the specifications of the header for a "error" event
// 11 -error-code - 7 - DEFINED "DEFINED"

View File

@@ -17,10 +17,13 @@
package s3select
import (
"encoding/json"
"math"
"sort"
"strconv"
"strings"
"github.com/minio/minio/pkg/s3select/format"
"github.com/xwb1989/sqlparser"
)
@@ -33,8 +36,8 @@ type SelectFuncs struct {
// RunSqlParser allows us to easily bundle all the functions from above and run
// them in the appropriate order.
func (reader *Input) runSelectParser(selectExpression string, myRow chan *Row) {
reqCols, alias, myLimit, whereClause, aggFunctionNames, myFuncs, myErr := reader.ParseSelect(selectExpression)
func runSelectParser(f format.Select, myRow chan *Row) {
reqCols, alias, myLimit, whereClause, aggFunctionNames, myFuncs, myErr := ParseSelect(f)
if myErr != nil {
rowStruct := &Row{
err: myErr,
@@ -42,23 +45,26 @@ func (reader *Input) runSelectParser(selectExpression string, myRow chan *Row) {
myRow <- rowStruct
return
}
reader.processSelectReq(reqCols, alias, whereClause, myLimit, aggFunctionNames, myRow, myFuncs)
processSelectReq(reqCols, alias, whereClause, myLimit, aggFunctionNames, myRow, myFuncs, f)
}
// ParseSelect parses the SELECT expression and effectively tokenizes it into
// its separate parts. It returns the requested column names, alias, limit of
// records, and the where clause.
func (reader *Input) ParseSelect(sqlInput string) ([]string, string, int64, interface{}, []string, *SelectFuncs, error) {
func ParseSelect(f format.Select) ([]string, string, int64, interface{}, []string, *SelectFuncs, error) {
stmt, err := sqlparser.Parse(sqlInput)
var whereClause interface{}
var alias string
var limit int64
myFuncs := &SelectFuncs{}
stmt, err := sqlparser.Parse(cleanExpr(f.Expression()))
// TODO: parse these errors a bit more precisely to return more specific S3 errors
if err != nil {
return nil, "", 0, nil, nil, nil, ErrLexerInvalidChar
}
switch stmt := stmt.(type) {
case *sqlparser.Select:
// evaluates the where clause
@@ -146,7 +152,7 @@ func (reader *Input) ParseSelect(sqlInput string) ([]string, string, int64, inte
if stmt.OrderBy != nil {
return nil, "", 0, nil, nil, nil, ErrParseUnsupportedToken
}
if err := reader.parseErrs(columnNames, whereClause, alias, myFuncs); err != nil {
if err := parseErrs(columnNames, whereClause, alias, myFuncs, f); err != nil {
return nil, "", 0, nil, nil, nil, err
}
return columnNames, alias, limit, whereClause, functionNames, myFuncs, nil
@@ -157,13 +163,13 @@ func (reader *Input) ParseSelect(sqlInput string) ([]string, string, int64, inte
// This is the main function, It goes row by row and for records which validate
// the where clause it currently prints the appropriate row given the requested
// columns.
func (reader *Input) processSelectReq(reqColNames []string, alias string, whereClause interface{}, limitOfRecords int64, functionNames []string, myRow chan *Row, myFunc *SelectFuncs) {
func processSelectReq(reqColNames []string, alias string, whereClause interface{}, limitOfRecords int64, functionNames []string, myRow chan *Row, myFunc *SelectFuncs, f format.Select) {
counter := -1
var columns []string
filtrCount := 0
functionFlag := false
// myAggVals stores our aggregation values when aggregation functions are used.
myAggVals := make([]float64, len(reqColNames))
// lowercaseColumnsMap is used in accordance with hasDuplicates so that we can
// raise the error "Ambiguous" if a case-insensitive column is provided and we
// have multiple matches.
@@ -174,23 +180,35 @@ func (reader *Input) processSelectReq(reqColNames []string, alias string, whereC
if limitOfRecords == 0 {
limitOfRecords = math.MaxInt64
}
for {
record := reader.ReadRecord()
reader.stats.BytesProcessed += processSize(record)
record, err := f.Read()
if err != nil {
rowStruct := &Row{
err: err,
}
myRow <- rowStruct
return
}
if record == nil {
if functionFlag {
rowStruct := &Row{
record: reader.aggFuncToStr(myAggVals) + "\n",
record: aggFuncToStr(myAggVals, f) + "\n",
}
myRow <- rowStruct
}
close(myRow)
return
}
if counter == -1 && reader.options.HeaderOpt && len(reader.header) > 0 {
columns = reader.Header()
// Marshal the record once so the jsonValue-based lookups below can reuse it.
out, _ := json.Marshal(record)
f.UpdateBytesProcessed(record)
if counter == -1 && f.HasHeader() && len(f.Header()) > 0 {
columns = f.Header()
myErr := checkForDuplicates(columns, columnsMap, hasDuplicates, lowercaseColumnsMap)
if format.IsInt(reqColNames[0]) {
myErr = ErrMissingHeaders
}
if myErr != nil {
rowStruct := &Row{
err: myErr,
@@ -198,17 +216,21 @@ func (reader *Input) processSelectReq(reqColNames []string, alias string, whereC
myRow <- rowStruct
return
}
} else if counter == -1 && len(reader.header) > 0 {
columns = reader.Header()
} else if counter == -1 && len(f.Header()) > 0 {
columns = f.Header()
for i := 0; i < len(columns); i++ {
columnsMap["_"+strconv.Itoa(i)] = i
}
}
// Return once the number of records reaches the LIMIT defined in the
// select query.
if int64(filtrCount) == limitOfRecords && limitOfRecords != 0 {
close(myRow)
return
}
// The call to the where clause function ensures that the rows we print match our where clause.
condition, myErr := matchesMyWhereClause(record, columnsMap, alias, whereClause)
condition, myErr := matchesMyWhereClause(record, alias, whereClause)
if myErr != nil {
rowStruct := &Row{
err: myErr,
@@ -219,25 +241,33 @@ func (reader *Input) processSelectReq(reqColNames []string, alias string, whereC
if condition {
// if it's an asterisk we just print everything in the row
if reqColNames[0] == "*" && functionNames[0] == "" {
rowStruct := &Row{
record: reader.printAsterix(record) + "\n",
var row *Row
switch f.Type() {
case format.CSV:
row = &Row{
record: strings.Join(convertToSlice(columnsMap, record, string(out)), f.OutputFieldDelimiter()) + "\n",
}
case format.JSON:
row = &Row{
record: string(out) + "\n",
}
}
myRow <- rowStruct
myRow <- row
} else if alias != "" {
// This deals with the case of a request for a column by index,
// e.g. A._1.
if representsInt(reqColNames[0]) {
if format.IsInt(reqColNames[0]) {
// This checks whether any aggregation function was called as now we
// no longer will go through printing each row, and only print at the end
if len(functionNames) > 0 && functionNames[0] != "" {
functionFlag = true
aggregationFunctions(counter, filtrCount, myAggVals, columnsMap, reqColNames, functionNames, record)
aggregationFunctions(counter, filtrCount, myAggVals, reqColNames, functionNames, string(out))
} else {
// The code below finds the appropriate columns of the row given the
// indicies provided in the SQL request and utilizes the map to
// retrieve the correct part of the row.
myQueryRow, myErr := reader.processColNameIndex(record, reqColNames, columns)
myQueryRow, myErr := processColNameIndex(string(out), reqColNames, columns, f)
if myErr != nil {
rowStruct := &Row{
err: myErr,
@@ -255,12 +285,12 @@ func (reader *Input) processSelectReq(reqColNames []string, alias string, whereC
// form of actual names rather than indices.
if len(functionNames) > 0 && functionNames[0] != "" {
functionFlag = true
aggregationFunctions(counter, filtrCount, myAggVals, columnsMap, reqColNames, functionNames, record)
aggregationFunctions(counter, filtrCount, myAggVals, reqColNames, functionNames, string(out))
} else {
// This code prints the appropriate part of the row given the filter
// and select request, if the select request was based on column
// names rather than indices.
myQueryRow, myErr := reader.processColNameLiteral(record, reqColNames, columns, columnsMap, myFunc)
myQueryRow, myErr := processColNameLiteral(string(out), reqColNames, myFunc, f)
if myErr != nil {
rowStruct := &Row{
err: myErr,
@@ -281,75 +311,73 @@ func (reader *Input) processSelectReq(reqColNames []string, alias string, whereC
}
}
// printAsterix helps to print out the entire row if an asterisk is used.
func (reader *Input) printAsterix(record []string) string {
return strings.Join(record, reader.options.OutputFieldDelimiter)
}
// processColumnNames is a function which allows for cleaning of column names.
func (reader *Input) processColumnNames(reqColNames []string, alias string) error {
for i := 0; i < len(reqColNames); i++ {
// The code below basically cleans the column name of its alias and other
// syntax, so that we can extract its pure name.
reqColNames[i] = cleanCol(reqColNames[i], alias)
func processColumnNames(reqColNames []string, alias string, f format.Select) error {
switch f.Type() {
case format.CSV:
for i := 0; i < len(reqColNames); i++ {
// The code below basically cleans the column name of its alias and other
// syntax, so that we can extract its pure name.
reqColNames[i] = cleanCol(reqColNames[i], alias)
}
case format.JSON:
// JSON doesn't have columns, so no cleaning is required
}
return nil
}
// processColNameIndex is the function which creates the row for an
// index-based query.
func (reader *Input) processColNameIndex(record []string, reqColNames []string, columns []string) (string, error) {
func processColNameIndex(record string, reqColNames []string, columns []string, f format.Select) (string, error) {
row := make([]string, len(reqColNames))
for i := 0; i < len(reqColNames); i++ {
// COALESCE and NULLIF do not support index-based access.
if reqColNames[0] == "0" {
return "", ErrInvalidColumnIndex
return "", format.ErrInvalidColumnIndex
}
// Subtract 1 because AWS Indexing is not 0 based, it starts at 1.
mytempindex, err := strconv.Atoi(reqColNames[i])
if mytempindex > len(columns) {
return "", format.ErrInvalidColumnIndex
}
if err != nil {
return "", ErrMissingHeaders
}
mytempindex = mytempindex - 1
if mytempindex > len(columns) {
return "", ErrInvalidColumnIndex
}
row[i] = record[mytempindex]
// Subtract 1 because AWS indexing is 1-based while the internal keys are 0-based, so index "2" yields the key "_1".
row[i] = jsonValue(string("_"+strconv.Itoa(mytempindex-1)), record)
}
rowStr := strings.Join(row, reader.options.OutputFieldDelimiter)
if len(rowStr) > 1000000 {
rowStr := strings.Join(row, f.OutputFieldDelimiter())
if len(rowStr) > MaxCharsPerRecord {
return "", ErrOverMaxRecordSize
}
return rowStr, nil
}
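The index arithmetic above is easy to misread, so here is the 1-based SQL index to internal record key mapping in isolation (indexToKey is a hypothetical helper, not part of this package):

package main

import (
	"fmt"
	"strconv"
)

// indexToKey mirrors processColNameIndex: SQL column indices are
// 1-based, while records are keyed "_0", "_1", ... internally.
func indexToKey(col string) (string, error) {
	i, err := strconv.Atoi(col)
	if err != nil {
		return "", err
	}
	return "_" + strconv.Itoa(i-1), nil
}

func main() {
	key, _ := indexToKey("2")
	fmt.Println(key) // _1
}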
// processColNameLiteral is the function which creates the row for a
// name-based query.
func (reader *Input) processColNameLiteral(record []string, reqColNames []string, columns []string, columnsMap map[string]int, myFunc *SelectFuncs) (string, error) {
func processColNameLiteral(record string, reqColNames []string, myFunc *SelectFuncs, f format.Select) (string, error) {
row := make([]string, len(reqColNames))
for i := 0; i < len(reqColNames); i++ {
// this is the case to deal with COALESCE.
if reqColNames[i] == "" && isValidFunc(myFunc.index, i) {
row[i] = evaluateFuncExpr(myFunc.funcExpr[i], "", record, columnsMap)
row[i] = evaluateFuncExpr(myFunc.funcExpr[i], "", record)
continue
}
myTempIndex, notFound := columnsMap[trimQuotes(reqColNames[i])]
if !notFound {
return "", ErrMissingHeaders
}
row[i] = record[myTempIndex]
row[i] = jsonValue(reqColNames[i], record)
}
rowStr := strings.Join(row, reader.options.OutputFieldDelimiter)
if len(rowStr) > 1000000 {
rowStr := strings.Join(row, f.OutputFieldDelimiter())
if len(rowStr) > MaxCharsPerRecord {
return "", ErrOverMaxRecordSize
}
return rowStr, nil
}
// aggregationFunctions performs the actual aggregation methods on the given
// row; it uses an array defined in the main parsing function to keep track
// of values.
func aggregationFunctions(counter int, filtrCount int, myAggVals []float64, columnsMap map[string]int, storeReqCols []string, storeFunctions []string, record []string) error {
func aggregationFunctions(counter int, filtrCount int, myAggVals []float64, storeReqCols []string, storeFunctions []string, record string) error {
for i := 0; i < len(storeFunctions); i++ {
if storeFunctions[i] == "" {
i++
@@ -358,15 +386,13 @@ func aggregationFunctions(counter int, filtrCount int, myAggVals []float64, colu
} else {
// If column names are provided as an index, this if branch is used instead of the else below.
var convAggFloat float64
if representsInt(storeReqCols[i]) {
colIndex, _ := strconv.Atoi(storeReqCols[i])
// colIndex is 1-based
convAggFloat, _ = strconv.ParseFloat(record[colIndex-1], 64)
if format.IsInt(storeReqCols[i]) {
myIndex, _ := strconv.Atoi(storeReqCols[i])
convAggFloat, _ = strconv.ParseFloat(jsonValue(string("_"+strconv.Itoa(myIndex)), record), 64)
} else {
// case that the columns are in the form of named columns rather than indices.
convAggFloat, _ = strconv.ParseFloat(record[columnsMap[trimQuotes(storeReqCols[i])]], 64)
convAggFloat, _ = strconv.ParseFloat(jsonValue(storeReqCols[i], record), 64)
}
// This if statement is for calculating the min.
if storeFunctions[i] == "min" {
@@ -404,3 +430,25 @@ func aggregationFunctions(counter int, filtrCount int, myAggVals []float64, colu
}
return nil
}
// convertToSlice takes the map[string]interface{} record and converts it to a []string
func convertToSlice(columnsMap map[string]int, record map[string]interface{}, marshalledRecord string) []string {
var result []string
type kv struct {
Key string
Value int
}
var ss []kv
for k, v := range columnsMap {
ss = append(ss, kv{k, v})
}
sort.Slice(ss, func(i, j int) bool {
return ss[i].Value < ss[j].Value
})
for _, kv := range ss {
if _, ok := record[kv.Key]; ok {
result = append(result, jsonValue(kv.Key, marshalledRecord))
}
}
return result
}
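Since Go map iteration order is randomized, the sort by column position is what keeps the output fields in their original CSV order. A self-contained sketch of that step with a made-up three-column header:

package main

import (
	"fmt"
	"sort"
)

func main() {
	// Header name -> original column position, as in columnsMap above.
	columnsMap := map[string]int{"name": 1, "id": 0, "city": 2}
	type kv struct {
		Key   string
		Value int
	}
	var ss []kv
	for k, v := range columnsMap {
		ss = append(ss, kv{k, v})
	}
	// Sorting by position restores the original column order.
	sort.Slice(ss, func(i, j int) bool { return ss[i].Value < ss[j].Value })
	for _, e := range ss {
		fmt.Print(e.Key, " ")
	}
	fmt.Println() // id name city
}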

View File

@@ -17,10 +17,11 @@
package s3select
import (
"bytes"
"fmt"
"reflect"
"testing"
"github.com/minio/minio/pkg/s3select/format"
)
// Unit Test for the checkForDuplicates function.
@@ -35,6 +36,7 @@ func TestCheckForDuplicates(t *testing.T) {
{[]string{"name", "id", "last_name", "last_name"}, make(map[string]int), make(map[string]bool), make(map[string]int), ErrAmbiguousFieldName},
{[]string{"name", "id", "last_name", "another_name"}, make(map[string]int), make(map[string]bool), make(map[string]int), nil},
}
for _, table := range tables {
err := checkForDuplicates(table.myReq, table.myHeaders, table.myDup, table.myLow)
if err != table.myErr {
@@ -43,106 +45,14 @@ func TestCheckForDuplicates(t *testing.T) {
}
}
// Test for the function which processes column names to make sure that they
// are compatible with spaces.
func TestMyProcessing(t *testing.T) {
options := &Options{
HasHeader: false,
RecordDelimiter: "\n",
FieldDelimiter: ",",
Comments: "",
Name: "S3Object", // Default table name for all objects
ReadFrom: bytes.NewReader([]byte("Here , is, a, string + \n + random,random,stuff,stuff ")),
Compressed: "",
Expression: "",
OutputFieldDelimiter: ",",
StreamSize: 20,
}
s3s, err := NewInput(options)
if err != nil {
t.Error(err)
}
tables := []struct {
myReq []string
myHeaders map[string]int
myDup map[string]bool
myLow map[string]int
myOpts *Options
input *Input
length int
testOutput string
myErr error
}{
{[]string{"name", "id", "last_name", "CAST"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, 4, "CAST", nil},
{[]string{"name", "id", "last_name", "another_name"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, 4, "another_name", nil},
{[]string{"name", "id", "last_name", "another_name"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, 4, "another_name", nil},
{[]string{"name", "id", "random_name", "fame_name", "another_col"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, 5, "fame_name", nil},
}
for _, table := range tables {
err = checkForDuplicates(table.myReq, table.myHeaders, table.myDup, table.myLow)
if err != table.myErr {
t.Error()
}
if len(table.myReq) != table.length {
t.Errorf("UnexpectedError")
}
if table.myReq[3] != table.testOutput {
t.Error()
}
}
}
// TestMyRowIndexResults is a unit test which makes sure that the rows that are
// being printed are appropriate to the query being requested.
func TestMyRowIndexResults(t *testing.T) {
options := &Options{
HasHeader: false,
RecordDelimiter: "\n",
FieldDelimiter: ",",
Comments: "",
Name: "S3Object", // Default table name for all objects
ReadFrom: bytes.NewReader([]byte("Here , is, a, string + \n + random,random,stuff,stuff ")),
Compressed: "",
Expression: "",
OutputFieldDelimiter: ",",
StreamSize: 20,
}
s3s, err := NewInput(options)
if err != nil {
t.Error(err)
}
tables := []struct {
myReq []string
myHeaders map[string]int
myDup map[string]bool
myLow map[string]int
myOpts *Options
input *Input
myRecord []string
myTarget string
myAsterix string
columns []string
err error
}{
{[]string{"1", "2"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, []string{"target", "random", "hello", "stuff"}, "target,random", "target,random,hello,stuff", []string{"1", "2", "3", "4"}, nil},
{[]string{"2", "3", "4"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, []string{"random", "hullo", "thing", "stuff"}, "hullo,thing,stuff", "random,hullo,thing,stuff", []string{"1", "2", "3", "4"}, nil},
{[]string{"3", "2"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, []string{"random", "hullo", "thing", "stuff"}, "thing,hullo", "random,hullo,thing,stuff", []string{"1", "2", "3", "4"}, nil},
{[]string{"11", "1"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, s3s, []string{"random", "hullo", "thing", "stuff"}, "", "random,hullo,thing,stuff", []string{"1", "2", "3", "4"}, ErrInvalidColumnIndex},
}
for _, table := range tables {
checkForDuplicates(table.columns, table.myHeaders, table.myDup, table.myLow)
myRow, err := s3s.processColNameIndex(table.myRecord, table.myReq, table.columns)
if err != table.err {
t.Error()
}
if myRow != table.myTarget {
t.Error()
}
myRow = table.input.printAsterix(table.myRecord)
if myRow != table.myAsterix {
t.Error()
}
}
}
// stringIndex returns the index of a string in a list, or -1 if not found
func stringIndex(a string, list []string) int {
for i, v := range list {
if v == a {
return i
}
}
return -1
}
// TestMyHelperFunctions is a unit test which tests some small helper string
@@ -159,7 +69,7 @@ func TestMyHelperFunctions(t *testing.T) {
{"test3", []string{"test1", "test2", "test3", "test4", "test5"}, 2, true},
}
for _, table := range tables {
if stringInSlice(table.myReq, table.myList) != table.expected {
if format.StringInSlice(table.myReq, table.myList) != table.expected {
t.Error()
}
if stringIndex(table.myReq, table.myList) != table.myIndex {
@@ -233,82 +143,6 @@ func TestMyConversion(t *testing.T) {
}
}
// Unit Tests for Parser.
func TestMyParser(t *testing.T) {
tables := []struct {
myQuery string
err error
reqCols []string
alias string
myLimit int
aggFuncs []string
header []string
}{
{"SELECT * FROM S3OBJECT", nil, []string{"*"}, "S3OBJECT", 0, make([]string, 1), []string{"name1", "name2", "name3", "name4"}},
{"SELECT * FROM S3OBJECT AS A", nil, []string{"*"}, "A", 0, make([]string, 1), []string{"name1", "name2", "name3", "name4"}},
{"SELECT col_name FROM S3OBJECT AS A", nil, []string{"col_name"}, "A", 0, make([]string, 1), []string{"col_name", "name2", "name3", "name4"}},
{"SELECT col_name,col_other FROM S3OBJECT AS A LIMIT 5", nil, []string{"col_name", "col_other"}, "A", 5, make([]string, 2), []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT col_name,col_other FROM S3OBJECT AS A WHERE col_name = 'Name' LIMIT 5", nil, []string{"col_name", "col_other"}, "A", 5, make([]string, 2), []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT col_name,col_other FROM S3OBJECT AS A WHERE col_name = 'Name LIMIT 5", ErrLexerInvalidChar, nil, "", 0, nil, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT count(*) FROM S3OBJECT AS A WHERE col_name = 'Name' LIMIT 5", nil, []string{"*"}, "A", 5, []string{"count"}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT sum(col_name),sum(col_other) FROM S3OBJECT AS A WHERE col_name = 'Name' LIMIT 5", nil, []string{"col_name", "col_other"}, "A", 5, []string{"sum", "sum"}, []string{"col_name", "col_other"}},
{"SELECT A.col_name FROM S3OBJECT AS A", nil, []string{"col_name"}, "A", 0, make([]string, 1), []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT A.`col name` FROM S3OBJECT AS A", nil, []string{"col_name"}, "A", 0, make([]string, 1), []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT A._col_name FROM S3OBJECT AS A", nil, []string{"col_name"}, "A", 0, make([]string, 1), []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT A._col_name FROM S3OBJECT AS A WHERE randomname > 5", ErrMissingHeaders, nil, "", 0, nil, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT A._col_name FROM S3OBJECT AS A WHERE A._11 > 5", ErrInvalidColumnIndex, nil, "", 0, nil, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT COALESCE(col_name,col_other) FROM S3OBJECT AS A WHERE A._3 > 5", nil, []string{""}, "A", 0, []string{""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT COALESCE(col_name,col_other),COALESCE(col_name,col_other) FROM S3OBJECT AS A WHERE A._3 > 5", nil, []string{"", ""}, "A", 0, []string{"", ""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT COALESCE(col_name,col_other) ,col_name , COALESCE(col_name,col_other) FROM S3OBJECT AS A WHERE col_name > 5", nil, []string{"", "col_name", ""}, "A", 0, []string{"", "", ""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT NULLIF(col_name,col_other) ,col_name , COALESCE(col_name,col_other) FROM S3OBJECT AS A WHERE col_name > 5", nil, []string{"", "col_name", ""}, "A", 0, []string{"", "", ""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT NULLIF(col_name,col_other) FROM S3OBJECT AS A WHERE col_name > 5", nil, []string{""}, "A", 0, []string{""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT NULLIF(randomname,col_other) FROM S3OBJECT AS A WHERE col_name > 5", ErrMissingHeaders, nil, "", 0, nil, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT col_name FROM S3OBJECT AS A WHERE COALESCE(random,5) > 5", ErrMissingHeaders, nil, "", 0, nil, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT col_name FROM S3OBJECT AS A WHERE NULLIF(random,5) > 5", ErrMissingHeaders, nil, "", 0, nil, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT col_name FROM S3OBJECT AS A WHERE LOWER(col_name) BETWEEN 5 AND 7", nil, []string{"col_name"}, "A", 0, []string{""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT UPPER(col_name) FROM S3OBJECT AS A WHERE LOWER(col_name) BETWEEN 5 AND 7", nil, []string{""}, "A", 0, []string{""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT UPPER(*) FROM S3OBJECT AS A WHERE LOWER(col_name) BETWEEN 5 AND 7", ErrParseUnsupportedCallWithStar, nil, "", 0, nil, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT NULLIF(col_name,col_name) FROM S3OBJECT AS A WHERE NULLIF(LOWER(col_name),col_name) BETWEEN 5 AND 7", nil, []string{""}, "A", 0, []string{""}, []string{"col_name", "col_other", "name3", "name4"}},
{"SELECT COALESCE(col_name,col_name) FROM S3OBJECT AS A WHERE NULLIF(LOWER(col_name),col_name) BETWEEN 5 AND 7", nil, []string{""}, "A", 0, []string{""}, []string{"col_name", "col_other", "name3", "name4"}},
}
for _, table := range tables {
options := &Options{
HasHeader: false,
RecordDelimiter: "\n",
FieldDelimiter: ",",
Comments: "",
Name: "S3Object", // Default table name for all objects
ReadFrom: bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
Compressed: "",
Expression: "",
OutputFieldDelimiter: ",",
StreamSize: 20,
HeaderOpt: true,
}
s3s, err := NewInput(options)
if err != nil {
t.Error(err)
}
s3s.header = table.header
reqCols, alias, myLimit, _, aggFunctionNames, _, err := s3s.ParseSelect(table.myQuery)
if table.err != err {
t.Error()
}
if !reflect.DeepEqual(reqCols, table.reqCols) {
t.Error()
}
if alias != table.alias {
t.Error()
}
if myLimit != int64(table.myLimit) {
t.Error()
}
if !reflect.DeepEqual(table.aggFuncs, aggFunctionNames) {
t.Error()
}
}
}
// Unit tests for the main function that performs aggregation.
func TestMyAggregationFunc(t *testing.T) {
columnsMap := make(map[string]int)
@@ -321,21 +155,22 @@ func TestMyAggregationFunc(t *testing.T) {
columnsMap map[string]int
storeReqCols []string
storeFunctions []string
record []string
record string
err error
expectedVal float64
}{
{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"count"}, []string{"1", "2"}, nil, 11},
{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"min"}, []string{"1", "2"}, nil, 1},
{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"max"}, []string{"1", "2"}, nil, 10},
{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"sum"}, []string{"1", "2"}, nil, 11},
{1, 1, []float64{10}, columnsMap, []string{"Col1"}, []string{"avg"}, []string{"1", "2"}, nil, 5.500},
{10, 5, []float64{0.000}, columnsMap, []string{"Col1"}, []string{"random"}, []string{"1", "2"}, ErrParseNonUnaryAgregateFunctionCall, 0},
{0, 5, []float64{0}, columnsMap, []string{"0"}, []string{"count"}, []string{"1", "2"}, nil, 1},
{10, 5, []float64{10}, columnsMap, []string{"1"}, []string{"min"}, []string{"1", "12"}, nil, 1},
{10, 5, []float64{10, 11, 12, 13, 14}, columnsMap, []string{"Col1"}, []string{"count"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 11},
{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"min"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 1},
{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"max"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 10},
{10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"sum"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 11},
{1, 1, []float64{10}, columnsMap, []string{"Col1"}, []string{"avg"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 5.500},
{10, 5, []float64{0.0000}, columnsMap, []string{"Col1"}, []string{"random"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", ErrParseNonUnaryAgregateFunctionCall, 0},
{0, 5, []float64{0}, columnsMap, []string{"0"}, []string{"count"}, "{\"Col1\":\"1\",\"Col2\":\"2\"}", nil, 1},
{10, 5, []float64{10}, columnsMap, []string{"1"}, []string{"min"}, "{\"_1\":\"1\",\"_2\":\"2\"}", nil, 1},
}
for _, table := range tables {
err := aggregationFunctions(table.counter, table.filtrCount, table.myAggVals, table.columnsMap, table.storeReqCols, table.storeFunctions, table.record)
err := aggregationFunctions(table.counter, table.filtrCount, table.myAggVals, table.storeReqCols, table.storeFunctions, table.record)
if table.err != err {
t.Error()
}
@@ -346,156 +181,6 @@ func TestMyAggregationFunc(t *testing.T) {
}
}
// Unit Tests for the function which converts a float array to string.
func TestToStringAgg(t *testing.T) {
options := &Options{
HasHeader: false,
RecordDelimiter: "\n",
FieldDelimiter: ",",
Comments: "",
Name: "S3Object", // Default table name for all objects
ReadFrom: bytes.NewReader([]byte("Here , is, a, string + \n + random,random,stuff,stuff ")),
Compressed: "",
Expression: "",
OutputFieldDelimiter: ",",
StreamSize: 20,
HeaderOpt: true,
}
s3s, err := NewInput(options)
if err != nil {
t.Error(err)
}
tables := []struct {
myAggVal []float64
expected string
}{
{[]float64{10, 11, 12, 13, 14}, "10,11,12,13,14"},
{[]float64{10, 11.3, 12, 13, 14}, "10,11.300000,12,13,14"},
{[]float64{10.235, 11.3, 12, 13, 14}, "10.235000,11.300000,12,13,14"},
{[]float64{10.235, 11.3, 12.123, 13.456, 14.789}, "10.235000,11.300000,12.123000,13.456000,14.789000"},
{[]float64{10}, "10"},
}
for _, table := range tables {
val := s3s.aggFuncToStr(table.myAggVal)
if val != table.expected {
t.Error()
}
}
}
// TestMyRowColLiteralResults is a unit test which makes sure that the rows that
// are being printed are appropriate to the query being requested.
func TestMyRowColLiteralResults(t *testing.T) {
options := &Options{
HasHeader: false,
RecordDelimiter: "\n",
FieldDelimiter: ",",
Comments: "",
Name: "S3Object", // Default table name for all objects
ReadFrom: bytes.NewReader([]byte("Here , is, a, string + \n + random,random,stuff,stuff ")),
Compressed: "",
Expression: "",
OutputFieldDelimiter: ",",
StreamSize: 20,
HeaderOpt: true,
}
s3s, err := NewInput(options)
if err != nil {
t.Error(err)
}
tables := []struct {
myReq []string
myHeaders map[string]int
myDup map[string]bool
myLow map[string]int
myOpts *Options
tempList []string
input *Input
myRecord []string
myTarget string
columns []string
err error
}{
{[]string{"draft", "year"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, []string{"draft", "year"}, s3s, []string{"target", "random", "hello", "stuff"}, "target,random", []string{"draft", "year", "random", "another"}, nil},
{[]string{"year", "draft"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, []string{"year", "draft"}, s3s, []string{"draft", "2012", "thing", "stuff"}, "2012,draft", []string{"draft", "year", "random", "another"}, nil},
{[]string{"yearrandomstuff", "draft"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, []string{"yearrandomstuff", "draft"}, s3s, []string{"draft", "2012", "thing", "stuff"}, "", []string{"draft", "year", "random", "another"}, ErrMissingHeaders},
{[]string{"draft", "randomstuff"}, make(map[string]int), make(map[string]bool), make(map[string]int), options, []string{"yearrandomstuff", "draft"}, s3s, []string{"draft", "2012", "thing", "stuff"}, "", []string{"draft", "year", "random", "another"}, ErrMissingHeaders},
}
for _, table := range tables {
checkForDuplicates(table.columns, table.myHeaders, table.myDup, table.myLow)
myRow, err := table.input.processColNameLiteral(table.myRecord, table.myReq, table.tempList, table.myHeaders, nil)
if err != table.err {
t.Error()
}
if myRow != table.myTarget {
t.Error()
}
}
}
// TestMyWhereEval is a function which provides unit tests for the function
// which evaluates the where clause.
func TestMyWhereEval(t *testing.T) {
columnsMap := make(map[string]int)
columnsMap["Col1"] = 0
columnsMap["Col2"] = 1
tables := []struct {
myQuery string
record []string
err error
expected bool
header []string
}{
{"SELECT * FROM S3OBJECT", []string{"record_1,record_2,record_3,record_4"}, nil, true, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT WHERE Col1 < -1", []string{"0", "1"}, nil, false, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT WHERE Col1 < -1 OR Col2 > 15", []string{"151", "12"}, nil, false, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT WHERE Col1 > -1 AND Col2 > 15", []string{"151", "12"}, nil, false, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT WHERE Col1 > 1.00", []string{"151.0000", "12"}, nil, true, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT WHERE Col1 > 100", []string{"random", "12"}, nil, false, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT WHERE Col1 BETWEEN 100 AND 0", []string{"151", "12"}, nil, false, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT WHERE Col1 BETWEEN 100.0 AND 0.0", []string{"151", "12"}, nil, false, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT AS A WHERE A.1 BETWEEN 160 AND 150", []string{"151", "12"}, nil, true, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT AS A WHERE A._1 BETWEEN 160 AND 0", []string{"151", "12"}, nil, true, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT AS A WHERE A._1 BETWEEN 0 AND 160", []string{"151", "12"}, nil, true, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT A._1 LIKE 'r%'", []string{"record_1,record_2,record_3,record_4"}, nil, true, []string{"Col1", "Col2"}},
{"SELECT s._2 FROM S3Object s WHERE s._2 = 'Steven'", []string{"record_1", "Steven", "Steven", "record_4"}, nil, true, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT AS A WHERE Col1 BETWEEN 0 AND 160", []string{"151", "12"}, nil, true, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT AS A WHERE Col1 BETWEEN 160 AND 0", []string{"151", "12"}, nil, true, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT AS A WHERE UPPER(Col1) BETWEEN 160 AND 0", []string{"151", "12"}, nil, true, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT AS A WHERE UPPER(Col1) = 'RANDOM'", []string{"random", "12"}, nil, true, []string{"Col1", "Col2"}},
{"SELECT * FROM S3OBJECT AS A WHERE LOWER(UPPER(Col1) = 'random'", []string{"random", "12"}, nil, true, []string{"Col1", "Col2"}},
}
for _, table := range tables {
options := &Options{
HasHeader: false,
RecordDelimiter: "\n",
FieldDelimiter: ",",
Comments: "",
Name: "S3Object", // Default table name for all objects
ReadFrom: bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
Compressed: "",
Expression: "",
OutputFieldDelimiter: ",",
StreamSize: 20,
HeaderOpt: true,
}
s3s, err := NewInput(options)
s3s.header = table.header
if err != nil {
t.Error(err)
}
_, alias, _, whereClause, _, _, _ := s3s.ParseSelect(table.myQuery)
myVal, err := matchesMyWhereClause(table.record, columnsMap, alias, whereClause)
if table.err != err {
t.Error()
}
if myVal != table.expected {
t.Error()
}
}
}
// TestMyStringComparator is a unit test which ensures that the appropriate
// values are being compared for strings.
func TestMyStringComparator(t *testing.T) {
@@ -594,231 +279,13 @@ func TestMySizeFunction(t *testing.T) {
{[]string{"test1", "test2", "test3", "test4", "test5"}, 30},
}
for _, table := range tables {
if processSize(table.myRecord) != table.expected {
if format.ProcessSize(table.myRecord) != table.expected {
t.Error()
}
}
}
// TestInterpreter is a function which provides unit testing for the main
// interpreter function.
func TestInterpreter(t *testing.T) {
tables := []struct {
myQuery string
myChan chan *Row
err error
header []string
}{
{"Select random from S3OBJECT", make(chan *Row), ErrMissingHeaders, []string{"name1", "name2", "name3", "name4"}},
{"Select * from S3OBJECT as A WHERE name2 > 5.00", make(chan *Row), nil, []string{"name1", "name2", "name3", "name4"}},
{"Select * from S3OBJECT", make(chan *Row), nil, []string{"name1", "name2", "name3", "name4"}},
{"Select A_1 from S3OBJECT as A", make(chan *Row), nil, []string{"1", "2", "3", "4"}},
{"Select count(*) from S3OBJECT", make(chan *Row), nil, []string{"name1", "name2", "name3", "name4"}},
{"Select * from S3OBJECT WHERE name1 > 5.00", make(chan *Row), nil, []string{"name1", "name2", "name3", "name4"}},
}
for _, table := range tables {
options := &Options{
HasHeader: false,
RecordDelimiter: "\n",
FieldDelimiter: ",",
Comments: "",
Name: "S3Object", // Default table name for all objects
ReadFrom: bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
Compressed: "",
Expression: "",
OutputFieldDelimiter: ",",
StreamSize: 20,
HeaderOpt: true,
}
s3s, err := NewInput(options)
if err != nil {
t.Error(err)
}
s3s.header = table.header
reqCols, alias, myLimit, whereClause, aggFunctionNames, _, err := s3s.ParseSelect(table.myQuery)
if err != table.err {
t.Fatal()
}
if err == nil {
go s3s.processSelectReq(reqCols, alias, whereClause, myLimit, aggFunctionNames, table.myChan, nil)
select {
case row, ok := <-table.myChan:
if ok && len(row.record) > 0 {
} else if ok && row.err != nil {
if row.err != table.err {
t.Error()
}
close(table.myChan)
} else if !ok {
}
}
}
}
}
// TestMyXMLFunction is a function that provides unit testing for the XML
// creating function.
func TestMyXMLFunction(t *testing.T) {
options := &Options{
HasHeader: false,
RecordDelimiter: "\n",
FieldDelimiter: ",",
Comments: "",
Name: "S3Object", // Default table name for all objects
ReadFrom: bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
Compressed: "",
Expression: "",
OutputFieldDelimiter: ",",
StreamSize: 20,
HeaderOpt: true,
}
s3s, err := NewInput(options)
if err != nil {
t.Error(err)
}
tables := []struct {
expectedStat int
expectedProgress int
}{
{150, 156},
}
for _, table := range tables {
myVal, _ := s3s.createStatXML()
myOtherVal, _ := s3s.createProgressXML()
if len(myVal) != table.expectedStat {
t.Error()
}
if len(myOtherVal) != table.expectedProgress {
fmt.Println(len(myOtherVal))
t.Error()
}
}
}
// TestMyProtocolFunction is a function which provides unit testing for several
// of the functions which write the binary protocol.
func TestMyProtocolFunction(t *testing.T) {
options := &Options{
HasHeader: false,
RecordDelimiter: "\n",
FieldDelimiter: ",",
Comments: "",
Name: "S3Object", // Default table name for all objects
ReadFrom: bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
Compressed: "",
Expression: "",
OutputFieldDelimiter: ",",
StreamSize: 20,
HeaderOpt: true,
}
s3s, err := NewInput(options)
if err != nil {
t.Error(err)
}
tables := []struct {
payloadMsg string
expectedRecord int
expectedEnd int
}{
{"random payload", 115, 56},
}
for _, table := range tables {
var currentMessage = &bytes.Buffer{}
if len(s3s.writeRecordMessage(table.payloadMsg, currentMessage).Bytes()) != table.expectedRecord {
t.Error()
}
currentMessage.Reset()
if len(s3s.writeEndMessage(currentMessage).Bytes()) != table.expectedEnd {
t.Error()
}
currentMessage.Reset()
if len(s3s.writeContinuationMessage(currentMessage).Bytes()) != 57 {
t.Error()
}
currentMessage.Reset()
}
}
// TestMyInfoProtocolFunctions is a function which provides unit testing for the
// stat and progress messages of the protocols.
func TestMyInfoProtocolFunctions(t *testing.T) {
options := &Options{
HasHeader: true,
RecordDelimiter: "\n",
FieldDelimiter: ",",
Comments: "",
Name: "S3Object", // Default table name for all objects
ReadFrom: bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
Compressed: "",
Expression: "",
OutputFieldDelimiter: ",",
StreamSize: 20,
}
s3s, err := NewInput(options)
if err != nil {
t.Error(err)
}
myVal, _ := s3s.createStatXML()
myOtherVal, _ := s3s.createProgressXML()
tables := []struct {
payloadStatMsg string
payloadProgressMsg string
expectedStat int
expectedProgress int
}{
{myVal, myOtherVal, 233, 243},
}
for _, table := range tables {
var currBuf = &bytes.Buffer{}
if len(s3s.writeStatMessage(table.payloadStatMsg, currBuf).Bytes()) != table.expectedStat {
t.Error()
}
currBuf.Reset()
if len(s3s.writeProgressMessage(table.payloadProgressMsg, currBuf).Bytes()) != table.expectedProgress {
t.Error()
}
}
}
// TestMyErrorProtocolFunctions is a function which provides unit testing for
// the error message type of protocol.
func TestMyErrorProtocolFunctions(t *testing.T) {
options := &Options{
HasHeader: false,
RecordDelimiter: "\n",
FieldDelimiter: ",",
Comments: "",
Name: "S3Object", // Default table name for all objects
ReadFrom: bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
Compressed: "",
Expression: "",
OutputFieldDelimiter: ",",
StreamSize: 20,
HeaderOpt: true,
}
s3s, err := NewInput(options)
if err != nil {
t.Error(err)
}
tables := []struct {
err error
expectedError int
}{
{ErrInvalidCast, 248},
{ErrTruncatedInput, 200},
{ErrUnsupportedSyntax, 114},
{ErrCSVParsingError, 157},
}
for _, table := range tables {
var currentMessage = &bytes.Buffer{}
if len(s3s.writeErrorMessage(table.err, currentMessage).Bytes()) != table.expectedError {
t.Error()
}
}
}
func TestMatch(t *testing.T) {
testCases := []struct {
pattern string
@@ -1004,51 +471,6 @@ func TestMatch(t *testing.T) {
}
}
// TestMyValids is a unit test which ensures that the appropriate values are
// being returned from the isValid... functions.
func TestMyValids(t *testing.T) {
tables := []struct {
myQuery string
indexList []int
myIndex int
myValIndex bool
header []string
err error
}{
{"SELECT UPPER(NULLIF(draft_year,random_name))", []int{3, 5, 6, 7, 8, 9}, 3, true, []string{"draft_year", "random_name"}, nil},
{"SELECT UPPER(NULLIF(draft_year,xandom_name))", []int{3, 5, 6, 7, 8, 9}, 3, true, []string{"draft_year", "random_name"}, ErrMissingHeaders},
}
for _, table := range tables {
options := &Options{
HasHeader: false,
RecordDelimiter: "\n",
FieldDelimiter: ",",
Comments: "",
Name: "S3Object", // Default table name for all objects
ReadFrom: bytes.NewReader([]byte("name1,name2,name3,name4" + "\n" + "5,is,a,string" + "\n" + "random,random,stuff,stuff")),
Compressed: "",
Expression: "",
OutputFieldDelimiter: ",",
StreamSize: 20,
HeaderOpt: true,
}
s3s, err := NewInput(options)
if err != nil {
t.Error(err)
}
s3s.header = table.header
_, _, _, _, _, _, err = s3s.ParseSelect(table.myQuery)
if err != table.err {
t.Fatal()
}
myVal := isValidFunc(table.indexList, table.myIndex)
if myVal != table.myValIndex {
t.Error()
}
}
}
// TestMyFuncProcessing is a unit test which ensures that the appropriate values are
// being returned from the Processing... functions.
func TestMyFuncProcessing(t *testing.T) {