minio/pkg/s3select/sql/value.go
Klaus Post c9b8bd8de2 S3 Select: optimize output (#8238)
Queue output items and reuse them.
Remove the unneeded type system in sql and just use the Go type system.

In best case this is more than an order of magnitude speedup:

```
BenchmarkSelectAll_1M-12    	       1	1841049400 ns/op	274299728 B/op	 4198522 allocs/op
BenchmarkSelectAll_1M-12    	      14	  84833400 ns/op	169228346 B/op	 3146541 allocs/op
```
2019-09-17 05:56:27 +05:30

807 lines
17 KiB
Go

/*
* MinIO Cloud Storage, (C) 2019 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package sql
import (
"errors"
"fmt"
"math"
"reflect"
"strconv"
"strings"
"time"
)
// Sentinel errors returned by the arithmetic and comparison helpers in
// this file.
var (
// errArithMismatchedTypes reports an arithmetic or min/max operand
// that is not numeric.
errArithMismatchedTypes = errors.New("cannot perform arithmetic on mismatched types")
// errArithInvalidOperator reports an operator string that is not one
// of the supported operators.
errArithInvalidOperator = errors.New("invalid arithmetic operator")
// errArithDivideByZero reports a division or modulo by zero.
errArithDivideByZero = errors.New("cannot divide by 0")
// errCmpMismatchedTypes reports a comparison whose operands cannot be
// brought to a common comparable type.
errCmpMismatchedTypes = errors.New("cannot compare values of different types")
// errCmpInvalidBoolOperator reports an operator other than "=" or
// "!=" applied to two booleans.
errCmpInvalidBoolOperator = errors.New("invalid comparison operator for boolean arguments")
)
// Value represents a value of restricted type reduced from an
// expression represented by an ASTNode. The dynamic Go type held in
// the value field determines the SQL type: nil (NULL), bool, string,
// int64, float64, time.Time or []byte.
//
// In cases where we are fetching data from a data source (like csv),
// the type may not be determined yet. In these cases, a byte-slice is
// used.
type Value struct {
// value is the underlying Go value; a nil interface means NULL.
value interface{}
}
// GetTypeString returns the SQL type name of the stored value. Values
// holding a Go type that is not one of the supported kinds yield "--".
func (v Value) GetTypeString() string {
	name := "--"
	switch v.value.(type) {
	case nil:
		name = "NULL"
	case bool:
		name = "BOOL"
	case string:
		name = "STRING"
	case int64:
		name = "INT"
	case float64:
		name = "FLOAT"
	case time.Time:
		name = "TIMESTAMP"
	case []byte:
		name = "BYTES"
	}
	return name
}
// Repr returns a debugging representation of the value in the form
// `<value>:<TYPE>`. String and byte-slice payloads are wrapped in
// double quotes; unsupported Go types are tagged INVALID.
func (v Value) Repr() string {
	if v.value == nil {
		return ":NULL"
	}
	switch x := v.value.(type) {
	case string:
		return "\"" + x + "\":" + v.GetTypeString()
	case []byte:
		return "\"" + string(x) + "\":BYTES"
	case time.Time:
		return x.String() + ":TIMESTAMP"
	case bool, int64, float64:
		return fmt.Sprintf("%v:%s", v.value, v.GetTypeString())
	}
	return fmt.Sprintf("%v:INVALID", v.value)
}
// FromFloat returns a pointer to a new Value holding the float64 f.
func FromFloat(f float64) *Value {
	v := new(Value)
	v.value = f
	return v
}
// FromInt returns a pointer to a new Value holding the int64 f.
func FromInt(f int64) *Value {
	v := new(Value)
	v.value = f
	return v
}
// FromString returns a pointer to a new Value holding the string str.
func FromString(str string) *Value {
	v := new(Value)
	v.value = str
	return v
}
// FromBool returns a pointer to a new Value holding the bool b.
func FromBool(b bool) *Value {
	v := new(Value)
	v.value = b
	return v
}
// FromTimestamp returns a pointer to a new Value holding the
// timestamp t.
func FromTimestamp(t time.Time) *Value {
	v := new(Value)
	v.value = t
	return v
}
// FromNull returns a pointer to a new Value representing NULL, i.e.
// one whose underlying value is nil.
func FromNull() *Value {
	return new(Value)
}
// FromBytes returns a pointer to a new Value holding the byte-slice b,
// i.e. a value whose type has not been determined yet. The slice is
// stored as given, not copied.
func FromBytes(b []byte) *Value {
	v := new(Value)
	v.value = b
	return v
}
// ToFloat returns the value as a float64. It succeeds for FLOAT values
// and for INT values (which are converted); ok is false otherwise.
func (v Value) ToFloat() (val float64, ok bool) {
	if f, isFloat := v.value.(float64); isFloat {
		return f, true
	}
	if i, isInt := v.value.(int64); isInt {
		return float64(i), true
	}
	return 0, false
}
// ToInt returns the value as an int64. ok is true only when the
// underlying value is an INT; no conversion from other types is done.
func (v Value) ToInt() (val int64, ok bool) {
	i, isInt := v.value.(int64)
	return i, isInt
}
// ToString returns the value as a string. ok is true only when the
// underlying value is a STRING; byte-slices are not converted.
func (v Value) ToString() (val string, ok bool) {
	s, isString := v.value.(string)
	return s, isString
}
// Equals reports whether v and b match strictly: they must have the
// same type (per SameTypeAs) and deeply equal underlying values.
func (v Value) Equals(b Value) (ok bool) {
	return v.SameTypeAs(b) && reflect.DeepEqual(v.value, b.value)
}
// SameTypeAs reports whether v and b hold exactly the same type. The
// supported kinds are checked with plain type assertions; anything
// else (including NULL) falls back to comparing reflected types.
func (v Value) SameTypeAs(b Value) (ok bool) {
	switch v.value.(type) {
	case bool:
		_, match := b.value.(bool)
		return match
	case string:
		_, match := b.value.(string)
		return match
	case int64:
		_, match := b.value.(int64)
		return match
	case float64:
		_, match := b.value.(float64)
		return match
	case time.Time:
		_, match := b.value.(time.Time)
		return match
	case []byte:
		_, match := b.value.([]byte)
		return match
	}
	return reflect.TypeOf(v.value) == reflect.TypeOf(b.value)
}
// ToBool returns the value as a bool. ok is true only when the
// underlying value is a BOOL.
func (v Value) ToBool() (val bool, ok bool) {
	b, isBool := v.value.(bool)
	return b, isBool
}
// ToTimestamp returns the value as a time.Time. ok is true only when
// the underlying value is a TIMESTAMP; otherwise t is the zero time.
func (v Value) ToTimestamp() (t time.Time, ok bool) {
	ts, isTime := v.value.(time.Time)
	return ts, isTime
}
// ToBytes returns the value as a byte-slice. ok is true only when the
// underlying value is an untyped byte-slice; strings are not
// converted.
func (v Value) ToBytes() (val []byte, ok bool) {
	raw, isBytes := v.value.([]byte)
	return raw, isBytes
}
// IsNull reports whether the value is missing, i.e. the underlying
// value is nil.
func (v Value) IsNull() bool {
	return v.value == nil
}
// isNumeric reports whether the underlying value is an INT or a FLOAT.
func (v Value) isNumeric() bool {
	if _, isInt := v.value.(int64); isInt {
		return true
	}
	_, isFloat := v.value.(float64)
	return isFloat
}
// Setters used internally to mutate values in place.

// setInt replaces the underlying value with the int64 i.
func (v *Value) setInt(i int64) {
v.value = i
}
// setFloat replaces the underlying value with the float64 f.
func (v *Value) setFloat(f float64) {
v.value = f
}
// setString replaces the underlying value with the string s.
func (v *Value) setString(s string) {
v.value = s
}
// setBool replaces the underlying value with the bool b.
func (v *Value) setBool(b bool) {
v.value = b
}
// setTimestamp replaces the underlying value with the timestamp t.
func (v *Value) setTimestamp(t time.Time) {
v.value = t
}
// String returns the Go-syntax representation (fmt verb %#v) of the
// underlying value. Note that an untyped byte-slice is therefore
// rendered as a []byte literal rather than as text.
func (v Value) String() string {
return fmt.Sprintf("%#v", v.value)
}
// CSVString renders the value as the text written into a CSV field.
// NULL becomes the empty string, numbers use their shortest decimal
// form, timestamps go through FormatSQLTimestamp and byte-slices are
// emitted verbatim. Unsupported Go types yield a fixed placeholder
// message.
func (v Value) CSVString() string {
	if v.value == nil {
		return ""
	}
	switch x := v.value.(type) {
	case string:
		return x
	case []byte:
		return string(x)
	case bool:
		return strconv.FormatBool(x)
	case int64:
		return strconv.FormatInt(x, 10)
	case float64:
		return strconv.FormatFloat(x, 'g', -1, 64)
	case time.Time:
		return FormatSQLTimestamp(x)
	}
	return "CSV serialization not implemented for this type"
}
// floatToValue converts f into a Value, choosing an INT representation
// when that is lossless.
//
// An INT Value is returned only when f has no fractional part and lies
// inside the int64 range [-2^63, 2^63). Every other input (fractional
// numbers, NaN, infinities and whole numbers too large for int64) is
// kept as a FLOAT Value, because converting an out-of-range float to
// int64 does not produce a meaningful result.
func floatToValue(f float64) *Value {
	// 2^63 is exactly representable as a float64, unlike math.MaxInt64,
	// so it is used as the exclusive upper bound.
	const int64Limit = float64(1 << 63)

	intPart, fracPart := math.Modf(f)
	if fracPart == 0 && intPart >= -int64Limit && intPart < int64Limit {
		return FromInt(int64(intPart))
	}
	return FromFloat(f)
}
// negate flips the sign of a numeric value in place. Values that are
// neither FLOAT nor INT are left untouched.
func (v *Value) negate() {
	if f, isFloat := v.value.(float64); isFloat {
		v.value = -f
		return
	}
	if i, isInt := v.value.(int64); isInt {
		v.value = -i
	}
}
// Value comparison functions: we do not expose them outside the
// package. Ordering operators "<", "<=", ">", ">=" work on strings,
// numbers and timestamps. Equality operators "=", "!=" work on
// strings, numbers, timestamps and booleans.

// Supported comparison operators.
const (
opLt = "<" // less than
opLte = "<=" // less than or equal
opGt = ">" // greater than
opGte = ">=" // greater than or equal
opEq = "=" // equal
opIneq = "!=" // not equal
)
// compareOp evaluates `v op a` and returns the boolean outcome.
//
// When numeric types are compared, type promotion can happen: two
// INTs are compared as integers, any other numeric pair is compared
// as floats. If values do not have types yet (e.g. when reading from
// CSV), inferTypesForCmp first resolves them in place, so both
// operands may be mutated by this call.
//
// An unsupported operator yields errArithInvalidOperator, and operands
// that cannot be brought to a common type yield errCmpMismatchedTypes.
func (v *Value) compareOp(op string, a *Value) (res bool, err error) {
	if !isValidComparisonOperator(op) {
		return false, errArithInvalidOperator
	}

	// Untyped byte-slices need a concrete type before comparing.
	if err = inferTypesForCmp(v, a); err != nil {
		return false, err
	}

	if v.isNumeric() && a.isNumeric() {
		lhsInt, lhsIsInt := v.ToInt()
		rhsInt, rhsIsInt := a.ToInt()
		if lhsIsInt && rhsIsInt {
			return intCompare(op, lhsInt, rhsInt), nil
		}
		// At least one side is a float, so promote both.
		lhsFloat, _ := v.ToFloat()
		rhsFloat, _ := a.ToFloat()
		return floatCompare(op, lhsFloat, rhsFloat), nil
	}

	if lhs, isStr := v.ToString(); isStr {
		if rhs, isStr := a.ToString(); isStr {
			return stringCompare(op, lhs, rhs), nil
		}
	}

	if lhs, isBool := v.ToBool(); isBool {
		if rhs, isBool := a.ToBool(); isBool {
			return boolCompare(op, lhs, rhs)
		}
	}

	if lhs, isTime := v.ToTimestamp(); isTime {
		if rhs, isTime := a.ToTimestamp(); isTime {
			return timestampCompare(op, lhs, rhs), nil
		}
	}

	return false, errCmpMismatchedTypes
}
// inferTypesForCmp resolves untyped (byte-slice) operands of a
// comparison in place so that both values end up with a concrete type.
//
// If neither operand is untyped nothing is changed. If both are
// untyped the pair is tried as int/int, float/float, bool/bool and
// finally falls back to string/string. If exactly one is untyped it is
// converted to the type of the other operand (string, number or bool):
// a failed number or bool conversion returns an error, and any other
// target type returns errCmpMismatchedTypes.
func inferTypesForCmp(a *Value, b *Value) error {
	_, okA := a.ToBytes()
	_, okB := b.ToBytes()
	switch {
	case !okA && !okB:
		// Both Values already have types.
		return nil

	case okA && okB:
		// Both Values are untyped so try the types in order:
		// int, float, bool, string.

		// Check for numeric inference.
		iA, okAi := a.bytesToInt()
		iB, okBi := b.bytesToInt()
		if okAi && okBi {
			a.setInt(iA)
			b.setInt(iB)
			return nil
		}

		fA, okAf := a.bytesToFloat()
		fB, okBf := b.bytesToFloat()
		if okAf && okBf {
			a.setFloat(fA)
			b.setFloat(fB)
			return nil
		}

		// Check for an int and float combination. Each operand must
		// receive its own parsed value.
		// NOTE(review): text accepted by bytesToInt appears to always be
		// accepted by bytesToFloat as well, so the float/float check
		// above likely handles these pairs first - confirm before
		// relying on the int type being preserved here.
		if okAi && okBf {
			a.setInt(iA)
			b.setFloat(fB)
			return nil
		}
		if okBi && okAf {
			a.setFloat(fA)
			b.setInt(iB)
			return nil
		}

		// Not numeric types at this point.

		// Check for bool inference.
		bA, okAb := a.bytesToBool()
		bB, okBb := b.bytesToBool()
		if okAb && okBb {
			a.setBool(bA)
			b.setBool(bB)
			return nil
		}

		// Fall back to string.
		sA := a.bytesToString()
		sB := b.bytesToString()
		a.setString(sA)
		b.setString(sB)
		return nil

	case okA && !okB:
		// Here `a` is untyped, but `b` has a fixed type, so `a` is
		// converted to match `b`.
		switch b.value.(type) {
		case string:
			s := a.bytesToString()
			a.setString(s)

		case int64, float64:
			if iA, ok := a.bytesToInt(); ok {
				a.setInt(iA)
			} else if fA, ok := a.bytesToFloat(); ok {
				a.setFloat(fA)
			} else {
				return fmt.Errorf("Could not convert %s to a number", a.String())
			}

		case bool:
			if bA, ok := a.bytesToBool(); ok {
				a.setBool(bA)
			} else {
				return fmt.Errorf("Could not convert %s to a boolean", a.String())
			}

		default:
			return errCmpMismatchedTypes
		}
		return nil

	case !okA && okB:
		// Swap arguments to avoid repeating code.
		return inferTypesForCmp(b, a)

	default:
		// Does not happen: the four cases above are exhaustive.
		return nil
	}
}
// Value arithmetic functions: we do not expose them outside the
// package. All arithmetic works only on numeric values: two INT
// operands produce an INT, any other numeric pair is computed as
// FLOAT. TODO: Add support for large number arithmetic.

// Supported arithmetic operators.
const (
opPlus = "+" // addition
opMinus = "-" // subtraction
opDivide = "/" // division
opMultiply = "*" // multiplication
opModulo = "%" // remainder
)
// arithOp computes `v op a` and stores the result in v.
//
// For arithmetic operations, if both values are numeric then the
// operation shall succeed. If the types are unknown (byte-slices),
// automatic conversion to a number is attempted first, which may
// mutate both v and a. Two INT operands are computed as integers; any
// other numeric pair is computed as floats.
//
// Errors:
//   - a byte-slice operand that is not a number: the error from
//     inferTypeForArithOp;
//   - a non-numeric operand: errArithMismatchedTypes wrapped by
//     errInvalidDataType;
//   - an unsupported operator: errArithInvalidOperator wrapped by
//     errInvalidDataType;
//   - division or modulo by zero: errArithDivideByZero.
//
// When the operation itself fails, v keeps its (type-inferred) operand
// value instead of being overwritten with a meaningless result.
func (v *Value) arithOp(op string, a *Value) error {
	if err := inferTypeForArithOp(v); err != nil {
		return err
	}
	if err := inferTypeForArithOp(a); err != nil {
		return err
	}

	if !v.isNumeric() || !a.isNumeric() {
		return errInvalidDataType(errArithMismatchedTypes)
	}

	if !isValidArithOperator(op) {
		// Report the operator, not the operand types, as the culprit.
		return errInvalidDataType(errArithInvalidOperator)
	}

	intV, ok1i := v.ToInt()
	intA, ok2i := a.ToInt()
	if ok1i && ok2i {
		res, err := intArithOp(op, intV, intA)
		if err != nil {
			return err
		}
		v.setInt(res)
		return nil
	}

	// At least one operand is a float: convert both.
	flV, _ := v.ToFloat()
	flA, _ := a.ToFloat()
	res, err := floatArithOp(op, flV, flA)
	if err != nil {
		return err
	}
	v.setFloat(res)
	return nil
}
// inferTypeForArithOp gives an untyped (byte-slice) value a numeric
// type in place, trying int first and then float. Values that already
// have a type are left alone. If the bytes are not a number, an error
// wrapped by errInvalidDataType is returned and the value is unchanged.
func inferTypeForArithOp(a *Value) error {
	raw, untyped := a.ToBytes()
	if !untyped {
		return nil
	}

	if asInt, isInt := a.bytesToInt(); isInt {
		a.setInt(asInt)
		return nil
	}

	if asFloat, isFloat := a.bytesToFloat(); isFloat {
		a.setFloat(asFloat)
		return nil
	}

	return errInvalidDataType(fmt.Errorf("Could not convert %s to a number", string(raw)))
}
// All the bytesTo* functions defined below assume the value is a
// byte-slice.

// bytesToInt parses the untyped value as a base-10 int64. The bool
// result reports success; it is false only on a conversion failure.
func (v *Value) bytesToInt() (int64, bool) {
	raw, _ := v.value.([]byte)
	parsed, parseErr := strconv.ParseInt(string(raw), 10, 64)
	ok := parseErr == nil
	return parsed, ok
}
// bytesToFloat parses the untyped value as a float64. The bool result
// reports success; it is false only on a conversion failure.
func (v *Value) bytesToFloat() (float64, bool) {
	raw, _ := v.value.([]byte)
	parsed, parseErr := strconv.ParseFloat(string(raw), 64)
	ok := parseErr == nil
	return parsed, ok
}
// bytesToBool parses the untyped value as a boolean. After
// lower-casing, "t"/"true" mean true and "f"/"false" mean false. The
// second result reports success; it is false for any other text.
func (v *Value) bytesToBool() (val bool, ok bool) {
	raw, _ := v.value.([]byte)
	switch strings.ToLower(string(raw)) {
	case "t", "true":
		return true, true
	case "f", "false":
		return false, true
	}
	return false, false
}
// bytesToString returns the untyped value as a string. It never
// fails: a value that is not a byte-slice yields the empty string.
func (v *Value) bytesToString() string {
	if raw, isBytes := v.value.([]byte); isBytes {
		return string(raw)
	}
	return ""
}
// minmax folds a into the running minimum (isMax == false) or maximum
// (isMax == true) held in v, storing the result in v.
//
// It works only on numeric arguments, and v is assumed to be numeric
// already. If a is untyped, conversion to a number (first int, then
// float) is attempted in place. On the first row v simply takes a's
// value. Two INTs are compared as integers; otherwise both sides are
// compared as floats and the result is a FLOAT.
//
// It returns the inference error for a non-numeric byte-slice, or
// errArithMismatchedTypes when a has a non-numeric type.
func (v *Value) minmax(a *Value, isMax, isFirstRow bool) error {
	if err := inferTypeForArithOp(a); err != nil {
		return err
	}
	if !a.isNumeric() {
		return errArithMismatchedTypes
	}

	candInt, candIsInt := a.ToInt()
	candFloat, _ := a.ToFloat()

	// In case of first row, set v to a.
	if isFirstRow {
		if candIsInt {
			v.setInt(candInt)
		} else {
			v.setFloat(candFloat)
		}
		return nil
	}

	if curInt, curIsInt := v.ToInt(); curIsInt && candIsInt {
		best := curInt
		switch {
		case isMax && candInt > best:
			best = candInt
		case !isMax && candInt < best:
			best = candInt
		}
		v.setInt(best)
		return nil
	}

	curFloat, _ := v.ToFloat()
	if isMax {
		v.setFloat(math.Max(curFloat, candFloat))
	} else {
		v.setFloat(math.Min(curFloat, candFloat))
	}
	return nil
}
// inferTypeAsTimestamp converts a string or untyped (byte-slice) value
// into a timestamp in place by parsing it with parseSQLTimestamp. A
// parse failure is returned and leaves the value unchanged. Values of
// any other type are left alone and nil is returned.
func inferTypeAsTimestamp(v *Value) error {
	var text string
	switch raw := v.value.(type) {
	case string:
		text = raw
	case []byte:
		text = string(raw)
	default:
		return nil
	}

	parsed, err := parseSQLTimestamp(text)
	if err != nil {
		return err
	}
	v.setTimestamp(parsed)
	return nil
}
// inferTypeAsString converts an untyped (byte-slice) value to a string
// in place. It is called when the caller requires a string context to
// proceed; values that already have a type are not modified.
func inferTypeAsString(v *Value) {
	if raw, untyped := v.value.([]byte); untyped {
		v.value = string(raw)
	}
}
// isValidComparisonOperator reports whether op is one of the
// supported comparison operators.
func isValidComparisonOperator(op string) bool {
	switch op {
	case opLt, opLte, opGt, opGte, opEq, opIneq:
		return true
	}
	return false
}
func intCompare(op string, left, right int64) bool {
switch op {
case opLt:
return left < right
case opLte:
return left <= right
case opGt:
return left > right
case opGte:
return left >= right
case opEq:
return left == right
case opIneq:
return left != right
}
// This case does not happen
return false
}
func floatCompare(op string, left, right float64) bool {
switch op {
case opLt:
return left < right
case opLte:
return left <= right
case opGt:
return left > right
case opGte:
return left >= right
case opEq:
return left == right
case opIneq:
return left != right
}
// This case does not happen
return false
}
func stringCompare(op string, left, right string) bool {
switch op {
case opLt:
return left < right
case opLte:
return left <= right
case opGt:
return left > right
case opGte:
return left >= right
case opEq:
return left == right
case opIneq:
return left != right
}
// This case does not happen
return false
}
// boolCompare evaluates `left op right` for two booleans. Only "=" and
// "!=" are supported; any other operator returns
// errCmpInvalidBoolOperator.
func boolCompare(op string, left, right bool) (bool, error) {
	same := left == right
	if op == opEq {
		return same, nil
	}
	if op == opIneq {
		return !same, nil
	}
	return false, errCmpInvalidBoolOperator
}
func timestampCompare(op string, left, right time.Time) bool {
switch op {
case opLt:
return left.Before(right)
case opLte:
return left.Before(right) || left.Equal(right)
case opGt:
return left.After(right)
case opGte:
return left.After(right) || left.Equal(right)
case opEq:
return left.Equal(right)
case opIneq:
return !left.Equal(right)
}
// This case does not happen
return false
}
// isValidArithOperator reports whether op is one of the supported
// arithmetic operators.
func isValidArithOperator(op string) bool {
	switch op {
	case opPlus, opMinus, opDivide, opMultiply, opModulo:
		return true
	}
	return false
}
// Overflow errors are ignored.
func intArithOp(op string, left, right int64) (int64, error) {
switch op {
case opPlus:
return left + right, nil
case opMinus:
return left - right, nil
case opDivide:
if right == 0 {
return 0, errArithDivideByZero
}
return left / right, nil
case opMultiply:
return left * right, nil
case opModulo:
if right == 0 {
return 0, errArithDivideByZero
}
return left % right, nil
}
// This does not happen
return 0, nil
}
// Overflow errors are ignored.
func floatArithOp(op string, left, right float64) (float64, error) {
switch op {
case opPlus:
return left + right, nil
case opMinus:
return left - right, nil
case opDivide:
if right == 0 {
return 0, errArithDivideByZero
}
return left / right, nil
case opMultiply:
return left * right, nil
case opModulo:
if right == 0 {
return 0, errArithDivideByZero
}
return math.Mod(left, right), nil
}
// This does not happen
return 0, nil
}