S3 Select: Convert CSV data to JSON (#8464)

This commit is contained in:
Klaus Post
2019-11-09 20:10:35 +03:00
committed by kannappanr
parent 26863009c0
commit 1c90a6bd49
3 changed files with 169 additions and 5 deletions

View File

@@ -25,6 +25,7 @@ import (
"strconv"
"strings"
"time"
"unicode/utf8"
)
var (
@@ -158,13 +159,13 @@ func (v Value) ToFloat() (val float64, ok bool) {
return 0, false
}
// ToInt reports the stored value as an int64 without converting it.
// ok is true only when the value already holds an int64; otherwise val is 0.
func (v Value) ToInt() (val int64, ok bool) {
	if n, isInt := v.value.(int64); isInt {
		return n, true
	}
	return 0, false
}
// ToString returns the stored value if it is a string; no conversion from
// other types is attempted.
// ok is false, and val is the empty string, when the value does not hold a string.
func (v Value) ToString() (val string, ok bool) {
val, ok = v.value.(string)
return
@@ -215,7 +216,7 @@ func (v Value) ToTimestamp() (t time.Time, ok bool) {
return
}
// ToBytes returns the stored value if it is a byte-slice; no conversion from
// other types is attempted.
// ok is false, and val is nil, when the value does not hold a []byte.
func (v Value) ToBytes() (val []byte, ok bool) {
val, ok = v.value.([]byte)
return
@@ -339,6 +340,48 @@ const (
opIneq = "!="
)
// InferBytesType attempts to infer a more specific type for a value that
// currently holds bytes, and calls the matching setter on the first match.
//
// Candidates are tried in this order: int, float, bool, JSON-like content,
// timestamp, string.
//
// Content whose whitespace-trimmed form starts with '{' or '[' is treated as a
// JSON object or array and is left as bytes. Only the leading character is
// inspected; the content is not parsed or validated as JSON.
//
// The string fallback stores the whitespace-trimmed text.
//
// An error is returned if the value is not bytes, or if none of the earlier
// candidates matched and the bytes are not valid UTF-8.
func (v *Value) InferBytesType() (err error) {
	b, ok := v.ToBytes()
	if !ok {
		return fmt.Errorf("InferBytesType: Input is not bytes, but %v", v.GetTypeString())
	}

	// Check for numeric inference
	if x, ok := v.bytesToInt(); ok {
		v.setInt(x)
		return nil
	}
	if x, ok := v.bytesToFloat(); ok {
		v.setFloat(x)
		return nil
	}
	if x, ok := v.bytesToBool(); ok {
		v.setBool(x)
		return nil
	}

	asString := strings.TrimSpace(v.bytesToString())

	// Looks like a JSON object/array: keep the value as bytes.
	if len(b) > 0 &&
		(strings.HasPrefix(asString, "{") || strings.HasPrefix(asString, "[")) {
		return nil
	}

	// Use a distinct name so the named result err is not shadowed.
	if t, tsErr := parseSQLTimestamp(asString); tsErr == nil {
		v.setTimestamp(t)
		return nil
	}

	// Validate the raw bytes before committing to a string value.
	if !utf8.Valid(b) {
		return errors.New("value is not valid utf-8")
	}

	// Fallback to string
	v.setString(asString)
	return nil
}
// When numeric types are compared, type promotions could happen. If
// values do not have types (e.g. when reading from CSV), for
// comparison operations, automatic type conversion happens by trying