From 88fd1cba71e3d574d07e36dfbf61f7be24eea0a4 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Fri, 25 Feb 2022 12:31:19 -0800 Subject: [PATCH] select: add MISSING operator support (#14406) Probably not full support, but for regular checks it should work. Fixes #14358 --- internal/s3select/csv/record.go | 1 + internal/s3select/json/record.go | 2 + internal/s3select/select_test.go | 83 ++++++++++++++++++++++++++ internal/s3select/sql/evaluate.go | 4 ++ internal/s3select/sql/jsonpath.go | 4 +- internal/s3select/sql/jsonpath_test.go | 46 +++++++------- internal/s3select/sql/parser.go | 5 +- internal/s3select/sql/value.go | 37 +++++++++++- 8 files changed, 153 insertions(+), 29 deletions(-) diff --git a/internal/s3select/csv/record.go b/internal/s3select/csv/record.go index 15a0965b7..c0dacc6c2 100644 --- a/internal/s3select/csv/record.go +++ b/internal/s3select/csv/record.go @@ -59,6 +59,7 @@ func (r *Record) Get(name string) (*sql.Value, error) { } return sql.FromBytes([]byte(r.csvRecord[idx])), nil } + // TODO: Return Missing? return nil, fmt.Errorf("column %v not found", name) } diff --git a/internal/s3select/json/record.go b/internal/s3select/json/record.go index be8d0d8a9..7b6ddad76 100644 --- a/internal/s3select/json/record.go +++ b/internal/s3select/json/record.go @@ -89,6 +89,8 @@ func (r *Record) Set(name string, value *sql.Value) (sql.Record, error) { v = s } else if value.IsNull() { v = nil + } else if value.IsMissing() { + return r, nil } else if b, ok := value.ToBytes(); ok { // This can either be raw json or a CSV value. // Only treat objects and arrays as JSON. diff --git a/internal/s3select/select_test.go b/internal/s3select/select_test.go index 66bac90c6..2d347f304 100644 --- a/internal/s3select/select_test.go +++ b/internal/s3select/select_test.go @@ -479,6 +479,89 @@ func TestJSONQueries(t *testing.T) { query: `select * from s3object[*] as s where s.request.header['User-Agent'] is not null`, wantResult: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}}`, withJSON: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} +{"request":{"uri":"/2","header":{}}}`, + }, + { + name: "is-not-missing", + query: `select * from s3object[*] as s where s.request.header['User-Agent'] is not missing`, + wantResult: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}}`, + withJSON: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} +{"request":{"uri":"/2","header":{}}}`, + }, + { + name: "is-not-missing-2", + query: `select * from s3object[*] as s where s.request.header is not missing`, + wantResult: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} +{"request":{"uri":"/2","header":{}}}`, + withJSON: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} +{"request":{"uri":"/2","header":{}}}`, + }, + { + name: "is-not-missing-null", + query: `select * from s3object[*] as s where s.request.header is not missing`, + wantResult: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} +{"request":{"uri":"/2","header":null}}`, + withJSON: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} +{"request":{"uri":"/2","header":null}}`, + }, + { + name: "is-not-missing", + query: `select * from s3object[*] as s where s.request.header['User-Agent'] is not missing`, + wantResult: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}}`, + withJSON: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} +{"request":{"uri":"/2","header":{}}}`, + }, + { + name: "is-not-missing-2", + query: `select * from s3object[*] as s where s.request.header is not missing`, + wantResult: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} +{"request":{"uri":"/2","header":{}}}`, + withJSON: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} +{"request":{"uri":"/2","header":{}}}`, + }, + { + name: "is-not-missing-null", + query: `select * from s3object[*] as s where s.request.header is not missing`, + wantResult: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} +{"request":{"uri":"/2","header":null}}`, + withJSON: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} +{"request":{"uri":"/2","header":null}}`, + }, + + { + name: "is-missing", + query: `select * from s3object[*] as s where s.request.header['User-Agent'] is missing`, + wantResult: `{"request":{"uri":"/2","header":{}}}`, + withJSON: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} +{"request":{"uri":"/2","header":{}}}`, + }, + { + name: "is-missing-2", + query: `select * from s3object[*] as s where s.request.header is missing`, + wantResult: ``, + withJSON: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} +{"request":{"uri":"/2","header":{}}}`, + }, + { + name: "is-missing", + query: `select * from s3object[*] as s where s.request.header['User-Agent'] = missing`, + wantResult: `{"request":{"uri":"/2","header":{}}}`, + withJSON: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} +{"request":{"uri":"/2","header":{}}}`, + }, + { + name: "is-missing-null", + query: `select * from s3object[*] as s where s.request.header is missing`, + wantResult: ``, + withJSON: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} +{"request":{"uri":"/2","header":null}}`, + }, + { + name: "is-missing-select", + query: `select s.request.header['User-Agent'] as h from s3object[*] as s`, + wantResult: `{"h":"test"} +{}`, + withJSON: `{"request":{"uri":"/1","header":{"User-Agent":"test"}}} {"request":{"uri":"/2","header":{}}}`, }, } diff --git a/internal/s3select/sql/evaluate.go b/internal/s3select/sql/evaluate.go index 21927d9f7..c06920dbd 100644 --- a/internal/s3select/sql/evaluate.go +++ b/internal/s3select/sql/evaluate.go @@ -449,6 +449,8 @@ func jsonToValue(result interface{}) (*Value, error) { return FromArray(rval), nil case nil: return FromNull(), nil + case Missing: + return FromMissing(), nil } return nil, fmt.Errorf("Unhandled value type: %T", result) } @@ -493,6 +495,8 @@ func (e *LitValue) evalNode(_ Record) (res *Value, err error) { return FromString(string(*e.String)), nil case e.Boolean != nil: return FromBool(bool(*e.Boolean)), nil + case e.Missing: + return FromMissing(), nil } return FromNull(), nil } diff --git a/internal/s3select/sql/jsonpath.go b/internal/s3select/sql/jsonpath.go index d3b53298f..737069358 100644 --- a/internal/s3select/sql/jsonpath.go +++ b/internal/s3select/sql/jsonpath.go @@ -52,12 +52,12 @@ func jsonpathEval(p []*JSONPathElement, v interface{}) (r interface{}, flat bool } } // Key not found - return nil result - return nil, false, nil + return Missing{}, false, nil case simdjson.Object: elem := kvs.FindKey(key, nil) if elem == nil { // Key not found - return nil result - return nil, false, nil + return Missing{}, false, nil } val, err := IterToValue(elem.Iter) if err != nil { diff --git a/internal/s3select/sql/jsonpath_test.go b/internal/s3select/sql/jsonpath_test.go index 7afd04f23..da5dea05e 100644 --- a/internal/s3select/sql/jsonpath_test.go +++ b/internal/s3select/sql/jsonpath_test.go @@ -66,32 +66,34 @@ func TestJsonpathEval(t *testing.T) { {"s.authorInfo.yearRange", []interface{}{[]interface{}{1890.0, 1976.0}, []interface{}{1920.0, 1992.0}, []interface{}{1881.0, 1975.0}}}, {"s.authorInfo.name", []interface{}{"Agatha Christie", "Isaac Asimov", "P. G. Wodehouse"}}, {"s.authorInfo.yearRange[0]", []interface{}{1890.0, 1920.0, 1881.0}}, - {"s.publicationHistory[0].pages", []interface{}{256.0, 336.0, nil}}, + {"s.publicationHistory[0].pages", []interface{}{256.0, 336.0, Missing{}}}, } for i, tc := range cases { - jp := JSONPath{} - err := p.ParseString(tc.str, &jp) - // fmt.Println(jp) - if err != nil { - t.Fatalf("parse failed!: %d %v %s", i, err, tc) - } - - // Read only the first json object from the file - recs, err := getJSONStructs(b) - if err != nil || len(recs) != 3 { - t.Fatalf("%v or length was not 3", err) - } - - for j, rec := range recs { - // fmt.Println(rec) - r, _, err := jsonpathEval(jp.PathExpr, rec) + t.Run(tc.str, func(t *testing.T) { + jp := JSONPath{} + err := p.ParseString(tc.str, &jp) + // fmt.Println(jp) if err != nil { - t.Errorf("Error: %d %d %v", i, j, err) + t.Fatalf("parse failed!: %d %v %s", i, err, tc) } - if !reflect.DeepEqual(r, tc.res[j]) { - fmt.Printf("%#v (%v) != %v (%v)\n", r, reflect.TypeOf(r), tc.res[j], reflect.TypeOf(tc.res[j])) - t.Errorf("case: %d %d failed", i, j) + + // Read only the first json object from the file + recs, err := getJSONStructs(b) + if err != nil || len(recs) != 3 { + t.Fatalf("%v or length was not 3", err) } - } + + for j, rec := range recs { + // fmt.Println(rec) + r, _, err := jsonpathEval(jp.PathExpr, rec) + if err != nil { + t.Errorf("Error: %d %d %v", i, j, err) + } + if !reflect.DeepEqual(r, tc.res[j]) { + fmt.Printf("%#v (%v) != %v (%v)\n", r, reflect.TypeOf(r), tc.res[j], reflect.TypeOf(tc.res[j])) + t.Errorf("case: %d %d failed", i, j) + } + } + }) } } diff --git a/internal/s3select/sql/parser.go b/internal/s3select/sql/parser.go index 13d4ccbec..bae580a38 100644 --- a/internal/s3select/sql/parser.go +++ b/internal/s3select/sql/parser.go @@ -340,7 +340,8 @@ type LitValue struct { Int *float64 `parser:" | @Int"` // To avoid value out of range, use float64 instead String *LiteralString `parser:" | @LitString"` Boolean *Boolean `parser:" | @(\"TRUE\" | \"FALSE\")"` - Null bool `parser:" | @\"NULL\")"` + Null bool `parser:" | @\"NULL\""` + Missing bool `parser:" | @\"MISSING\")"` } // Identifier represents a parsed identifier @@ -352,7 +353,7 @@ type Identifier struct { var ( sqlLexer = lexer.Must(lexer.Regexp(`(\s+)` + `|(?P(?i)\b(?:YEAR|MONTH|DAY|HOUR|MINUTE|SECOND|TIMEZONE_HOUR|TIMEZONE_MINUTE)\b)` + - `|(?P(?i)\b(?:SELECT|FROM|TOP|DISTINCT|ALL|WHERE|GROUP|BY|HAVING|UNION|MINUS|EXCEPT|INTERSECT|ORDER|LIMIT|OFFSET|TRUE|FALSE|NULL|IS|NOT|ANY|SOME|BETWEEN|AND|OR|LIKE|ESCAPE|AS|IN|BOOL|INT|INTEGER|STRING|FLOAT|DECIMAL|NUMERIC|TIMESTAMP|AVG|COUNT|MAX|MIN|SUM|COALESCE|NULLIF|CAST|DATE_ADD|DATE_DIFF|EXTRACT|TO_STRING|TO_TIMESTAMP|UTCNOW|CHAR_LENGTH|CHARACTER_LENGTH|LOWER|SUBSTRING|TRIM|UPPER|LEADING|TRAILING|BOTH|FOR)\b)` + + `|(?P(?i)\b(?:SELECT|FROM|TOP|DISTINCT|ALL|WHERE|GROUP|BY|HAVING|UNION|MINUS|EXCEPT|INTERSECT|ORDER|LIMIT|OFFSET|TRUE|FALSE|NULL|IS|NOT|ANY|SOME|BETWEEN|AND|OR|LIKE|ESCAPE|AS|IN|BOOL|INT|INTEGER|STRING|FLOAT|DECIMAL|NUMERIC|TIMESTAMP|AVG|COUNT|MAX|MIN|SUM|COALESCE|NULLIF|CAST|DATE_ADD|DATE_DIFF|EXTRACT|TO_STRING|TO_TIMESTAMP|UTCNOW|CHAR_LENGTH|CHARACTER_LENGTH|LOWER|SUBSTRING|TRIM|UPPER|LEADING|TRAILING|BOTH|FOR|MISSING)\b)` + `|(?P[a-zA-Z_][a-zA-Z0-9_]*)` + `|(?P"([^"]*("")?)*")` + `|(?P\d*\.\d+([eE][-+]?\d+)?)` + diff --git a/internal/s3select/sql/value.go b/internal/s3select/sql/value.go index 7676cd99c..8a778bcf1 100644 --- a/internal/s3select/sql/value.go +++ b/internal/s3select/sql/value.go @@ -49,6 +49,9 @@ type Value struct { value interface{} } +// Missing is used to indicate a non-existing value. +type Missing struct{} + // MarshalJSON provides json marshaling of values. func (v Value) MarshalJSON() ([]byte, error) { if b, ok := v.ToBytes(); ok { @@ -76,6 +79,8 @@ func (v Value) GetTypeString() string { return "BYTES" case []Value: return "ARRAY" + case Missing: + return "MISSING" } return "--" } @@ -104,6 +109,8 @@ func (v Value) Repr() string { } s.WriteString("]:ARRAY") return s.String() + case Missing: + return ":MISSING" default: return fmt.Sprintf("%v:INVALID", v.value) } @@ -139,6 +146,11 @@ func FromNull() *Value { return &Value{value: nil} } +// FromMissing creates a Value with Missing value +func FromMissing() *Value { + return &Value{value: Missing{}} +} + // FromBytes creates a Value from a []byte func FromBytes(b []byte) *Value { return &Value{value: b} @@ -239,6 +251,12 @@ func (v Value) IsNull() bool { return false } +// IsMissing - checks if value is missing. +func (v Value) IsMissing() bool { + _, ok := v.value.(Missing) + return ok +} + // IsArray returns whether the value is an array. func (v Value) IsArray() (ok bool) { _, ok = v.value.([]Value) @@ -283,7 +301,7 @@ func (v Value) String() string { // CSVString - convert to string for CSV serialization func (v Value) CSVString() string { switch x := v.value.(type) { - case nil: + case nil, Missing: return "" case bool: if x { @@ -390,13 +408,21 @@ func (v *Value) compareOp(op string, a *Value) (res bool, err error) { switch op { case opIs: if a.IsNull() { - return v.IsNull(), nil + // Missing is null + return v.IsNull() || v.IsMissing(), nil + } + if a.IsMissing() { + return v.IsMissing(), nil } // Forward to Equal op = opEq case opIsNot: if a.IsNull() { - return !v.IsNull(), nil + // Missing is not null + return !v.IsNull() && !v.IsMissing(), nil + } + if a.IsMissing() { + return !v.IsMissing(), nil } // Forward to not equal. op = opIneq @@ -416,6 +442,11 @@ func (v *Value) compareOp(op string, a *Value) (res bool, err error) { return boolCompare(op, v.IsNull(), a.IsNull()) } + if a.IsMissing() || v.IsMissing() { + // If one is, both must be. + return boolCompare(op, v.IsMissing(), a.IsMissing()) + } + // Check array values aArr, aOK := a.ToArray() vArr, vOK := v.ToArray()