mirror of
https://github.com/minio/minio.git
synced 2025-11-20 09:56:07 -05:00
Performance improvements to SELECT API on certain query operations (#6752)
This dramatically improves the performance of certain queries, such as aggregates like 'count(*)'. Without this PR ``` ~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz 2173762 real 0m42.464s user 0m0.071s sys 0m0.010s ``` With this PR ``` ~ time mc select --query "select count(*) from S3Object" myminio/sjm-airlines/star2000.csv.gz 2173762 real 0m17.603s user 0m0.093s sys 0m0.008s ``` That is roughly a 2.4x speedup (wall-clock time drops from 42.5s to 17.6s, about 59% less). This PR avoids a lot of type conversions and instead relies on raw sequences of data and interprets them lazily. ``` benchcmp old new benchmark old ns/op new ns/op delta BenchmarkSQLAggregate_100K-4 551213 259782 -52.87% BenchmarkSQLAggregate_1M-4 6981901985 2432413729 -65.16% BenchmarkSQLAggregate_2M-4 13511978488 4536903552 -66.42% BenchmarkSQLAggregate_10M-4 68427084908 23266283336 -66.00% benchmark old allocs new allocs delta BenchmarkSQLAggregate_100K-4 2366 485 -79.50% BenchmarkSQLAggregate_1M-4 47455492 21462860 -54.77% BenchmarkSQLAggregate_2M-4 95163637 43110771 -54.70% BenchmarkSQLAggregate_10M-4 476959550 216906510 -54.52% benchmark old bytes new bytes delta BenchmarkSQLAggregate_100K-4 1233079 1086024 -11.93% BenchmarkSQLAggregate_1M-4 2607984120 557038536 -78.64% BenchmarkSQLAggregate_2M-4 5254103616 1128149168 -78.53% BenchmarkSQLAggregate_10M-4 26443524872 5722715992 -78.36% ```
This commit is contained in:
committed by
kannappanr
parent
f9779b24ad
commit
7e1661f4fa
3
vendor/github.com/tidwall/gjson/README.md
generated
vendored
3
vendor/github.com/tidwall/gjson/README.md
generated
vendored
@@ -88,13 +88,14 @@ The dot and wildcard characters can be escaped with '\\'.
|
||||
```
|
||||
|
||||
You can also query an array for the first match by using `#[...]`, or find all matches with `#[...]#`.
|
||||
Queries support the `==`, `!=`, `<`, `<=`, `>`, `>=` comparison operators and the simple pattern matching `%` operator.
|
||||
Queries support the `==`, `!=`, `<`, `<=`, `>`, `>=` comparison operators and the simple pattern matching `%` (like) and `!%` (not like) operators.
|
||||
|
||||
```
|
||||
friends.#[last=="Murphy"].first >> "Dale"
|
||||
friends.#[last=="Murphy"]#.first >> ["Dale","Jane"]
|
||||
friends.#[age>45]#.last >> ["Craig","Murphy"]
|
||||
friends.#[first%"D*"].last >> "Murphy"
|
||||
friends.#[first!%"D*"].last >> "Craig"
|
||||
```
|
||||
|
||||
## JSON Lines
|
||||
|
||||
41
vendor/github.com/tidwall/gjson/gjson.go
generated
vendored
41
vendor/github.com/tidwall/gjson/gjson.go
generated
vendored
@@ -77,7 +77,20 @@ func (t Result) String() string {
|
||||
case False:
|
||||
return "false"
|
||||
case Number:
|
||||
return strconv.FormatFloat(t.Num, 'f', -1, 64)
|
||||
if len(t.Raw) == 0 {
|
||||
// calculated result
|
||||
return strconv.FormatFloat(t.Num, 'f', -1, 64)
|
||||
}
|
||||
var i int
|
||||
if t.Raw[0] == '-' {
|
||||
i++
|
||||
}
|
||||
for ; i < len(t.Raw); i++ {
|
||||
if t.Raw[i] < '0' || t.Raw[i] > '9' {
|
||||
return strconv.FormatFloat(t.Num, 'f', -1, 64)
|
||||
}
|
||||
}
|
||||
return t.Raw
|
||||
case String:
|
||||
return t.Str
|
||||
case JSON:
|
||||
@@ -344,24 +357,30 @@ func (t Result) arrayOrMap(vc byte, valueize bool) (r arrayOrMapResult) {
|
||||
if (json[i] >= '0' && json[i] <= '9') || json[i] == '-' {
|
||||
value.Type = Number
|
||||
value.Raw, value.Num = tonum(json[i:])
|
||||
value.Str = ""
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
case '{', '[':
|
||||
value.Type = JSON
|
||||
value.Raw = squash(json[i:])
|
||||
value.Str, value.Num = "", 0
|
||||
case 'n':
|
||||
value.Type = Null
|
||||
value.Raw = tolit(json[i:])
|
||||
value.Str, value.Num = "", 0
|
||||
case 't':
|
||||
value.Type = True
|
||||
value.Raw = tolit(json[i:])
|
||||
value.Str, value.Num = "", 0
|
||||
case 'f':
|
||||
value.Type = False
|
||||
value.Raw = tolit(json[i:])
|
||||
value.Str, value.Num = "", 0
|
||||
case '"':
|
||||
value.Type = String
|
||||
value.Raw, value.Str = tostr(json[i:])
|
||||
value.Num = 0
|
||||
}
|
||||
i += len(value.Raw) - 1
|
||||
|
||||
@@ -370,9 +389,13 @@ func (t Result) arrayOrMap(vc byte, valueize bool) (r arrayOrMapResult) {
|
||||
key = value
|
||||
} else {
|
||||
if valueize {
|
||||
r.oi[key.Str] = value.Value()
|
||||
if _, ok := r.oi[key.Str]; !ok {
|
||||
r.oi[key.Str] = value.Value()
|
||||
}
|
||||
} else {
|
||||
r.o[key.Str] = value
|
||||
if _, ok := r.o[key.Str]; !ok {
|
||||
r.o[key.Str] = value
|
||||
}
|
||||
}
|
||||
}
|
||||
count++
|
||||
@@ -732,7 +755,7 @@ func parseArrayPath(path string) (r arrayPathResult) {
|
||||
if i < len(path) {
|
||||
s = i
|
||||
if path[i] == '!' {
|
||||
if i < len(path)-1 && path[i+1] == '=' {
|
||||
if i < len(path)-1 && (path[i+1] == '=' || path[i+1] == '%') {
|
||||
i++
|
||||
}
|
||||
} else if path[i] == '<' || path[i] == '>' {
|
||||
@@ -1076,6 +1099,8 @@ func queryMatches(rp *arrayPathResult, value Result) bool {
|
||||
return value.Str >= rpv
|
||||
case "%":
|
||||
return match.Match(value.Str, rpv)
|
||||
case "!%":
|
||||
return !match.Match(value.Str, rpv)
|
||||
}
|
||||
case Number:
|
||||
rpvn, _ := strconv.ParseFloat(rpv, 64)
|
||||
@@ -1288,7 +1313,7 @@ func parseArray(c *parseContext, i int, path string) (int, bool) {
|
||||
if rp.alogok {
|
||||
break
|
||||
}
|
||||
c.value.Raw = val
|
||||
c.value.Raw = ""
|
||||
c.value.Type = Number
|
||||
c.value.Num = float64(h - 1)
|
||||
c.calcd = true
|
||||
@@ -1603,7 +1628,11 @@ func GetMany(json string, path ...string) []Result {
|
||||
// The return value is a Result array where the number of items
|
||||
// will be equal to the number of input paths.
|
||||
func GetManyBytes(json []byte, path ...string) []Result {
|
||||
return GetMany(string(json), path...)
|
||||
res := make([]Result, len(path))
|
||||
for i, path := range path {
|
||||
res[i] = GetBytes(json, path)
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
var fieldsmu sync.RWMutex
|
||||
|
||||
Reference in New Issue
Block a user