mirror of
https://github.com/minio/minio.git
synced 2025-11-06 20:33:07 -05:00
Concurrent CSV parsing and reduce S3 select allocations (#8200)
```
CSV parsing, BEFORE:

BenchmarkReaderBasic-12                 2842       407533 ns/op      397860 B/op       957 allocs/op
BenchmarkReaderReplace-12               2718       429914 ns/op      397844 B/op       957 allocs/op
BenchmarkReaderReplaceTwo-12            2718       435556 ns/op      397855 B/op       957 allocs/op
BenchmarkAggregateCount_100K-12          171      6798974 ns/op    16667102 B/op    308077 allocs/op
BenchmarkAggregateCount_1M-12             19     65657411 ns/op   168057743 B/op   3146610 allocs/op
BenchmarkSelectAll_10M-12                  1  20882119900 ns/op  2758799896 B/op  41978762 allocs/op

CSV parsing, AFTER:

BenchmarkReaderBasic-12                 3721       312549 ns/op      101920 B/op       338 allocs/op
BenchmarkReaderReplace-12               3776       318810 ns/op      101993 B/op       340 allocs/op
BenchmarkReaderReplaceTwo-12            3610       330967 ns/op      102012 B/op       341 allocs/op
BenchmarkAggregateCount_100K-12          295      4149588 ns/op     3553623 B/op    103261 allocs/op
BenchmarkAggregateCount_1M-12             30     37746503 ns/op    33827931 B/op   1049435 allocs/op
BenchmarkSelectAll_10M-12                  1  17608495800 ns/op  1416504040 B/op  21007082 allocs/op

~ benchcmp old.txt new.txt

benchmark                           old ns/op       new ns/op       delta
BenchmarkReaderBasic-12             407533          312549          -23.31%
BenchmarkReaderReplace-12           429914          318810          -25.84%
BenchmarkReaderReplaceTwo-12        435556          330967          -24.01%
BenchmarkAggregateCount_100K-12     6798974         4149588         -38.97%
BenchmarkAggregateCount_1M-12       65657411        37746503        -42.51%
BenchmarkSelectAll_10M-12           20882119900     17608495800     -15.68%

benchmark                           old allocs      new allocs      delta
BenchmarkReaderBasic-12             957             338             -64.68%
BenchmarkReaderReplace-12           957             340             -64.47%
BenchmarkReaderReplaceTwo-12        957             341             -64.37%
BenchmarkAggregateCount_100K-12     308077          103261          -66.48%
BenchmarkAggregateCount_1M-12       3146610         1049435         -66.65%
BenchmarkSelectAll_10M-12           41978762        21007082        -49.96%

benchmark                           old bytes       new bytes       delta
BenchmarkReaderBasic-12             397860          101920          -74.38%
BenchmarkReaderReplace-12           397844          101993          -74.36%
BenchmarkReaderReplaceTwo-12        397855          102012          -74.36%
BenchmarkAggregateCount_100K-12     16667102        3553623         -78.68%
BenchmarkAggregateCount_1M-12       168057743       33827931        -79.87%
BenchmarkSelectAll_10M-12           2758799896      1416504040      -48.66%
```

```
BenchmarkReaderHuge/97K-12          2200      540840 ns/op   184.32 MB/s    1604450 B/op      687 allocs/op
BenchmarkReaderHuge/194K-12         1522      752257 ns/op   265.04 MB/s    2143135 B/op     1335 allocs/op
BenchmarkReaderHuge/389K-12         1190      947858 ns/op   420.69 MB/s    3221831 B/op     2630 allocs/op
BenchmarkReaderHuge/778K-12          806     1472486 ns/op   541.61 MB/s    5201856 B/op     5187 allocs/op
BenchmarkReaderHuge/1557K-12         426     2575269 ns/op   619.36 MB/s    9101330 B/op    10233 allocs/op
BenchmarkReaderHuge/3115K-12         286     4034656 ns/op   790.66 MB/s   12397968 B/op    16099 allocs/op
BenchmarkReaderHuge/6230K-12         172     6830563 ns/op   934.05 MB/s   16008416 B/op    26844 allocs/op
BenchmarkReaderHuge/12461K-12        100    11409467 ns/op  1118.39 MB/s   22655163 B/op    48107 allocs/op
BenchmarkReaderHuge/24922K-12         66    19780395 ns/op  1290.19 MB/s   35158559 B/op    90216 allocs/op
BenchmarkReaderHuge/49844K-12         34    37282559 ns/op  1369.03 MB/s   60528624 B/op   174497 allocs/op
```
This commit is contained in:
committed by
Harshavardhana
parent
e7f491a14b
commit
ddea0bdf11
@@ -18,6 +18,7 @@ package s3select
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
@@ -108,26 +109,29 @@ func TestCSVInput(t *testing.T) {
|
||||
2.5,baz,true
|
||||
`)
|
||||
|
||||
for _, testCase := range testTable {
|
||||
s3Select, err := NewS3Select(bytes.NewReader(testCase.requestXML))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for i, testCase := range testTable {
|
||||
t.Run(fmt.Sprint(i), func(t *testing.T) {
|
||||
s3Select, err := NewS3Select(bytes.NewReader(testCase.requestXML))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) {
|
||||
return ioutil.NopCloser(bytes.NewReader(csvData)), nil
|
||||
}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) {
|
||||
return ioutil.NopCloser(bytes.NewReader(csvData)), nil
|
||||
}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
w := &testResponseWriter{}
|
||||
s3Select.Evaluate(w)
|
||||
s3Select.Close()
|
||||
w := &testResponseWriter{}
|
||||
s3Select.Evaluate(w)
|
||||
s3Select.Close()
|
||||
|
||||
if !reflect.DeepEqual(w.response, testCase.expectedResult) {
|
||||
t.Fatalf("received response does not match with expected reply")
|
||||
}
|
||||
if !reflect.DeepEqual(w.response, testCase.expectedResult) {
|
||||
t.Errorf("received response does not match with expected reply\ngot: %#v\nwant:%#v", w.response, testCase.expectedResult)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestJSONInput(t *testing.T) {
|
||||
@@ -191,26 +195,27 @@ func TestJSONInput(t *testing.T) {
|
||||
{"three":true,"two":"baz","one":2.5}
|
||||
`)
|
||||
|
||||
for _, testCase := range testTable {
|
||||
for i, testCase := range testTable {
|
||||
t.Run(fmt.Sprint(i), func(t *testing.T) {
|
||||
s3Select, err := NewS3Select(bytes.NewReader(testCase.requestXML))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
s3Select, err := NewS3Select(bytes.NewReader(testCase.requestXML))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) {
|
||||
return ioutil.NopCloser(bytes.NewReader(jsonData)), nil
|
||||
}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) {
|
||||
return ioutil.NopCloser(bytes.NewReader(jsonData)), nil
|
||||
}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
w := &testResponseWriter{}
|
||||
s3Select.Evaluate(w)
|
||||
s3Select.Close()
|
||||
|
||||
w := &testResponseWriter{}
|
||||
s3Select.Evaluate(w)
|
||||
s3Select.Close()
|
||||
|
||||
if !reflect.DeepEqual(w.response, testCase.expectedResult) {
|
||||
t.Fatalf("received response does not match with expected reply")
|
||||
}
|
||||
if !reflect.DeepEqual(w.response, testCase.expectedResult) {
|
||||
t.Errorf("received response does not match with expected reply\ngot: %s\nwant:%s", string(w.response), string(testCase.expectedResult))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -268,45 +273,47 @@ func TestParquetInput(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testTable {
|
||||
getReader := func(offset int64, length int64) (io.ReadCloser, error) {
|
||||
testdataFile := "testdata.parquet"
|
||||
file, err := os.Open(testdataFile)
|
||||
for i, testCase := range testTable {
|
||||
t.Run(fmt.Sprint(i), func(t *testing.T) {
|
||||
getReader := func(offset int64, length int64) (io.ReadCloser, error) {
|
||||
testdataFile := "testdata.parquet"
|
||||
file, err := os.Open(testdataFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fi, err := file.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if offset < 0 {
|
||||
offset = fi.Size() + offset
|
||||
}
|
||||
|
||||
if _, err = file.Seek(offset, os.SEEK_SET); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return file, nil
|
||||
}
|
||||
|
||||
s3Select, err := NewS3Select(bytes.NewReader(testCase.requestXML))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
fi, err := file.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
if err = s3Select.Open(getReader); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if offset < 0 {
|
||||
offset = fi.Size() + offset
|
||||
w := &testResponseWriter{}
|
||||
s3Select.Evaluate(w)
|
||||
s3Select.Close()
|
||||
|
||||
if !reflect.DeepEqual(w.response, testCase.expectedResult) {
|
||||
t.Errorf("received response does not match with expected reply\ngot: %#v\nwant:%#v", w.response, testCase.expectedResult)
|
||||
}
|
||||
|
||||
if _, err = file.Seek(offset, os.SEEK_SET); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return file, nil
|
||||
}
|
||||
|
||||
s3Select, err := NewS3Select(bytes.NewReader(testCase.requestXML))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = s3Select.Open(getReader); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
w := &testResponseWriter{}
|
||||
s3Select.Evaluate(w)
|
||||
s3Select.Close()
|
||||
|
||||
if !reflect.DeepEqual(w.response, testCase.expectedResult) {
|
||||
t.Fatalf("received response does not match with expected reply")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user