S3 Select: switch to new parquet library and reduce locking (#14731)

- This change switches to a new parquet library.
- SelectObjectContent now takes a single lock at the beginning and holds it
for the duration of the operation. Previously the operation took a lock every
time the parquet library performed a Seek on the underlying object stream.
- Add basic support for LogicalType annotations for timestamps.
This commit is contained in:
Aditya Manthramurthy
2022-04-14 06:54:47 -07:00
committed by GitHub
parent 67e17ed3f8
commit e8e48e4c4a
7 changed files with 336 additions and 348 deletions

View File

@@ -20,8 +20,6 @@ package s3select
import (
"bytes"
"encoding/csv"
"io"
"io/ioutil"
"math/rand"
"net/http"
"strconv"
@@ -112,9 +110,7 @@ func benchmarkSelect(b *testing.B, count int, query string) {
b.Fatal(err)
}
if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) {
return ioutil.NopCloser(bytes.NewReader(csvData)), nil
}); err != nil {
if err = s3Select.Open(newBytesRSC(csvData)); err != nil {
b.Fatal(err)
}