mirror of
https://github.com/minio/minio.git
synced 2025-11-06 20:33:07 -05:00
SIMDJSON S3 select input (#8401)
This commit is contained in:
@@ -21,13 +21,16 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"net/http"
|
||||
"os"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/klauspost/cpuid"
|
||||
"github.com/minio/minio-go/v6"
|
||||
"github.com/minio/simdjson-go"
|
||||
)
|
||||
|
||||
type testResponseWriter struct {
|
||||
@@ -56,6 +59,7 @@ func TestJSONQueries(t *testing.T) {
|
||||
{"id": 1,"title": "Second Record","desc": "another text","synonyms": ["some", "synonym", "value"]}
|
||||
{"id": 2,"title": "Second Record","desc": "another text","numbers": [2, 3.0, 4]}
|
||||
{"id": 3,"title": "Second Record","desc": "another text","nested": [[2, 3.0, 4], [7, 8.5, 9]]}`
|
||||
|
||||
var testTable = []struct {
|
||||
name string
|
||||
query string
|
||||
@@ -229,6 +233,11 @@ func TestJSONQueries(t *testing.T) {
|
||||
{"id":3, "value": "true"}
|
||||
`,
|
||||
},
|
||||
{
|
||||
name: "index-wildcard-in",
|
||||
query: `SELECT * from s3object s WHERE title = 'Test Record'`,
|
||||
wantResult: `{"id":0,"title":"Test Record","desc":"Some text","synonyms":["foo","bar","whatever"]}`,
|
||||
},
|
||||
{
|
||||
name: "select-output-field-as-csv",
|
||||
requestXML: []byte(`<?xml version="1.0" encoding="UTF-8"?>
|
||||
@@ -260,7 +269,7 @@ func TestJSONQueries(t *testing.T) {
|
||||
<InputSerialization>
|
||||
<CompressionType>NONE</CompressionType>
|
||||
<JSON>
|
||||
<Type>DOCUMENT</Type>
|
||||
<Type>LINES</Type>
|
||||
</JSON>
|
||||
</InputSerialization>
|
||||
<OutputSerialization>
|
||||
@@ -274,6 +283,59 @@ func TestJSONQueries(t *testing.T) {
|
||||
|
||||
for _, testCase := range testTable {
|
||||
t.Run(testCase.name, func(t *testing.T) {
|
||||
// Hack cpuid to the CPU doesn't appear to support AVX2.
|
||||
// Restore whatever happens.
|
||||
defer func(f cpuid.Flags) {
|
||||
cpuid.CPU.Features = f
|
||||
}(cpuid.CPU.Features)
|
||||
cpuid.CPU.Features &= math.MaxUint64 - cpuid.AVX2
|
||||
|
||||
testReq := testCase.requestXML
|
||||
if len(testReq) == 0 {
|
||||
testReq = []byte(fmt.Sprintf(defRequest, testCase.query))
|
||||
}
|
||||
s3Select, err := NewS3Select(bytes.NewReader(testReq))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) {
|
||||
in := input
|
||||
if len(testCase.withJSON) > 0 {
|
||||
in = testCase.withJSON
|
||||
}
|
||||
return ioutil.NopCloser(bytes.NewBufferString(in)), nil
|
||||
}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
w := &testResponseWriter{}
|
||||
s3Select.Evaluate(w)
|
||||
s3Select.Close()
|
||||
resp := http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Body: ioutil.NopCloser(bytes.NewReader(w.response)),
|
||||
ContentLength: int64(len(w.response)),
|
||||
}
|
||||
res, err := minio.NewSelectResults(&resp, "testbucket")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
got, err := ioutil.ReadAll(res)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
gotS := strings.TrimSpace(string(got))
|
||||
if !reflect.DeepEqual(gotS, testCase.wantResult) {
|
||||
t.Errorf("received response does not match with expected reply. Query: %s\ngot: %s\nwant:%s", testCase.query, gotS, testCase.wantResult)
|
||||
}
|
||||
})
|
||||
t.Run("simd-"+testCase.name, func(t *testing.T) {
|
||||
if !simdjson.SupportedCPU() {
|
||||
t.Skip("No CPU support")
|
||||
}
|
||||
testReq := testCase.requestXML
|
||||
if len(testReq) == 0 {
|
||||
testReq = []byte(fmt.Sprintf(defRequest, testCase.query))
|
||||
@@ -320,6 +382,89 @@ func TestJSONQueries(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestCSVQueries(t *testing.T) {
|
||||
input := `index,ID,CaseNumber,Date,Day,Month,Year,Block,IUCR,PrimaryType,Description,LocationDescription,Arrest,Domestic,Beat,District,Ward,CommunityArea,FBI Code,XCoordinate,YCoordinate,UpdatedOn,Latitude,Longitude,Location
|
||||
2700763,7732229,,2010-05-26 00:00:00,26,May,2010,113XX S HALSTED ST,1150,,CREDIT CARD FRAUD,,False,False,2233,22.0,34.0,,11,,,,41.688043288,-87.6422444,"(41.688043288, -87.6422444)"`
|
||||
|
||||
var testTable = []struct {
|
||||
name string
|
||||
query string
|
||||
requestXML []byte
|
||||
wantResult string
|
||||
}{
|
||||
{
|
||||
name: "select-in-text-simple",
|
||||
query: `SELECT index FROM s3Object s WHERE "Month"='May'`,
|
||||
wantResult: `2700763`,
|
||||
},
|
||||
}
|
||||
|
||||
defRequest := `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<SelectObjectContentRequest>
|
||||
<Expression>%s</Expression>
|
||||
<ExpressionType>SQL</ExpressionType>
|
||||
<InputSerialization>
|
||||
<CompressionType>NONE</CompressionType>
|
||||
<CSV>
|
||||
<FieldDelimiter>,</FieldDelimiter>
|
||||
<FileHeaderInfo>USE</FileHeaderInfo>
|
||||
<QuoteCharacter>"</QuoteCharacter>
|
||||
<QuoteEscapeCharacter>"</QuoteEscapeCharacter>
|
||||
<RecordDelimiter>\n</RecordDelimiter>
|
||||
</CSV>
|
||||
</InputSerialization>
|
||||
<OutputSerialization>
|
||||
<CSV>
|
||||
</CSV>
|
||||
</OutputSerialization>
|
||||
<RequestProgress>
|
||||
<Enabled>FALSE</Enabled>
|
||||
</RequestProgress>
|
||||
</SelectObjectContentRequest>`
|
||||
|
||||
for _, testCase := range testTable {
|
||||
t.Run(testCase.name, func(t *testing.T) {
|
||||
testReq := testCase.requestXML
|
||||
if len(testReq) == 0 {
|
||||
testReq = []byte(fmt.Sprintf(defRequest, testCase.query))
|
||||
}
|
||||
s3Select, err := NewS3Select(bytes.NewReader(testReq))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) {
|
||||
return ioutil.NopCloser(bytes.NewBufferString(input)), nil
|
||||
}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
w := &testResponseWriter{}
|
||||
s3Select.Evaluate(w)
|
||||
s3Select.Close()
|
||||
resp := http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Body: ioutil.NopCloser(bytes.NewReader(w.response)),
|
||||
ContentLength: int64(len(w.response)),
|
||||
}
|
||||
res, err := minio.NewSelectResults(&resp, "testbucket")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
got, err := ioutil.ReadAll(res)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
gotS := strings.TrimSpace(string(got))
|
||||
if !reflect.DeepEqual(gotS, testCase.wantResult) {
|
||||
t.Errorf("received response does not match with expected reply. Query: %s\ngot: %s\nwant:%s", testCase.query, gotS, testCase.wantResult)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCSVQueries2(t *testing.T) {
|
||||
input := `id,time,num,num2,text
|
||||
1,2010-01-01T,7867786,4565.908123,"a text, with comma"
|
||||
2,2017-01-02T03:04Z,-5, 0.765111,
|
||||
@@ -559,7 +704,23 @@ func TestCSVInput(t *testing.T) {
|
||||
s3Select.Close()
|
||||
|
||||
if !reflect.DeepEqual(w.response, testCase.expectedResult) {
|
||||
t.Errorf("received response does not match with expected reply\ngot: %#v\nwant:%#v", w.response, testCase.expectedResult)
|
||||
resp := http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Body: ioutil.NopCloser(bytes.NewReader(w.response)),
|
||||
ContentLength: int64(len(w.response)),
|
||||
}
|
||||
res, err := minio.NewSelectResults(&resp, "testbucket")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
got, err := ioutil.ReadAll(res)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
t.Errorf("received response does not match with expected reply\ngot: %#v\nwant:%#v\ndecoded:%s", w.response, testCase.expectedResult, string(got))
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -667,7 +828,23 @@ func TestJSONInput(t *testing.T) {
|
||||
s3Select.Close()
|
||||
|
||||
if !reflect.DeepEqual(w.response, testCase.expectedResult) {
|
||||
t.Errorf("received response does not match with expected reply\ngot: %#v\nwant:%#v", w.response, testCase.expectedResult)
|
||||
resp := http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Body: ioutil.NopCloser(bytes.NewReader(w.response)),
|
||||
ContentLength: int64(len(w.response)),
|
||||
}
|
||||
res, err := minio.NewSelectResults(&resp, "testbucket")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
got, err := ioutil.ReadAll(res)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
t.Errorf("received response does not match with expected reply\ngot: %#v\nwant:%#v\ndecoded:%s", w.response, testCase.expectedResult, string(got))
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -766,7 +943,23 @@ func TestParquetInput(t *testing.T) {
|
||||
s3Select.Close()
|
||||
|
||||
if !reflect.DeepEqual(w.response, testCase.expectedResult) {
|
||||
t.Errorf("received response does not match with expected reply\ngot: %#v\nwant:%#v", w.response, testCase.expectedResult)
|
||||
resp := http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Body: ioutil.NopCloser(bytes.NewReader(w.response)),
|
||||
ContentLength: int64(len(w.response)),
|
||||
}
|
||||
res, err := minio.NewSelectResults(&resp, "testbucket")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
got, err := ioutil.ReadAll(res)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
t.Errorf("received response does not match with expected reply\ngot: %#v\nwant:%#v\ndecoded:%s", w.response, testCase.expectedResult, string(got))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user