mirror of
https://github.com/minio/minio.git
synced 2025-11-21 18:26:04 -05:00
select: Support Parquet dates (#11928)
Pass schema to parser to support dates. Fixes #11926
This commit is contained in:
@@ -1069,7 +1069,7 @@ func TestParquetInput(t *testing.T) {
|
||||
for i, testCase := range testTable {
|
||||
t.Run(fmt.Sprint(i), func(t *testing.T) {
|
||||
getReader := func(offset int64, length int64) (io.ReadCloser, error) {
|
||||
testdataFile := "testdata.parquet"
|
||||
testdataFile := "testdata/testdata.parquet"
|
||||
file, err := os.Open(testdataFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -1126,3 +1126,243 @@ func TestParquetInput(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParquetInputSchema(t *testing.T) {
|
||||
os.Setenv("MINIO_API_SELECT_PARQUET", "on")
|
||||
defer os.Setenv("MINIO_API_SELECT_PARQUET", "off")
|
||||
|
||||
var testTable = []struct {
|
||||
requestXML []byte
|
||||
wantResult string
|
||||
}{
|
||||
{
|
||||
requestXML: []byte(`
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<SelectObjectContentRequest>
|
||||
<Expression>SELECT * FROM S3Object LIMIT 5</Expression>
|
||||
<ExpressionType>SQL</ExpressionType>
|
||||
<InputSerialization>
|
||||
<CompressionType>NONE</CompressionType>
|
||||
<Parquet>
|
||||
</Parquet>
|
||||
</InputSerialization>
|
||||
<OutputSerialization>
|
||||
<JSON>
|
||||
</JSON>
|
||||
</OutputSerialization>
|
||||
<RequestProgress>
|
||||
<Enabled>FALSE</Enabled>
|
||||
</RequestProgress>
|
||||
</SelectObjectContentRequest>
|
||||
`), wantResult: `{"shipdate":"1996-03-13T"}
|
||||
{"shipdate":"1996-04-12T"}
|
||||
{"shipdate":"1996-01-29T"}
|
||||
{"shipdate":"1996-04-21T"}
|
||||
{"shipdate":"1996-03-30T"}`,
|
||||
},
|
||||
{
|
||||
requestXML: []byte(`
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<SelectObjectContentRequest>
|
||||
<Expression>SELECT DATE_ADD(day, 2, shipdate) as shipdate FROM S3Object LIMIT 5</Expression>
|
||||
<ExpressionType>SQL</ExpressionType>
|
||||
<InputSerialization>
|
||||
<CompressionType>NONE</CompressionType>
|
||||
<Parquet>
|
||||
</Parquet>
|
||||
</InputSerialization>
|
||||
<OutputSerialization>
|
||||
<JSON>
|
||||
</JSON>
|
||||
</OutputSerialization>
|
||||
<RequestProgress>
|
||||
<Enabled>FALSE</Enabled>
|
||||
</RequestProgress>
|
||||
</SelectObjectContentRequest>
|
||||
`), wantResult: `{"shipdate":"1996-03-15T"}
|
||||
{"shipdate":"1996-04-14T"}
|
||||
{"shipdate":"1996-01-31T"}
|
||||
{"shipdate":"1996-04-23T"}
|
||||
{"shipdate":"1996-04T"}`,
|
||||
},
|
||||
}
|
||||
|
||||
for i, testCase := range testTable {
|
||||
t.Run(fmt.Sprint(i), func(t *testing.T) {
|
||||
getReader := func(offset int64, length int64) (io.ReadCloser, error) {
|
||||
testdataFile := "testdata/lineitem_shipdate.parquet"
|
||||
file, err := os.Open(testdataFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fi, err := file.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if offset < 0 {
|
||||
offset = fi.Size() + offset
|
||||
}
|
||||
|
||||
if _, err = file.Seek(offset, io.SeekStart); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return file, nil
|
||||
}
|
||||
|
||||
s3Select, err := NewS3Select(bytes.NewReader(testCase.requestXML))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = s3Select.Open(getReader); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
w := &testResponseWriter{}
|
||||
s3Select.Evaluate(w)
|
||||
s3Select.Close()
|
||||
resp := http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Body: ioutil.NopCloser(bytes.NewReader(w.response)),
|
||||
ContentLength: int64(len(w.response)),
|
||||
}
|
||||
res, err := minio.NewSelectResults(&resp, "testbucket")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
got, err := ioutil.ReadAll(res)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
gotS := strings.TrimSpace(string(got))
|
||||
if !reflect.DeepEqual(gotS, testCase.wantResult) {
|
||||
t.Errorf("received response does not match with expected reply. Query: %s\ngot: %s\nwant:%s", testCase.requestXML, gotS, testCase.wantResult)
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParquetInputSchemaCSV(t *testing.T) {
|
||||
os.Setenv("MINIO_API_SELECT_PARQUET", "on")
|
||||
defer os.Setenv("MINIO_API_SELECT_PARQUET", "off")
|
||||
|
||||
var testTable = []struct {
|
||||
requestXML []byte
|
||||
wantResult string
|
||||
}{
|
||||
{
|
||||
requestXML: []byte(`
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<SelectObjectContentRequest>
|
||||
<Expression>SELECT * FROM S3Object LIMIT 5</Expression>
|
||||
<ExpressionType>SQL</ExpressionType>
|
||||
<InputSerialization>
|
||||
<CompressionType>NONE</CompressionType>
|
||||
<Parquet>
|
||||
</Parquet>
|
||||
</InputSerialization>
|
||||
<OutputSerialization>
|
||||
<CSV/>
|
||||
</OutputSerialization>
|
||||
<RequestProgress>
|
||||
<Enabled>FALSE</Enabled>
|
||||
</RequestProgress>
|
||||
</SelectObjectContentRequest>
|
||||
`), wantResult: `1996-03-13T
|
||||
1996-04-12T
|
||||
1996-01-29T
|
||||
1996-04-21T
|
||||
1996-03-30T`,
|
||||
},
|
||||
{
|
||||
requestXML: []byte(`
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<SelectObjectContentRequest>
|
||||
<Expression>SELECT DATE_ADD(day, 2, shipdate) as shipdate FROM S3Object LIMIT 5</Expression>
|
||||
<ExpressionType>SQL</ExpressionType>
|
||||
<InputSerialization>
|
||||
<CompressionType>NONE</CompressionType>
|
||||
<Parquet>
|
||||
</Parquet>
|
||||
</InputSerialization>
|
||||
<OutputSerialization>
|
||||
<CSV/>
|
||||
</OutputSerialization>
|
||||
<RequestProgress>
|
||||
<Enabled>FALSE</Enabled>
|
||||
</RequestProgress>
|
||||
</SelectObjectContentRequest>
|
||||
`), wantResult: `1996-03-15T
|
||||
1996-04-14T
|
||||
1996-01-31T
|
||||
1996-04-23T
|
||||
1996-04T`,
|
||||
},
|
||||
}
|
||||
|
||||
for i, testCase := range testTable {
|
||||
t.Run(fmt.Sprint(i), func(t *testing.T) {
|
||||
getReader := func(offset int64, length int64) (io.ReadCloser, error) {
|
||||
testdataFile := "testdata/lineitem_shipdate.parquet"
|
||||
file, err := os.Open(testdataFile)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fi, err := file.Stat()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if offset < 0 {
|
||||
offset = fi.Size() + offset
|
||||
}
|
||||
|
||||
if _, err = file.Seek(offset, io.SeekStart); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return file, nil
|
||||
}
|
||||
|
||||
s3Select, err := NewS3Select(bytes.NewReader(testCase.requestXML))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = s3Select.Open(getReader); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
w := &testResponseWriter{}
|
||||
s3Select.Evaluate(w)
|
||||
s3Select.Close()
|
||||
resp := http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Body: ioutil.NopCloser(bytes.NewReader(w.response)),
|
||||
ContentLength: int64(len(w.response)),
|
||||
}
|
||||
res, err := minio.NewSelectResults(&resp, "testbucket")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
got, err := ioutil.ReadAll(res)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
gotS := strings.TrimSpace(string(got))
|
||||
if !reflect.DeepEqual(gotS, testCase.wantResult) {
|
||||
t.Errorf("received response does not match with expected reply. Query: %s\ngot: %s\nwant:%s", testCase.requestXML, gotS, testCase.wantResult)
|
||||
}
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user