mirror of
https://github.com/minio/minio.git
synced 2025-11-11 14:30:17 -05:00
Add archived parquet as int. package (#9912)
Since github.com/minio/parquet-go is archived add it as internal package.
This commit is contained in:
618
pkg/s3select/internal/parquet-go/data/column-grouplist_test.go
Normal file
618
pkg/s3select/internal/parquet-go/data/column-grouplist_test.go
Normal file
@@ -0,0 +1,618 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package data
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema"
|
||||
)
|
||||
|
||||
func TestPopulateGroupList(t *testing.T) {
|
||||
requiredList1 := schema.NewTree()
|
||||
{
|
||||
requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredList1.Set("group", requiredGroup); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList1.Set("group.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList1.Set("group.list.element", requiredElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList1.Set("group.list.element.col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err := requiredList1.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
requiredList2 := schema.NewTree()
|
||||
{
|
||||
requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredList2.Set("group", requiredGroup); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList2.Set("group.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList2.Set("group.list.element", requiredElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList2.Set("group.list.element.col", optionalCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err := requiredList2.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
requiredList3 := schema.NewTree()
|
||||
{
|
||||
requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredList3.Set("group", requiredGroup); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList3.Set("group.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList3.Set("group.list.element", optionalElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList3.Set("group.list.element.col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err := requiredList3.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
requiredList4 := schema.NewTree()
|
||||
{
|
||||
requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredList4.Set("group", requiredGroup); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList4.Set("group.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList4.Set("group.list.element", optionalElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList4.Set("group.list.element.col", optionalCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err := requiredList4.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
optionalList1 := schema.NewTree()
|
||||
{
|
||||
optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalList1.Set("group", optionalGroup); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList1.Set("group.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList1.Set("group.list.element", requiredElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList1.Set("group.list.element.col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err := optionalList1.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
optionalList2 := schema.NewTree()
|
||||
{
|
||||
optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalList2.Set("group", optionalGroup); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList2.Set("group.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList2.Set("group.list.element", requiredElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList2.Set("group.list.element.col", optionalCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err := optionalList2.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
optionalList3 := schema.NewTree()
|
||||
{
|
||||
optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalList3.Set("group", optionalGroup); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList3.Set("group.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList3.Set("group.list.element", optionalElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList3.Set("group.list.element.col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err := optionalList3.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
optionalList4 := schema.NewTree()
|
||||
{
|
||||
optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalList4.Set("group", optionalGroup); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList4.Set("group.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList4.Set("group.list.element", optionalElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList4.Set("group.list.element.col", optionalCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err := optionalList4.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
result1 := map[string]*Column{
|
||||
"group.list.element.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{1},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
result2 := map[string]*Column{
|
||||
"group.list.element.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10, v20},
|
||||
definitionLevels: []int64{1, 1},
|
||||
repetitionLevels: []int64{0, 1},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 5,
|
||||
minValue: v10,
|
||||
maxValue: v20,
|
||||
},
|
||||
}
|
||||
|
||||
result3 := map[string]*Column{
|
||||
"group.list.element.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{1},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
},
|
||||
}
|
||||
|
||||
result4 := map[string]*Column{
|
||||
"group.list.element.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
result5 := map[string]*Column{
|
||||
"group.list.element.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10, v20},
|
||||
definitionLevels: []int64{2, 2},
|
||||
repetitionLevels: []int64{0, 1},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 5,
|
||||
minValue: v10,
|
||||
maxValue: v20,
|
||||
},
|
||||
}
|
||||
|
||||
result6 := map[string]*Column{
|
||||
"group.list.element.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
},
|
||||
}
|
||||
|
||||
result7 := map[string]*Column{
|
||||
"group.list.element.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{3},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
result8 := map[string]*Column{
|
||||
"group.list.element.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10, v20},
|
||||
definitionLevels: []int64{3, 3},
|
||||
repetitionLevels: []int64{0, 1},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 5,
|
||||
minValue: v10,
|
||||
maxValue: v20,
|
||||
},
|
||||
}
|
||||
|
||||
result9 := map[string]*Column{
|
||||
"group.list.element.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
},
|
||||
}
|
||||
|
||||
result10 := map[string]*Column{
|
||||
"group.list.element.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{3},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
},
|
||||
}
|
||||
|
||||
result11 := map[string]*Column{
|
||||
"group.list.element.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{4},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
result12 := map[string]*Column{
|
||||
"group.list.element.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10, v20},
|
||||
definitionLevels: []int64{4, 4},
|
||||
repetitionLevels: []int64{0, 1},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 5,
|
||||
minValue: v10,
|
||||
maxValue: v20,
|
||||
},
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
schemaTree *schema.Tree
|
||||
data string
|
||||
expectedResult map[string]*Column
|
||||
expectErr bool
|
||||
}{
|
||||
{requiredList1, `{}`, nil, true}, // err: group: nil value for required field
|
||||
{requiredList1, `{"group": null}`, nil, true}, // err: group: nil value for required field
|
||||
{requiredList1, `{"group": [{"col": null}]}`, nil, true}, // err: group.list.element.col: nil value for required field
|
||||
{requiredList1, `{"group": [{"col": 10}]}`, result1, false},
|
||||
{requiredList1, `{"group": [{"col": 10}, {"col": 20}]}`, result2, false},
|
||||
{requiredList2, `{}`, nil, true}, // err: group: nil value for required field
|
||||
{requiredList2, `{"group": null}`, nil, true}, // err: group: nil value for required field
|
||||
{requiredList2, `{"group": [{"col": null}]}`, result3, false},
|
||||
{requiredList2, `{"group": [{"col": 10}]}`, result4, false},
|
||||
{requiredList2, `{"group": [{"col": 10}, {"col": 20}]}`, result5, false},
|
||||
{requiredList3, `{}`, nil, true}, // err: group: nil value for required field
|
||||
{requiredList3, `{"group": null}`, nil, true}, // err: group: nil value for required field
|
||||
{requiredList3, `{"group": [{"col": null}]}`, nil, true}, // err: group.list.element.col: nil value for required field
|
||||
{requiredList3, `{"group": [{"col": 10}]}`, result4, false},
|
||||
{requiredList3, `{"group": [{"col": 10}, {"col": 20}]}`, result5, false},
|
||||
{requiredList4, `{}`, nil, true}, // err: group: nil value for required field
|
||||
{requiredList4, `{"group": null}`, nil, true}, // err: group: nil value for required field
|
||||
{requiredList4, `{"group": [{"col": null}]}`, result6, false},
|
||||
{requiredList4, `{"group": [{"col": 10}]}`, result7, false},
|
||||
{requiredList4, `{"group": [{"col": 10}, {"col": 20}]}`, result8, false},
|
||||
{optionalList1, `{}`, result9, false},
|
||||
{optionalList1, `{"group": null}`, result9, false},
|
||||
{optionalList1, `{"group": [{"col": null}]}`, nil, true}, // err: group.list.element.col: nil value for required field
|
||||
{optionalList1, `{"group": [{"col": 10}]}`, result4, false},
|
||||
{optionalList1, `{"group": [{"col": 10}, {"col": 20}]}`, result5, false},
|
||||
{optionalList2, `{}`, result9, false},
|
||||
{optionalList2, `{"group": null}`, result9, false},
|
||||
{optionalList2, `{"group": [{"col": null}]}`, result6, false},
|
||||
{optionalList2, `{"group": [{"col": 10}]}`, result7, false},
|
||||
{optionalList2, `{"group": [{"col": 10}, {"col": 20}]}`, result8, false},
|
||||
{optionalList3, `{}`, result9, false},
|
||||
{optionalList3, `{"group": null}`, result9, false},
|
||||
{optionalList3, `{"group": [{"col": null}]}`, nil, true}, // err: group.list.element.col: nil value for required field
|
||||
{optionalList3, `{"group": [{"col": 10}]}`, result7, false},
|
||||
{optionalList3, `{"group": [{"col": 10}, {"col": 20}]}`, result8, false},
|
||||
{optionalList4, `{}`, result9, false},
|
||||
{optionalList4, `{"group": null}`, result9, false},
|
||||
{optionalList4, `{"group": [{"col": null}]}`, result10, false},
|
||||
{optionalList4, `{"group": [{"col": 10}]}`, result11, false},
|
||||
{optionalList4, `{"group": [{"col": 10}, {"col": 20}]}`, result12, false},
|
||||
}
|
||||
|
||||
for i, testCase := range testCases {
|
||||
result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree)
|
||||
expectErr := (err != nil)
|
||||
|
||||
if testCase.expectErr != expectErr {
|
||||
t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr)
|
||||
}
|
||||
|
||||
if !testCase.expectErr {
|
||||
if !reflect.DeepEqual(result, testCase.expectedResult) {
|
||||
t.Fatalf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
237
pkg/s3select/internal/parquet-go/data/column-grouptype_test.go
Normal file
237
pkg/s3select/internal/parquet-go/data/column-grouptype_test.go
Normal file
@@ -0,0 +1,237 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package data
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema"
|
||||
)
|
||||
|
||||
func TestPopulateGroupType(t *testing.T) {
|
||||
requiredGroup1 := schema.NewTree()
|
||||
{
|
||||
requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredGroup1.Set("group", requiredGroup); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredGroup1.Set("group.col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err := requiredGroup1.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
requiredGroup2 := schema.NewTree()
|
||||
{
|
||||
requiredGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredGroup2.Set("group", requiredGroup); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredGroup2.Set("group.col", optionalCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err := requiredGroup2.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
optionalGroup1 := schema.NewTree()
|
||||
{
|
||||
optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalGroup1.Set("group", optionalGroup); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalGroup1.Set("group.col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err := optionalGroup1.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
optionalGroup2 := schema.NewTree()
|
||||
{
|
||||
optionalGroup, err := schema.NewElement("group", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalGroup2.Set("group", optionalGroup); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalGroup2.Set("group.col", optionalCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err := optionalGroup2.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
result1 := map[string]*Column{
|
||||
"group.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
result2 := map[string]*Column{
|
||||
"group.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
},
|
||||
}
|
||||
|
||||
result3 := map[string]*Column{
|
||||
"group.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{1},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
result4 := map[string]*Column{
|
||||
"group.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{1},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
},
|
||||
}
|
||||
|
||||
result5 := map[string]*Column{
|
||||
"group.col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
schemaTree *schema.Tree
|
||||
data string
|
||||
expectedResult map[string]*Column
|
||||
expectErr bool
|
||||
}{
|
||||
{requiredGroup1, `{}`, nil, true}, // err: group: nil value for required field
|
||||
{requiredGroup1, `{"group": null}`, nil, true}, // err: group: nil value for required field
|
||||
{requiredGroup1, `{"group": {"col": null}}`, nil, true}, // err: group.col: nil value for required field
|
||||
{requiredGroup1, `{"group": {"col": 10}}`, result1, false},
|
||||
{requiredGroup2, `{}`, nil, true}, // err: group: nil value for required field
|
||||
{requiredGroup2, `{"group": null}`, nil, true}, // err: group: nil value for required field
|
||||
{requiredGroup2, `{"group": {"col": null}}`, result2, false},
|
||||
{requiredGroup2, `{"group": {"col": 10}}`, result3, false},
|
||||
{optionalGroup1, `{}`, result2, false},
|
||||
{optionalGroup1, `{"group": null}`, result2, false},
|
||||
{optionalGroup1, `{"group": {"col": null}}`, nil, true}, // err: group.col: nil value for required field
|
||||
{optionalGroup1, `{"group": {"col": 10}}`, result3, false},
|
||||
{optionalGroup2, `{}`, result2, false},
|
||||
{optionalGroup2, `{"group": null}`, result2, false},
|
||||
{optionalGroup2, `{"group": {"col": null}}`, result4, false},
|
||||
{optionalGroup2, `{"group": {"col": 10}}`, result5, false},
|
||||
}
|
||||
|
||||
for i, testCase := range testCases {
|
||||
result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree)
|
||||
expectErr := (err != nil)
|
||||
|
||||
if testCase.expectErr != expectErr {
|
||||
t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr)
|
||||
}
|
||||
|
||||
if !testCase.expectErr {
|
||||
if !reflect.DeepEqual(result, testCase.expectedResult) {
|
||||
t.Fatalf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
698
pkg/s3select/internal/parquet-go/data/column-listoflist_test.go
Normal file
698
pkg/s3select/internal/parquet-go/data/column-listoflist_test.go
Normal file
@@ -0,0 +1,698 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package data
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema"
|
||||
)
|
||||
|
||||
func TestPopulateListOfList(t *testing.T) {
|
||||
requiredList1 := schema.NewTree()
|
||||
{
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredList1.Set("col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList1.Set("col.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList1.Set("col.list.element", requiredElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList1.Set("col.list.element.list", subList); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList1.Set("col.list.element.list.element", requiredSubElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = requiredList1.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
requiredList2 := schema.NewTree()
|
||||
{
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredList2.Set("col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList2.Set("col.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList2.Set("col.list.element", requiredElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList2.Set("col.list.element.list", subList); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList2.Set("col.list.element.list.element", optionalSubElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = requiredList2.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
requiredList3 := schema.NewTree()
|
||||
{
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optioonalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredList3.Set("col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList3.Set("col.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList3.Set("col.list.element", optioonalElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList3.Set("col.list.element.list", subList); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList3.Set("col.list.element.list.element", requiredSubElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = requiredList3.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
requiredList4 := schema.NewTree()
|
||||
{
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optioonalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredList4.Set("col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList4.Set("col.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList4.Set("col.list.element", optioonalElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList4.Set("col.list.element.list", subList); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList4.Set("col.list.element.list.element", optionalSubElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = requiredList4.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
optionalList1 := schema.NewTree()
|
||||
{
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalList1.Set("col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList1.Set("col.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList1.Set("col.list.element", requiredElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList1.Set("col.list.element.list", subList); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList1.Set("col.list.element.list.element", requiredSubElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = optionalList1.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
optionalList2 := schema.NewTree()
|
||||
{
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalList2.Set("col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList2.Set("col.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList2.Set("col.list.element", requiredElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList2.Set("col.list.element.list", subList); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList2.Set("col.list.element.list.element", optionalSubElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = optionalList2.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
optionalList3 := schema.NewTree()
|
||||
{
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optioonalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalList3.Set("col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList3.Set("col.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList3.Set("col.list.element", optioonalElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList3.Set("col.list.element.list", subList); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList3.Set("col.list.element.list.element", requiredSubElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = optionalList3.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
optionalList4 := schema.NewTree()
|
||||
{
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optioonalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
subList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalSubElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalList4.Set("col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList4.Set("col.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList4.Set("col.list.element", optioonalElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList4.Set("col.list.element.list", subList); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList4.Set("col.list.element.list.element", optionalSubElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = optionalList4.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
result1 := map[string]*Column{
|
||||
"col.list.element.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
result2 := map[string]*Column{
|
||||
"col.list.element.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10, v20, v30, v10, v20, v10, v30},
|
||||
definitionLevels: []int64{2, 2, 2, 2, 2, 2, 2},
|
||||
repetitionLevels: []int64{0, 2, 1, 2, 1, 2, 2},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 5,
|
||||
minValue: v10,
|
||||
maxValue: v30,
|
||||
},
|
||||
}
|
||||
|
||||
result3 := map[string]*Column{
|
||||
"col.list.element.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
},
|
||||
}
|
||||
|
||||
result4 := map[string]*Column{
|
||||
"col.list.element.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{3},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
result5 := map[string]*Column{
|
||||
"col.list.element.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10, v20, v30, v10, v20, v10, v30},
|
||||
definitionLevels: []int64{3, 3, 3, 3, 3, 3, 3},
|
||||
repetitionLevels: []int64{0, 2, 1, 2, 1, 2, 2},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 5,
|
||||
minValue: v10,
|
||||
maxValue: v30,
|
||||
},
|
||||
}
|
||||
|
||||
result6 := map[string]*Column{
|
||||
"col.list.element.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{3},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
},
|
||||
}
|
||||
|
||||
result7 := map[string]*Column{
|
||||
"col.list.element.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{4},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
result8 := map[string]*Column{
|
||||
"col.list.element.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10, v20, v30, v10, v20, v10, v30},
|
||||
definitionLevels: []int64{4, 4, 4, 4, 4, 4, 4},
|
||||
repetitionLevels: []int64{0, 2, 1, 2, 1, 2, 2},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 5,
|
||||
minValue: v10,
|
||||
maxValue: v30,
|
||||
},
|
||||
}
|
||||
|
||||
result9 := map[string]*Column{
|
||||
"col.list.element.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
},
|
||||
}
|
||||
|
||||
result10 := map[string]*Column{
|
||||
"col.list.element.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{4},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
},
|
||||
}
|
||||
|
||||
result11 := map[string]*Column{
|
||||
"col.list.element.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{5},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
result12 := map[string]*Column{
|
||||
"col.list.element.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10, v20, v30, v10, v20, v10, v30},
|
||||
definitionLevels: []int64{5, 5, 5, 5, 5, 5, 5},
|
||||
repetitionLevels: []int64{0, 2, 1, 2, 1, 2, 2},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 5,
|
||||
minValue: v10,
|
||||
maxValue: v30,
|
||||
},
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
schemaTree *schema.Tree
|
||||
data string
|
||||
expectedResult map[string]*Column
|
||||
expectErr bool
|
||||
}{
|
||||
{requiredList1, `{}`, nil, true}, // err: col: nil value for required field
|
||||
{requiredList1, `{"col": null}`, nil, true}, // err: col: nil value for required field
|
||||
{requiredList1, `{"col": [[null]]}`, nil, true}, // err: col.list.element.list.element: nil value for required field
|
||||
{requiredList1, `{"col": [[10]]}`, result1, false},
|
||||
{requiredList1, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result2, false},
|
||||
{requiredList2, `{}`, nil, true}, // err: col: nil value for required field
|
||||
{requiredList2, `{"col": null}`, nil, true}, // err: col: nil value for required field
|
||||
{requiredList2, `{"col": [[null]]}`, result3, false},
|
||||
{requiredList2, `{"col": [[10]]}`, result4, false},
|
||||
{requiredList2, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result5, false},
|
||||
{requiredList3, `{}`, nil, true}, // err: col: nil value for required field
|
||||
{requiredList3, `{"col": null}`, nil, true}, // err: col: nil value for required field
|
||||
{requiredList3, `{"col": [[null]]}`, nil, true}, // err: col.list.element.list.element: nil value for required field
|
||||
{requiredList3, `{"col": [[10]]}`, result4, false},
|
||||
{requiredList3, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result5, false},
|
||||
{requiredList4, `{}`, nil, true}, // err: col: nil value for required field
|
||||
{requiredList4, `{"col": null}`, nil, true}, // err: col: nil value for required field
|
||||
{requiredList4, `{"col": [[null]]}`, result6, false},
|
||||
{requiredList4, `{"col": [[10]]}`, result7, false},
|
||||
{requiredList4, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result8, false},
|
||||
{optionalList1, `{}`, result9, false},
|
||||
{optionalList1, `{"col": null}`, result9, false},
|
||||
{optionalList1, `{"col": [[null]]}`, nil, true}, // err: col.list.element.list.element: nil value for required field
|
||||
{optionalList1, `{"col": [[10]]}`, result4, false},
|
||||
{optionalList1, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result5, false},
|
||||
{optionalList2, `{}`, result9, false},
|
||||
{optionalList2, `{"col": null}`, result9, false},
|
||||
{optionalList2, `{"col": [[null]]}`, result6, false},
|
||||
{optionalList2, `{"col": [[10]]}`, result7, false},
|
||||
{optionalList2, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result8, false},
|
||||
{optionalList3, `{}`, result9, false},
|
||||
{optionalList3, `{"col": null}`, result9, false},
|
||||
{optionalList3, `{"col": [[null]]}`, nil, true}, // err: col.list.element.list.element: nil value for required field
|
||||
{optionalList3, `{"col": [[10]]}`, result7, false},
|
||||
{optionalList3, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result8, false},
|
||||
{optionalList4, `{}`, result9, false},
|
||||
{optionalList4, `{"col": null}`, result9, false},
|
||||
{optionalList4, `{"col": [[null]]}`, result10, false},
|
||||
{optionalList4, `{"col": [[10]]}`, result11, false},
|
||||
{optionalList4, `{"col": [[10, 20], [30, 10], [20, 10, 30]]}`, result12, false},
|
||||
}
|
||||
|
||||
for i, testCase := range testCases {
|
||||
result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree)
|
||||
expectErr := (err != nil)
|
||||
|
||||
if testCase.expectErr != expectErr {
|
||||
t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr)
|
||||
}
|
||||
|
||||
if !testCase.expectErr {
|
||||
if !reflect.DeepEqual(result, testCase.expectedResult) {
|
||||
t.Fatalf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
370
pkg/s3select/internal/parquet-go/data/column-map_test.go
Normal file
370
pkg/s3select/internal/parquet-go/data/column-map_test.go
Normal file
@@ -0,0 +1,370 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package data
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema"
|
||||
)
|
||||
|
||||
func TestPopulateMap(t *testing.T) {
|
||||
t.Skip("Broken")
|
||||
requiredMap1 := schema.NewTree()
|
||||
{
|
||||
mapElement, err := schema.NewElement("map", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
keyValue, err := schema.NewElement("key_value", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredKey, err := schema.NewElement("key", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredValue, err := schema.NewElement("value", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredMap1.Set("map", mapElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredMap1.Set("map.key_value", keyValue); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredMap1.Set("map.key_value.key", requiredKey); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredMap1.Set("map.key_value.value", requiredValue); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = requiredMap1.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
requiredMap2 := schema.NewTree()
|
||||
{
|
||||
mapElement, err := schema.NewElement("map", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
keyValue, err := schema.NewElement("key_value", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredKey, err := schema.NewElement("key", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalValue, err := schema.NewElement("value", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredMap2.Set("map", mapElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredMap2.Set("map.key_value", keyValue); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredMap2.Set("map.key_value.key", requiredKey); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredMap2.Set("map.key_value.value", optionalValue); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = requiredMap2.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
optionalMap1 := schema.NewTree()
|
||||
{
|
||||
mapElement, err := schema.NewElement("map", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
keyValue, err := schema.NewElement("key_value", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredKey, err := schema.NewElement("key", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredValue, err := schema.NewElement("value", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalMap1.Set("map", mapElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalMap1.Set("map.key_value", keyValue); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalMap1.Set("map.key_value.key", requiredKey); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalMap1.Set("map.key_value.value", requiredValue); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = optionalMap1.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
optionalMap2 := schema.NewTree()
|
||||
{
|
||||
mapElement, err := schema.NewElement("map", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_MAP),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
keyValue, err := schema.NewElement("key_value", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredKey, err := schema.NewElement("key", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalValue, err := schema.NewElement("value", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalMap2.Set("map", mapElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalMap2.Set("map.key_value", keyValue); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalMap2.Set("map.key_value.key", requiredKey); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalMap2.Set("map.key_value.value", optionalValue); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = optionalMap2.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
result1 := map[string]*Column{
|
||||
"map.key_value.key": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{ten},
|
||||
definitionLevels: []int64{1},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"map.key_value.value": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{1},
|
||||
repetitionLevels: []int64{1},
|
||||
},
|
||||
}
|
||||
|
||||
result2 := map[string]*Column{
|
||||
"map.key_value.key": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{ten},
|
||||
definitionLevels: []int64{1},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"map.key_value.value": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{1},
|
||||
repetitionLevels: []int64{1},
|
||||
},
|
||||
}
|
||||
|
||||
result3 := map[string]*Column{
|
||||
"map.key_value.key": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{ten},
|
||||
definitionLevels: []int64{1},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"map.key_value.value": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{1},
|
||||
},
|
||||
}
|
||||
|
||||
result4 := map[string]*Column{
|
||||
"map.key_value.key": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
}
|
||||
|
||||
result5 := map[string]*Column{
|
||||
"map.key_value.key": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{ten},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"map.key_value.value": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{1},
|
||||
},
|
||||
}
|
||||
|
||||
result6 := map[string]*Column{
|
||||
"map.key_value.key": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{ten},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"map.key_value.value": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{1},
|
||||
},
|
||||
}
|
||||
|
||||
result7 := map[string]*Column{
|
||||
"map.key_value.key": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{ten},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"map.key_value.value": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{3},
|
||||
repetitionLevels: []int64{1},
|
||||
},
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
schemaTree *schema.Tree
|
||||
data string
|
||||
expectedResult map[string]*Column
|
||||
expectErr bool
|
||||
}{
|
||||
{requiredMap1, `{}`, nil, true}, // err: map: nil value for required field
|
||||
{requiredMap1, `{"map": null}`, nil, true}, // err: map: nil value for required field
|
||||
{requiredMap1, `{"map": {"ten": null}}`, nil, true}, // err: map.key_value.value: nil value for required field
|
||||
{requiredMap1, `{"map": {"ten": 10}}`, result1, false},
|
||||
{requiredMap2, `{}`, nil, true}, // err: map: nil value for required field
|
||||
{requiredMap2, `{"map": null}`, nil, true}, // err: map: nil value for required field
|
||||
{requiredMap2, `{"map": {"ten": null}}`, result2, false},
|
||||
{requiredMap2, `{"map": {"ten": 10}}`, result3, false},
|
||||
{optionalMap1, `{}`, result4, false},
|
||||
{optionalMap1, `{"map": null}`, result4, false},
|
||||
{optionalMap1, `{"map": {"ten": null}}`, nil, true}, // err: map.key_value.value: nil value for required field
|
||||
{optionalMap1, `{"map": {"ten": 10}}`, result5, false},
|
||||
{optionalMap2, `{}`, result4, false},
|
||||
{optionalMap2, `{"map": null}`, result4, false},
|
||||
{optionalMap2, `{"map": {"ten": null}}`, result6, false},
|
||||
{optionalMap2, `{"map": {"ten": 10}}`, result7, false},
|
||||
}
|
||||
|
||||
for i, testCase := range testCases {
|
||||
result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree)
|
||||
expectErr := (err != nil)
|
||||
|
||||
if testCase.expectErr != expectErr {
|
||||
t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr)
|
||||
}
|
||||
|
||||
if !testCase.expectErr {
|
||||
if !reflect.DeepEqual(result, testCase.expectedResult) {
|
||||
t.Errorf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,330 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package data
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema"
|
||||
)
|
||||
|
||||
func TestPopulatePrimitiveList(t *testing.T) {
|
||||
requiredList1 := schema.NewTree()
|
||||
{
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredList1.Set("col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList1.Set("col.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList1.Set("col.list.element", requiredElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = requiredList1.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
requiredList2 := schema.NewTree()
|
||||
{
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredList2.Set("col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList2.Set("col.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = requiredList2.Set("col.list.element", optionalElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = requiredList2.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
optionalList1 := schema.NewTree()
|
||||
{
|
||||
optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
requiredElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalList1.Set("col", optionalCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList1.Set("col.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList1.Set("col.list.element", requiredElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = optionalList1.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
optionalList2 := schema.NewTree()
|
||||
{
|
||||
optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
list, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
optionalElement, err := schema.NewElement("element", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalList2.Set("col", optionalCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList2.Set("col.list", list); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = optionalList2.Set("col.list.element", optionalElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = optionalList2.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
result1 := map[string]*Column{
|
||||
"col.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{1},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
result2 := map[string]*Column{
|
||||
"col.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10, v20, v30},
|
||||
definitionLevels: []int64{1, 1, 1},
|
||||
repetitionLevels: []int64{0, 1, 1},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 5,
|
||||
minValue: v10,
|
||||
maxValue: v30,
|
||||
},
|
||||
}
|
||||
|
||||
result3 := map[string]*Column{
|
||||
"col.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{1},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
},
|
||||
}
|
||||
|
||||
result4 := map[string]*Column{
|
||||
"col.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
result5 := map[string]*Column{
|
||||
"col.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10, v20, v30},
|
||||
definitionLevels: []int64{2, 2, 2},
|
||||
repetitionLevels: []int64{0, 1, 1},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 5,
|
||||
minValue: v10,
|
||||
maxValue: v30,
|
||||
},
|
||||
}
|
||||
|
||||
result6 := map[string]*Column{
|
||||
"col.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
},
|
||||
}
|
||||
|
||||
result7 := map[string]*Column{
|
||||
"col.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
},
|
||||
}
|
||||
|
||||
result8 := map[string]*Column{
|
||||
"col.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{3},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
result9 := map[string]*Column{
|
||||
"col.list.element": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10, v20, v30},
|
||||
definitionLevels: []int64{3, 3, 3},
|
||||
repetitionLevels: []int64{0, 1, 1},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 5,
|
||||
minValue: v10,
|
||||
maxValue: v30,
|
||||
},
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
schemaTree *schema.Tree
|
||||
data string
|
||||
expectedResult map[string]*Column
|
||||
expectErr bool
|
||||
}{
|
||||
{requiredList1, `{}`, nil, true}, // err: col: nil value for required field
|
||||
{requiredList1, `{"col": null}`, nil, true}, // err: col: nil value for required field
|
||||
{requiredList1, `{"col": [null]}`, nil, true}, // err: col.list.element: nil value for required field
|
||||
{requiredList1, `{"col": [10]}`, result1, false},
|
||||
{requiredList1, `{"col": [10, 20, 30]}`, result2, false},
|
||||
{requiredList2, `{}`, nil, true}, // err: col: nil value for required field
|
||||
{requiredList2, `{"col": null}`, nil, true}, // err: col: nil value for required field
|
||||
{requiredList2, `{"col": [null]}`, result3, false},
|
||||
{requiredList2, `{"col": [10]}`, result4, false},
|
||||
{requiredList2, `{"col": [10, 20, 30]}`, result5, false},
|
||||
{optionalList1, `{}`, result6, false},
|
||||
{optionalList1, `{"col": null}`, result6, false},
|
||||
{optionalList1, `{"col": [null]}`, nil, true}, // err: col.list.element: nil value for required field
|
||||
{optionalList1, `{"col": [10]}`, result4, false},
|
||||
{optionalList1, `{"col": [10, 20, 30]}`, result5, false},
|
||||
{optionalList2, `{}`, result6, false},
|
||||
{optionalList2, `{"col": null}`, result6, false},
|
||||
{optionalList2, `{"col": [null]}`, result7, false},
|
||||
{optionalList2, `{"col": [10]}`, result8, false},
|
||||
{optionalList2, `{"col": [10, 20, 30]}`, result9, false},
|
||||
}
|
||||
|
||||
for i, testCase := range testCases {
|
||||
result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree)
|
||||
expectErr := (err != nil)
|
||||
|
||||
if testCase.expectErr != expectErr {
|
||||
t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr)
|
||||
}
|
||||
|
||||
if !testCase.expectErr {
|
||||
if !reflect.DeepEqual(result, testCase.expectedResult) {
|
||||
t.Fatalf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,128 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package data
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema"
|
||||
)
|
||||
|
||||
func TestPopulatePrimitiveType(t *testing.T) {
|
||||
requiredField := schema.NewTree()
|
||||
{
|
||||
requiredCol, err := schema.NewElement("col", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_INT32), nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = requiredField.Set("col", requiredCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = requiredField.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
optionalField := schema.NewTree()
|
||||
{
|
||||
optionalCol, err := schema.NewElement("col", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_INT32), parquet.ConvertedTypePtr(parquet.ConvertedType_UINT_32),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = optionalField.Set("col", optionalCol); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, _, err = optionalField.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
result1 := map[string]*Column{
|
||||
"col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
result2 := map[string]*Column{
|
||||
"col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
},
|
||||
}
|
||||
|
||||
result3 := map[string]*Column{
|
||||
"col": {
|
||||
parquetType: parquet.Type_INT32,
|
||||
values: []interface{}{v10},
|
||||
definitionLevels: []int64{1},
|
||||
repetitionLevels: []int64{0},
|
||||
rowCount: 1,
|
||||
maxBitWidth: 4,
|
||||
minValue: v10,
|
||||
maxValue: v10,
|
||||
},
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
schemaTree *schema.Tree
|
||||
data string
|
||||
expectedResult map[string]*Column
|
||||
expectErr bool
|
||||
}{
|
||||
{requiredField, `{}`, nil, true},
|
||||
{requiredField, `{"col": null}`, nil, true}, // err: col: nil value for required field
|
||||
{requiredField, `{"col": 10}`, result1, false},
|
||||
{optionalField, `{}`, result2, false},
|
||||
{optionalField, `{"col": null}`, result2, false},
|
||||
{optionalField, `{"col": 10}`, result3, false},
|
||||
}
|
||||
|
||||
for i, testCase := range testCases {
|
||||
result, err := UnmarshalJSON([]byte(testCase.data), testCase.schemaTree)
|
||||
expectErr := (err != nil)
|
||||
|
||||
if testCase.expectErr != expectErr {
|
||||
t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr)
|
||||
}
|
||||
|
||||
if !testCase.expectErr {
|
||||
if !reflect.DeepEqual(result, testCase.expectedResult) {
|
||||
t.Fatalf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
680
pkg/s3select/internal/parquet-go/data/column.go
Normal file
680
pkg/s3select/internal/parquet-go/data/column.go
Normal file
@@ -0,0 +1,680 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package data
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"git.apache.org/thrift.git/lib/go/thrift"
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/common"
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/encoding"
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema"
|
||||
"github.com/tidwall/gjson"
|
||||
"github.com/tidwall/sjson"
|
||||
)
|
||||
|
||||
func getDefaultEncoding(parquetType parquet.Type) parquet.Encoding {
|
||||
switch parquetType {
|
||||
case parquet.Type_BOOLEAN:
|
||||
return parquet.Encoding_PLAIN
|
||||
case parquet.Type_INT32, parquet.Type_INT64, parquet.Type_FLOAT, parquet.Type_DOUBLE:
|
||||
return parquet.Encoding_RLE_DICTIONARY
|
||||
case parquet.Type_BYTE_ARRAY:
|
||||
return parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY
|
||||
}
|
||||
|
||||
return parquet.Encoding_PLAIN
|
||||
}
|
||||
|
||||
func getFirstValueElement(tree *schema.Tree) (valueElement *schema.Element) {
|
||||
tree.Range(func(name string, element *schema.Element) bool {
|
||||
if element.Children == nil {
|
||||
valueElement = element
|
||||
} else {
|
||||
valueElement = getFirstValueElement(element.Children)
|
||||
}
|
||||
|
||||
return false
|
||||
})
|
||||
|
||||
return valueElement
|
||||
}
|
||||
|
||||
func populate(columnDataMap map[string]*Column, input *jsonValue, tree *schema.Tree, firstValueRL int64) (map[string]*Column, error) {
|
||||
var err error
|
||||
|
||||
pos := 0
|
||||
handleElement := func(name string, element *schema.Element) bool {
|
||||
pos++
|
||||
|
||||
dataPath := element.PathInTree
|
||||
|
||||
if *element.RepetitionType == parquet.FieldRepetitionType_REPEATED {
|
||||
panic(fmt.Errorf("%v: repetition type must be REQUIRED or OPTIONAL type", dataPath))
|
||||
}
|
||||
|
||||
inputValue := input.Get(name)
|
||||
if *element.RepetitionType == parquet.FieldRepetitionType_REQUIRED && inputValue.IsNull() {
|
||||
err = fmt.Errorf("%v: nil value for required field", dataPath)
|
||||
return false
|
||||
}
|
||||
|
||||
add := func(element *schema.Element, value interface{}, DL, RL int64) {
|
||||
columnData := columnDataMap[element.PathInSchema]
|
||||
if columnData == nil {
|
||||
columnData = NewColumn(*element.Type)
|
||||
}
|
||||
columnData.add(value, DL, RL)
|
||||
columnDataMap[element.PathInSchema] = columnData
|
||||
}
|
||||
|
||||
// Handle primitive type element.
|
||||
if element.Type != nil {
|
||||
var value interface{}
|
||||
if value, err = inputValue.GetValue(*element.Type, element.ConvertedType); err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
DL := element.MaxDefinitionLevel
|
||||
if value == nil && DL > 0 {
|
||||
DL--
|
||||
}
|
||||
|
||||
RL := element.MaxRepetitionLevel
|
||||
if pos == 1 {
|
||||
RL = firstValueRL
|
||||
}
|
||||
|
||||
add(element, value, DL, RL)
|
||||
return true
|
||||
}
|
||||
|
||||
addNull := func() {
|
||||
valueElement := getFirstValueElement(element.Children)
|
||||
|
||||
DL := element.MaxDefinitionLevel
|
||||
if DL > 0 {
|
||||
DL--
|
||||
}
|
||||
|
||||
RL := element.MaxRepetitionLevel
|
||||
if RL > 0 {
|
||||
RL--
|
||||
}
|
||||
|
||||
add(valueElement, nil, DL, RL)
|
||||
}
|
||||
|
||||
// Handle group type element.
|
||||
if element.ConvertedType == nil {
|
||||
if inputValue.IsNull() {
|
||||
addNull()
|
||||
return true
|
||||
}
|
||||
|
||||
columnDataMap, err = populate(columnDataMap, inputValue, element.Children, firstValueRL)
|
||||
return (err == nil)
|
||||
}
|
||||
|
||||
// Handle list type element.
|
||||
if *element.ConvertedType == parquet.ConvertedType_LIST {
|
||||
if inputValue.IsNull() {
|
||||
addNull()
|
||||
return true
|
||||
}
|
||||
|
||||
var results []gjson.Result
|
||||
if results, err = inputValue.GetArray(); err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
listElement, _ := element.Children.Get("list")
|
||||
valueElement, _ := listElement.Children.Get("element")
|
||||
for i := range results {
|
||||
rl := valueElement.MaxRepetitionLevel
|
||||
if i == 0 {
|
||||
rl = firstValueRL
|
||||
}
|
||||
|
||||
var jsonData []byte
|
||||
if jsonData, err = sjson.SetBytes([]byte{}, "element", results[i].Value()); err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
var jv *jsonValue
|
||||
if jv, err = bytesToJSONValue(jsonData); err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if columnDataMap, err = populate(columnDataMap, jv, listElement.Children, rl); err != nil {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
if *element.ConvertedType == parquet.ConvertedType_MAP {
|
||||
if inputValue.IsNull() {
|
||||
addNull()
|
||||
return true
|
||||
}
|
||||
|
||||
keyValueElement, _ := element.Children.Get("key_value")
|
||||
var rerr error
|
||||
err = inputValue.Range(func(key, value gjson.Result) bool {
|
||||
if !key.Exists() || key.Type == gjson.Null {
|
||||
rerr = fmt.Errorf("%v.key_value.key: not found or null", dataPath)
|
||||
return false
|
||||
}
|
||||
|
||||
var jsonData []byte
|
||||
if jsonData, rerr = sjson.SetBytes([]byte{}, "key", key.Value()); err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if jsonData, rerr = sjson.SetBytes(jsonData, "value", value.Value()); err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
var jv *jsonValue
|
||||
if jv, rerr = bytesToJSONValue(jsonData); rerr != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if columnDataMap, rerr = populate(columnDataMap, jv, keyValueElement.Children, firstValueRL); err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
err = rerr
|
||||
return (err == nil)
|
||||
}
|
||||
|
||||
err = fmt.Errorf("%v: unsupported converted type %v in %v field type", dataPath, *element.ConvertedType, *element.RepetitionType)
|
||||
return false
|
||||
}
|
||||
|
||||
tree.Range(handleElement)
|
||||
return columnDataMap, err
|
||||
}
|
||||
|
||||
// Column - denotes values of a column.
|
||||
type Column struct {
|
||||
parquetType parquet.Type // value type.
|
||||
values []interface{} // must be a slice of parquet typed values.
|
||||
definitionLevels []int64 // exactly same length of values.
|
||||
repetitionLevels []int64 // exactly same length of values.
|
||||
rowCount int32
|
||||
maxBitWidth int32
|
||||
minValue interface{}
|
||||
maxValue interface{}
|
||||
}
|
||||
|
||||
func (column *Column) updateMinMaxValue(value interface{}) {
|
||||
if column.minValue == nil && column.maxValue == nil {
|
||||
column.minValue = value
|
||||
column.maxValue = value
|
||||
return
|
||||
}
|
||||
|
||||
switch column.parquetType {
|
||||
case parquet.Type_BOOLEAN:
|
||||
if column.minValue.(bool) && !value.(bool) {
|
||||
column.minValue = value
|
||||
}
|
||||
|
||||
if !column.maxValue.(bool) && value.(bool) {
|
||||
column.maxValue = value
|
||||
}
|
||||
|
||||
case parquet.Type_INT32:
|
||||
if column.minValue.(int32) > value.(int32) {
|
||||
column.minValue = value
|
||||
}
|
||||
|
||||
if column.maxValue.(int32) < value.(int32) {
|
||||
column.maxValue = value
|
||||
}
|
||||
|
||||
case parquet.Type_INT64:
|
||||
if column.minValue.(int64) > value.(int64) {
|
||||
column.minValue = value
|
||||
}
|
||||
|
||||
if column.maxValue.(int64) < value.(int64) {
|
||||
column.maxValue = value
|
||||
}
|
||||
|
||||
case parquet.Type_FLOAT:
|
||||
if column.minValue.(float32) > value.(float32) {
|
||||
column.minValue = value
|
||||
}
|
||||
|
||||
if column.maxValue.(float32) < value.(float32) {
|
||||
column.maxValue = value
|
||||
}
|
||||
|
||||
case parquet.Type_DOUBLE:
|
||||
if column.minValue.(float64) > value.(float64) {
|
||||
column.minValue = value
|
||||
}
|
||||
|
||||
if column.maxValue.(float64) < value.(float64) {
|
||||
column.maxValue = value
|
||||
}
|
||||
|
||||
case parquet.Type_BYTE_ARRAY:
|
||||
if bytes.Compare(column.minValue.([]byte), value.([]byte)) > 0 {
|
||||
column.minValue = value
|
||||
}
|
||||
|
||||
if bytes.Compare(column.minValue.([]byte), value.([]byte)) < 0 {
|
||||
column.maxValue = value
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (column *Column) updateStats(value interface{}, DL, RL int64) {
|
||||
if RL == 0 {
|
||||
column.rowCount++
|
||||
}
|
||||
|
||||
if value == nil {
|
||||
return
|
||||
}
|
||||
|
||||
var bitWidth int32
|
||||
switch column.parquetType {
|
||||
case parquet.Type_BOOLEAN:
|
||||
bitWidth = 1
|
||||
case parquet.Type_INT32:
|
||||
bitWidth = common.BitWidth(uint64(value.(int32)))
|
||||
case parquet.Type_INT64:
|
||||
bitWidth = common.BitWidth(uint64(value.(int64)))
|
||||
case parquet.Type_FLOAT:
|
||||
bitWidth = 32
|
||||
case parquet.Type_DOUBLE:
|
||||
bitWidth = 64
|
||||
case parquet.Type_BYTE_ARRAY:
|
||||
bitWidth = int32(len(value.([]byte)))
|
||||
}
|
||||
if column.maxBitWidth < bitWidth {
|
||||
column.maxBitWidth = bitWidth
|
||||
}
|
||||
|
||||
column.updateMinMaxValue(value)
|
||||
}
|
||||
|
||||
func (column *Column) add(value interface{}, DL, RL int64) {
|
||||
column.values = append(column.values, value)
|
||||
column.definitionLevels = append(column.definitionLevels, DL)
|
||||
column.repetitionLevels = append(column.repetitionLevels, RL)
|
||||
column.updateStats(value, DL, RL)
|
||||
}
|
||||
|
||||
// AddNull - adds nil value.
|
||||
func (column *Column) AddNull(DL, RL int64) {
|
||||
column.add(nil, DL, RL)
|
||||
}
|
||||
|
||||
// AddBoolean - adds boolean value.
|
||||
func (column *Column) AddBoolean(value bool, DL, RL int64) {
|
||||
if column.parquetType != parquet.Type_BOOLEAN {
|
||||
panic(fmt.Errorf("expected %v value", column.parquetType))
|
||||
}
|
||||
|
||||
column.add(value, DL, RL)
|
||||
}
|
||||
|
||||
// AddInt32 - adds int32 value.
|
||||
func (column *Column) AddInt32(value int32, DL, RL int64) {
|
||||
if column.parquetType != parquet.Type_INT32 {
|
||||
panic(fmt.Errorf("expected %v value", column.parquetType))
|
||||
}
|
||||
|
||||
column.add(value, DL, RL)
|
||||
}
|
||||
|
||||
// AddInt64 - adds int64 value.
|
||||
func (column *Column) AddInt64(value int64, DL, RL int64) {
|
||||
if column.parquetType != parquet.Type_INT64 {
|
||||
panic(fmt.Errorf("expected %v value", column.parquetType))
|
||||
}
|
||||
|
||||
column.add(value, DL, RL)
|
||||
}
|
||||
|
||||
// AddFloat - adds float32 value.
|
||||
func (column *Column) AddFloat(value float32, DL, RL int64) {
|
||||
if column.parquetType != parquet.Type_FLOAT {
|
||||
panic(fmt.Errorf("expected %v value", column.parquetType))
|
||||
}
|
||||
|
||||
column.add(value, DL, RL)
|
||||
}
|
||||
|
||||
// AddDouble - adds float64 value.
|
||||
func (column *Column) AddDouble(value float64, DL, RL int64) {
|
||||
if column.parquetType != parquet.Type_DOUBLE {
|
||||
panic(fmt.Errorf("expected %v value", column.parquetType))
|
||||
}
|
||||
|
||||
column.add(value, DL, RL)
|
||||
}
|
||||
|
||||
// AddByteArray - adds byte array value.
|
||||
func (column *Column) AddByteArray(value []byte, DL, RL int64) {
|
||||
if column.parquetType != parquet.Type_BYTE_ARRAY {
|
||||
panic(fmt.Errorf("expected %v value", column.parquetType))
|
||||
}
|
||||
|
||||
column.add(value, DL, RL)
|
||||
}
|
||||
|
||||
// Merge - merges columns.
|
||||
func (column *Column) Merge(column2 *Column) {
|
||||
if column.parquetType != column2.parquetType {
|
||||
panic(fmt.Errorf("merge differs in parquet type"))
|
||||
}
|
||||
|
||||
column.values = append(column.values, column2.values...)
|
||||
column.definitionLevels = append(column.definitionLevels, column2.definitionLevels...)
|
||||
column.repetitionLevels = append(column.repetitionLevels, column2.repetitionLevels...)
|
||||
|
||||
column.rowCount += column2.rowCount
|
||||
if column.maxBitWidth < column2.maxBitWidth {
|
||||
column.maxBitWidth = column2.maxBitWidth
|
||||
}
|
||||
|
||||
column.updateMinMaxValue(column2.minValue)
|
||||
column.updateMinMaxValue(column2.maxValue)
|
||||
}
|
||||
|
||||
func (column *Column) String() string {
|
||||
var strs []string
|
||||
strs = append(strs, fmt.Sprintf("parquetType: %v", column.parquetType))
|
||||
strs = append(strs, fmt.Sprintf("values: %v", column.values))
|
||||
strs = append(strs, fmt.Sprintf("definitionLevels: %v", column.definitionLevels))
|
||||
strs = append(strs, fmt.Sprintf("repetitionLevels: %v", column.repetitionLevels))
|
||||
strs = append(strs, fmt.Sprintf("rowCount: %v", column.rowCount))
|
||||
strs = append(strs, fmt.Sprintf("maxBitWidth: %v", column.maxBitWidth))
|
||||
strs = append(strs, fmt.Sprintf("minValue: %v", column.minValue))
|
||||
strs = append(strs, fmt.Sprintf("maxValue: %v", column.maxValue))
|
||||
return "{" + strings.Join(strs, ", ") + "}"
|
||||
}
|
||||
|
||||
func (column *Column) encodeValue(value interface{}, element *schema.Element) []byte {
|
||||
if value == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
valueData := encoding.PlainEncode(common.ToSliceValue([]interface{}{value}, column.parquetType), column.parquetType)
|
||||
if column.parquetType == parquet.Type_BYTE_ARRAY && element.ConvertedType != nil {
|
||||
switch *element.ConvertedType {
|
||||
case parquet.ConvertedType_UTF8, parquet.ConvertedType_DECIMAL:
|
||||
valueData = valueData[4:]
|
||||
}
|
||||
}
|
||||
|
||||
return valueData
|
||||
}
|
||||
|
||||
func (column *Column) toDataPageV2(element *schema.Element, parquetEncoding parquet.Encoding) *ColumnChunk {
|
||||
var definedValues []interface{}
|
||||
for _, value := range column.values {
|
||||
if value != nil {
|
||||
definedValues = append(definedValues, value)
|
||||
}
|
||||
}
|
||||
|
||||
var encodedData []byte
|
||||
switch parquetEncoding {
|
||||
case parquet.Encoding_PLAIN:
|
||||
encodedData = encoding.PlainEncode(common.ToSliceValue(definedValues, column.parquetType), column.parquetType)
|
||||
|
||||
case parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY:
|
||||
var bytesSlices [][]byte
|
||||
for _, value := range column.values {
|
||||
bytesSlices = append(bytesSlices, value.([]byte))
|
||||
}
|
||||
encodedData = encoding.DeltaLengthByteArrayEncode(bytesSlices)
|
||||
}
|
||||
|
||||
compressionType := parquet.CompressionCodec_SNAPPY
|
||||
if element.CompressionType != nil {
|
||||
compressionType = *element.CompressionType
|
||||
}
|
||||
|
||||
compressedData, err := common.Compress(compressionType, encodedData)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
DLData := encoding.RLEBitPackedHybridEncode(
|
||||
column.definitionLevels,
|
||||
common.BitWidth(uint64(element.MaxDefinitionLevel)),
|
||||
parquet.Type_INT64,
|
||||
)
|
||||
|
||||
RLData := encoding.RLEBitPackedHybridEncode(
|
||||
column.repetitionLevels,
|
||||
common.BitWidth(uint64(element.MaxRepetitionLevel)),
|
||||
parquet.Type_INT64,
|
||||
)
|
||||
|
||||
pageHeader := parquet.NewPageHeader()
|
||||
pageHeader.Type = parquet.PageType_DATA_PAGE_V2
|
||||
pageHeader.CompressedPageSize = int32(len(compressedData) + len(DLData) + len(RLData))
|
||||
pageHeader.UncompressedPageSize = int32(len(encodedData) + len(DLData) + len(RLData))
|
||||
pageHeader.DataPageHeaderV2 = parquet.NewDataPageHeaderV2()
|
||||
pageHeader.DataPageHeaderV2.NumValues = int32(len(column.values))
|
||||
pageHeader.DataPageHeaderV2.NumNulls = int32(len(column.values) - len(definedValues))
|
||||
pageHeader.DataPageHeaderV2.NumRows = column.rowCount
|
||||
pageHeader.DataPageHeaderV2.Encoding = parquetEncoding
|
||||
pageHeader.DataPageHeaderV2.DefinitionLevelsByteLength = int32(len(DLData))
|
||||
pageHeader.DataPageHeaderV2.RepetitionLevelsByteLength = int32(len(RLData))
|
||||
pageHeader.DataPageHeaderV2.IsCompressed = true
|
||||
pageHeader.DataPageHeaderV2.Statistics = parquet.NewStatistics()
|
||||
pageHeader.DataPageHeaderV2.Statistics.Min = column.encodeValue(column.minValue, element)
|
||||
pageHeader.DataPageHeaderV2.Statistics.Max = column.encodeValue(column.maxValue, element)
|
||||
|
||||
ts := thrift.NewTSerializer()
|
||||
ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport)
|
||||
rawData, err := ts.Write(context.TODO(), pageHeader)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
rawData = append(rawData, RLData...)
|
||||
rawData = append(rawData, DLData...)
|
||||
rawData = append(rawData, compressedData...)
|
||||
|
||||
metadata := parquet.NewColumnMetaData()
|
||||
metadata.Type = column.parquetType
|
||||
metadata.Encodings = []parquet.Encoding{
|
||||
parquet.Encoding_PLAIN,
|
||||
parquet.Encoding_RLE,
|
||||
parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY,
|
||||
}
|
||||
metadata.Codec = compressionType
|
||||
metadata.NumValues = int64(pageHeader.DataPageHeaderV2.NumValues)
|
||||
metadata.TotalCompressedSize = int64(len(rawData))
|
||||
metadata.TotalUncompressedSize = int64(pageHeader.UncompressedPageSize) + int64(len(rawData)) - int64(pageHeader.CompressedPageSize)
|
||||
metadata.PathInSchema = strings.Split(element.PathInSchema, ".")
|
||||
metadata.Statistics = parquet.NewStatistics()
|
||||
metadata.Statistics.Min = pageHeader.DataPageHeaderV2.Statistics.Min
|
||||
metadata.Statistics.Max = pageHeader.DataPageHeaderV2.Statistics.Max
|
||||
|
||||
chunk := new(ColumnChunk)
|
||||
chunk.ColumnChunk.MetaData = metadata
|
||||
chunk.dataPageLen = int64(len(rawData))
|
||||
chunk.dataLen = int64(len(rawData))
|
||||
chunk.data = rawData
|
||||
|
||||
return chunk
|
||||
}
|
||||
|
||||
func (column *Column) toRLEDictPage(element *schema.Element) *ColumnChunk {
|
||||
dictPageData, dataPageData, dictValueCount, indexBitWidth := encoding.RLEDictEncode(column.values, column.parquetType, column.maxBitWidth)
|
||||
|
||||
compressionType := parquet.CompressionCodec_SNAPPY
|
||||
if element.CompressionType != nil {
|
||||
compressionType = *element.CompressionType
|
||||
}
|
||||
|
||||
compressedData, err := common.Compress(compressionType, dictPageData)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
dictPageHeader := parquet.NewPageHeader()
|
||||
dictPageHeader.Type = parquet.PageType_DICTIONARY_PAGE
|
||||
dictPageHeader.CompressedPageSize = int32(len(compressedData))
|
||||
dictPageHeader.UncompressedPageSize = int32(len(dictPageData))
|
||||
dictPageHeader.DictionaryPageHeader = parquet.NewDictionaryPageHeader()
|
||||
dictPageHeader.DictionaryPageHeader.NumValues = dictValueCount
|
||||
dictPageHeader.DictionaryPageHeader.Encoding = parquet.Encoding_PLAIN
|
||||
|
||||
ts := thrift.NewTSerializer()
|
||||
ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport)
|
||||
dictPageRawData, err := ts.Write(context.TODO(), dictPageHeader)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
dictPageRawData = append(dictPageRawData, compressedData...)
|
||||
|
||||
RLData := encoding.RLEBitPackedHybridEncode(
|
||||
column.repetitionLevels,
|
||||
common.BitWidth(uint64(element.MaxRepetitionLevel)),
|
||||
parquet.Type_INT64,
|
||||
)
|
||||
encodedData := RLData
|
||||
|
||||
DLData := encoding.RLEBitPackedHybridEncode(
|
||||
column.definitionLevels,
|
||||
common.BitWidth(uint64(element.MaxDefinitionLevel)),
|
||||
parquet.Type_INT64,
|
||||
)
|
||||
encodedData = append(encodedData, DLData...)
|
||||
|
||||
encodedData = append(encodedData, indexBitWidth)
|
||||
encodedData = append(encodedData, dataPageData...)
|
||||
|
||||
compressedData, err = common.Compress(compressionType, encodedData)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
dataPageHeader := parquet.NewPageHeader()
|
||||
dataPageHeader.Type = parquet.PageType_DATA_PAGE
|
||||
dataPageHeader.CompressedPageSize = int32(len(compressedData))
|
||||
dataPageHeader.UncompressedPageSize = int32(len(encodedData))
|
||||
dataPageHeader.DataPageHeader = parquet.NewDataPageHeader()
|
||||
dataPageHeader.DataPageHeader.NumValues = int32(len(column.values))
|
||||
dataPageHeader.DataPageHeader.DefinitionLevelEncoding = parquet.Encoding_RLE
|
||||
dataPageHeader.DataPageHeader.RepetitionLevelEncoding = parquet.Encoding_RLE
|
||||
dataPageHeader.DataPageHeader.Encoding = parquet.Encoding_RLE_DICTIONARY
|
||||
|
||||
ts = thrift.NewTSerializer()
|
||||
ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport)
|
||||
dataPageRawData, err := ts.Write(context.TODO(), dataPageHeader)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
dataPageRawData = append(dataPageRawData, compressedData...)
|
||||
|
||||
metadata := parquet.NewColumnMetaData()
|
||||
metadata.Type = column.parquetType
|
||||
metadata.Encodings = []parquet.Encoding{
|
||||
parquet.Encoding_PLAIN,
|
||||
parquet.Encoding_RLE,
|
||||
parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY,
|
||||
parquet.Encoding_RLE_DICTIONARY,
|
||||
}
|
||||
metadata.Codec = compressionType
|
||||
metadata.NumValues = int64(dataPageHeader.DataPageHeader.NumValues)
|
||||
metadata.TotalCompressedSize = int64(len(dictPageRawData)) + int64(len(dataPageRawData))
|
||||
uncompressedSize := int64(dictPageHeader.UncompressedPageSize) + int64(len(dictPageData)) - int64(dictPageHeader.CompressedPageSize)
|
||||
uncompressedSize += int64(dataPageHeader.UncompressedPageSize) + int64(len(dataPageData)) - int64(dataPageHeader.CompressedPageSize)
|
||||
metadata.TotalUncompressedSize = uncompressedSize
|
||||
metadata.PathInSchema = strings.Split(element.PathInSchema, ".")
|
||||
metadata.Statistics = parquet.NewStatistics()
|
||||
metadata.Statistics.Min = column.encodeValue(column.minValue, element)
|
||||
metadata.Statistics.Max = column.encodeValue(column.maxValue, element)
|
||||
|
||||
chunk := new(ColumnChunk)
|
||||
chunk.ColumnChunk.MetaData = metadata
|
||||
chunk.isDictPage = true
|
||||
chunk.dictPageLen = int64(len(dictPageRawData))
|
||||
chunk.dataPageLen = int64(len(dataPageRawData))
|
||||
chunk.dataLen = chunk.dictPageLen + chunk.dataPageLen
|
||||
chunk.data = append(dictPageRawData, dataPageRawData...)
|
||||
|
||||
return chunk
|
||||
}
|
||||
|
||||
// Encode an element.
|
||||
func (column *Column) Encode(element *schema.Element) *ColumnChunk {
|
||||
parquetEncoding := getDefaultEncoding(column.parquetType)
|
||||
if element.Encoding != nil {
|
||||
parquetEncoding = *element.Encoding
|
||||
}
|
||||
|
||||
switch parquetEncoding {
|
||||
case parquet.Encoding_PLAIN, parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY:
|
||||
return column.toDataPageV2(element, parquetEncoding)
|
||||
}
|
||||
|
||||
return column.toRLEDictPage(element)
|
||||
}
|
||||
|
||||
// NewColumn - creates new column data
|
||||
func NewColumn(parquetType parquet.Type) *Column {
|
||||
switch parquetType {
|
||||
case parquet.Type_BOOLEAN, parquet.Type_INT32, parquet.Type_INT64, parquet.Type_FLOAT, parquet.Type_DOUBLE, parquet.Type_BYTE_ARRAY:
|
||||
default:
|
||||
panic(fmt.Errorf("unsupported parquet type %v", parquetType))
|
||||
}
|
||||
|
||||
return &Column{
|
||||
parquetType: parquetType,
|
||||
}
|
||||
}
|
||||
|
||||
// UnmarshalJSON - decodes JSON data into map of Column.
|
||||
func UnmarshalJSON(data []byte, tree *schema.Tree) (map[string]*Column, error) {
|
||||
if !tree.ReadOnly() {
|
||||
return nil, fmt.Errorf("tree must be read only")
|
||||
}
|
||||
|
||||
inputValue, err := bytesToJSONValue(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
columnDataMap := make(map[string]*Column)
|
||||
return populate(columnDataMap, inputValue, tree, 0)
|
||||
}
|
||||
369
pkg/s3select/internal/parquet-go/data/column_test.go
Normal file
369
pkg/s3select/internal/parquet-go/data/column_test.go
Normal file
@@ -0,0 +1,369 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package data
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/schema"
|
||||
)
|
||||
|
||||
// Fixture values referenced by expected results in this package's tests.
var (
	// int32 fixtures.
	v10 int32 = 10
	v20 int32 = 20
	v30 int32 = 30

	// Byte-array fixtures.
	ten    = []byte("ten")
	foo    = []byte("foo")
	bar    = []byte("bar")
	phone1 = []byte("1-234-567-8901")
	phone2 = []byte("1-234-567-1098")
	phone3 = []byte("1-111-222-3333")
)
|
||||
|
||||
func TestAddressBookExample(t *testing.T) {
|
||||
// message AddressBook {
|
||||
// required string owner;
|
||||
// repeated string ownerPhoneNumbers;
|
||||
// repeated group contacts {
|
||||
// required string name;
|
||||
// optional string phoneNumber;
|
||||
// }
|
||||
// }
|
||||
t.Skip("Broken")
|
||||
|
||||
addressBook := schema.NewTree()
|
||||
{
|
||||
owner, err := schema.NewElement("owner", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
ownerPhoneNumbers, err := schema.NewElement("ownerPhoneNumbers", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
ownerPhoneNumbersList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
ownerPhoneNumbersElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
contacts, err := schema.NewElement("contacts", parquet.FieldRepetitionType_OPTIONAL,
|
||||
nil, parquet.ConvertedTypePtr(parquet.ConvertedType_LIST),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
contactsList, err := schema.NewElement("list", parquet.FieldRepetitionType_REPEATED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
contactsElement, err := schema.NewElement("element", parquet.FieldRepetitionType_REQUIRED,
|
||||
nil, nil,
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
contactName, err := schema.NewElement("name", parquet.FieldRepetitionType_REQUIRED,
|
||||
parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
contactPhoneNumber, err := schema.NewElement("phoneNumber", parquet.FieldRepetitionType_OPTIONAL,
|
||||
parquet.TypePtr(parquet.Type_BYTE_ARRAY), parquet.ConvertedTypePtr(parquet.ConvertedType_UTF8),
|
||||
nil, nil, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = addressBook.Set("owner", owner); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = addressBook.Set("ownerPhoneNumbers", ownerPhoneNumbers); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = addressBook.Set("ownerPhoneNumbers.list", ownerPhoneNumbersList); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = addressBook.Set("ownerPhoneNumbers.list.element", ownerPhoneNumbersElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err = addressBook.Set("contacts", contacts); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = addressBook.Set("contacts.list", contactsList); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = addressBook.Set("contacts.list.element", contactsElement); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = addressBook.Set("contacts.list.element.name", contactName); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err = addressBook.Set("contacts.list.element.phoneNumber", contactPhoneNumber); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
if _, _, err := addressBook.ToParquetSchema(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
case2Data := `{
|
||||
"owner": "foo"
|
||||
}`
|
||||
result2 := map[string]*Column{
|
||||
"owner": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{foo},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"ownerPhoneNumbers.list.element": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"contacts.list.element.name": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
}
|
||||
|
||||
case3Data := `{
|
||||
"owner": "foo",
|
||||
"ownerPhoneNumbers": [
|
||||
"1-234-567-8901"
|
||||
]
|
||||
}
|
||||
`
|
||||
result3 := map[string]*Column{
|
||||
"owner": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{foo},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"ownerPhoneNumbers.list.element": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{phone1},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"contacts.list.element.name": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
}
|
||||
|
||||
case4Data := `{
|
||||
"owner": "foo",
|
||||
"ownerPhoneNumbers": [
|
||||
"1-234-567-8901",
|
||||
"1-234-567-1098"
|
||||
]
|
||||
}
|
||||
`
|
||||
result4 := map[string]*Column{
|
||||
"owner": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{foo},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"ownerPhoneNumbers.list.element": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{phone1, phone2},
|
||||
definitionLevels: []int64{2, 2},
|
||||
repetitionLevels: []int64{0, 1},
|
||||
},
|
||||
"contacts.list.element.name": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
}
|
||||
|
||||
case5Data := `{
|
||||
"contacts": [
|
||||
{
|
||||
"name": "bar"
|
||||
}
|
||||
],
|
||||
"owner": "foo"
|
||||
}`
|
||||
result5 := map[string]*Column{
|
||||
"owner": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{foo},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"ownerPhoneNumbers.list.element": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"contacts.list.element.name": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{bar},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"contacts.list.element.phoneNumber": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{1},
|
||||
},
|
||||
}
|
||||
|
||||
case6Data := `{
|
||||
"contacts": [
|
||||
{
|
||||
"name": "bar",
|
||||
"phoneNumber": "1-111-222-3333"
|
||||
}
|
||||
],
|
||||
"owner": "foo"
|
||||
}`
|
||||
result6 := map[string]*Column{
|
||||
"owner": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{foo},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"ownerPhoneNumbers.list.element": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{nil},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"contacts.list.element.name": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{bar},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"contacts.list.element.phoneNumber": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{phone3},
|
||||
definitionLevels: []int64{3},
|
||||
repetitionLevels: []int64{1},
|
||||
},
|
||||
}
|
||||
|
||||
case7Data := `{
|
||||
"contacts": [
|
||||
{
|
||||
"name": "bar",
|
||||
"phoneNumber": "1-111-222-3333"
|
||||
}
|
||||
],
|
||||
"owner": "foo",
|
||||
"ownerPhoneNumbers": [
|
||||
"1-234-567-8901",
|
||||
"1-234-567-1098"
|
||||
]
|
||||
}`
|
||||
result7 := map[string]*Column{
|
||||
"owner": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{foo},
|
||||
definitionLevels: []int64{0},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"ownerPhoneNumbers.list.element": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{phone1, phone2},
|
||||
definitionLevels: []int64{2, 2},
|
||||
repetitionLevels: []int64{0, 1},
|
||||
},
|
||||
"contacts.list.element.name": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{bar},
|
||||
definitionLevels: []int64{2},
|
||||
repetitionLevels: []int64{0},
|
||||
},
|
||||
"contacts.list.element.phoneNumber": {
|
||||
parquetType: parquet.Type_BYTE_ARRAY,
|
||||
values: []interface{}{phone3},
|
||||
definitionLevels: []int64{3},
|
||||
repetitionLevels: []int64{1},
|
||||
},
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
data string
|
||||
expectedResult map[string]*Column
|
||||
expectErr bool
|
||||
}{
|
||||
{`{}`, nil, true}, // err: owner: nil value for required field
|
||||
{case2Data, result2, false},
|
||||
{case3Data, result3, false},
|
||||
{case4Data, result4, false},
|
||||
{case5Data, result5, false},
|
||||
{case6Data, result6, false},
|
||||
{case7Data, result7, false},
|
||||
}
|
||||
|
||||
for i, testCase := range testCases {
|
||||
result, err := UnmarshalJSON([]byte(testCase.data), addressBook)
|
||||
expectErr := (err != nil)
|
||||
|
||||
if testCase.expectErr != expectErr {
|
||||
t.Fatalf("case %v: error: expected: %v, got: %v", i+1, testCase.expectErr, expectErr)
|
||||
}
|
||||
|
||||
if !testCase.expectErr {
|
||||
if !reflect.DeepEqual(result, testCase.expectedResult) {
|
||||
t.Errorf("case %v: result: expected: %v, got: %v", i+1, testCase.expectedResult, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
65
pkg/s3select/internal/parquet-go/data/data.go
Normal file
65
pkg/s3select/internal/parquet-go/data/data.go
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package data
|
||||
|
||||
import (
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
|
||||
)
|
||||
|
||||
// ColumnChunk ...
|
||||
type ColumnChunk struct {
|
||||
parquet.ColumnChunk
|
||||
isDictPage bool
|
||||
dictPageLen int64
|
||||
dataPageLen int64
|
||||
dataLen int64
|
||||
data []byte
|
||||
}
|
||||
|
||||
// Data returns the data.
|
||||
func (chunk *ColumnChunk) Data() []byte {
|
||||
return chunk.data
|
||||
}
|
||||
|
||||
// DataLen returns the length of the data.
|
||||
func (chunk *ColumnChunk) DataLen() int64 {
|
||||
return chunk.dataLen
|
||||
}
|
||||
|
||||
// NewRowGroup creates a new row group.
|
||||
func NewRowGroup(chunks []*ColumnChunk, numRows, offset int64) *parquet.RowGroup {
|
||||
rows := parquet.NewRowGroup()
|
||||
rows.NumRows = numRows
|
||||
|
||||
for _, chunk := range chunks {
|
||||
rows.Columns = append(rows.Columns, &chunk.ColumnChunk)
|
||||
rows.TotalByteSize += chunk.dataLen
|
||||
|
||||
chunk.ColumnChunk.FileOffset = offset
|
||||
|
||||
if chunk.isDictPage {
|
||||
dictPageOffset := offset
|
||||
chunk.ColumnChunk.MetaData.DictionaryPageOffset = &dictPageOffset
|
||||
offset += chunk.dictPageLen
|
||||
}
|
||||
|
||||
chunk.ColumnChunk.MetaData.DataPageOffset = offset
|
||||
offset += chunk.dataPageLen
|
||||
}
|
||||
|
||||
return rows
|
||||
}
|
||||
107
pkg/s3select/internal/parquet-go/data/jsonvalue.go
Normal file
107
pkg/s3select/internal/parquet-go/data/jsonvalue.go
Normal file
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package data
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
type jsonValue struct {
|
||||
result *gjson.Result
|
||||
path *string
|
||||
}
|
||||
|
||||
func (v *jsonValue) String() string {
|
||||
if v.result == nil {
|
||||
return "<nil>"
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%v", *v.result)
|
||||
}
|
||||
|
||||
func (v *jsonValue) IsNull() bool {
|
||||
return v.result == nil || v.result.Type == gjson.Null
|
||||
}
|
||||
|
||||
func (v *jsonValue) Get(path string) *jsonValue {
|
||||
if v.path != nil {
|
||||
var result *gjson.Result
|
||||
if *v.path == path {
|
||||
result = v.result
|
||||
}
|
||||
|
||||
return resultToJSONValue(result)
|
||||
}
|
||||
|
||||
if v.result == nil {
|
||||
return resultToJSONValue(nil)
|
||||
}
|
||||
|
||||
result := v.result.Get(path)
|
||||
if !result.Exists() {
|
||||
return resultToJSONValue(nil)
|
||||
}
|
||||
|
||||
return resultToJSONValue(&result)
|
||||
}
|
||||
|
||||
func (v *jsonValue) GetValue(parquetType parquet.Type, convertedType *parquet.ConvertedType) (interface{}, error) {
|
||||
if v.result == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return resultToParquetValue(*v.result, parquetType, convertedType)
|
||||
}
|
||||
|
||||
func (v *jsonValue) GetArray() ([]gjson.Result, error) {
|
||||
if v.result == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return resultToArray(*v.result)
|
||||
}
|
||||
|
||||
func (v *jsonValue) Range(iterator func(key, value gjson.Result) bool) error {
|
||||
if v.result == nil || v.result.Type == gjson.Null {
|
||||
return nil
|
||||
}
|
||||
|
||||
if v.result.Type != gjson.JSON || !v.result.IsObject() {
|
||||
return fmt.Errorf("result is not Map but %v", v.result.Type)
|
||||
}
|
||||
|
||||
v.result.ForEach(iterator)
|
||||
return nil
|
||||
}
|
||||
|
||||
func resultToJSONValue(result *gjson.Result) *jsonValue {
|
||||
return &jsonValue{
|
||||
result: result,
|
||||
}
|
||||
}
|
||||
|
||||
func bytesToJSONValue(data []byte) (*jsonValue, error) {
|
||||
if !gjson.ValidBytes(data) {
|
||||
return nil, fmt.Errorf("invalid JSON data")
|
||||
}
|
||||
|
||||
result := gjson.ParseBytes(data)
|
||||
return resultToJSONValue(&result), nil
|
||||
}
|
||||
360
pkg/s3select/internal/parquet-go/data/result.go
Normal file
360
pkg/s3select/internal/parquet-go/data/result.go
Normal file
@@ -0,0 +1,360 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package data
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
func resultToBool(result gjson.Result) (value interface{}, err error) {
|
||||
switch result.Type {
|
||||
case gjson.False, gjson.True:
|
||||
return result.Bool(), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("result is not Bool but %v", result.Type)
|
||||
}
|
||||
|
||||
func resultToInt32(result gjson.Result) (value interface{}, err error) {
|
||||
if value, err = resultToInt64(result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if value.(int64) < math.MinInt32 || value.(int64) > math.MaxInt32 {
|
||||
return nil, fmt.Errorf("int32 overflow")
|
||||
}
|
||||
|
||||
return int32(value.(int64)), nil
|
||||
}
|
||||
|
||||
func resultToInt64(result gjson.Result) (value interface{}, err error) {
|
||||
if result.Type == gjson.Number {
|
||||
return result.Int(), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("result is not Number but %v", result.Type)
|
||||
}
|
||||
|
||||
func resultToFloat(result gjson.Result) (value interface{}, err error) {
|
||||
if result.Type == gjson.Number {
|
||||
return float32(result.Float()), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("result is not float32 but %v", result.Type)
|
||||
}
|
||||
|
||||
func resultToDouble(result gjson.Result) (value interface{}, err error) {
|
||||
if result.Type == gjson.Number {
|
||||
return result.Float(), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("result is not float64 but %v", result.Type)
|
||||
}
|
||||
|
||||
func resultToBytes(result gjson.Result) (interface{}, error) {
|
||||
if result.Type != gjson.JSON || !result.IsArray() {
|
||||
return nil, fmt.Errorf("result is not byte array but %v", result.Type)
|
||||
}
|
||||
|
||||
data := []byte{}
|
||||
for i, r := range result.Array() {
|
||||
if r.Type != gjson.Number {
|
||||
return nil, fmt.Errorf("result[%v] is not byte but %v", i, r.Type)
|
||||
}
|
||||
|
||||
value := r.Uint()
|
||||
if value > math.MaxUint8 {
|
||||
return nil, fmt.Errorf("byte overflow in result[%v]", i)
|
||||
}
|
||||
|
||||
data = append(data, byte(value))
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func resultToString(result gjson.Result) (value interface{}, err error) {
|
||||
if result.Type == gjson.String {
|
||||
return result.String(), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("result is not String but %v", result.Type)
|
||||
}
|
||||
|
||||
func resultToUint8(result gjson.Result) (value interface{}, err error) {
|
||||
if value, err = resultToUint64(result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if value.(uint64) > math.MaxUint8 {
|
||||
return nil, fmt.Errorf("uint8 overflow")
|
||||
}
|
||||
|
||||
return uint8(value.(uint64)), nil
|
||||
}
|
||||
|
||||
func resultToUint16(result gjson.Result) (value interface{}, err error) {
|
||||
if value, err = resultToUint64(result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if value.(uint64) > math.MaxUint16 {
|
||||
return nil, fmt.Errorf("uint16 overflow")
|
||||
}
|
||||
|
||||
return uint16(value.(uint64)), nil
|
||||
}
|
||||
|
||||
func resultToUint32(result gjson.Result) (value interface{}, err error) {
|
||||
if value, err = resultToUint64(result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if value.(uint64) > math.MaxUint32 {
|
||||
return nil, fmt.Errorf("uint32 overflow")
|
||||
}
|
||||
|
||||
return uint32(value.(uint64)), nil
|
||||
}
|
||||
|
||||
func resultToUint64(result gjson.Result) (value interface{}, err error) {
|
||||
if result.Type == gjson.Number {
|
||||
return result.Uint(), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("result is not Number but %v", result.Type)
|
||||
}
|
||||
|
||||
func resultToInt8(result gjson.Result) (value interface{}, err error) {
|
||||
if value, err = resultToInt64(result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if value.(int64) < math.MinInt8 || value.(int64) > math.MaxInt8 {
|
||||
return nil, fmt.Errorf("int8 overflow")
|
||||
}
|
||||
|
||||
return int8(value.(int64)), nil
|
||||
}
|
||||
|
||||
func resultToInt16(result gjson.Result) (value interface{}, err error) {
|
||||
if value, err = resultToInt64(result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if value.(int64) < math.MinInt16 || value.(int64) > math.MaxInt16 {
|
||||
return nil, fmt.Errorf("int16 overflow")
|
||||
}
|
||||
|
||||
return int16(value.(int64)), nil
|
||||
}
|
||||
|
||||
func stringToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) {
|
||||
switch parquetType {
|
||||
case parquet.Type_INT96, parquet.Type_BYTE_ARRAY, parquet.Type_FIXED_LEN_BYTE_ARRAY:
|
||||
return []byte(value.(string)), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("string cannot be converted to parquet type %v", parquetType)
|
||||
}
|
||||
|
||||
func uint8ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) {
|
||||
switch parquetType {
|
||||
case parquet.Type_INT32:
|
||||
return int32(value.(uint8)), nil
|
||||
case parquet.Type_INT64:
|
||||
return int64(value.(uint8)), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("uint8 cannot be converted to parquet type %v", parquetType)
|
||||
}
|
||||
|
||||
func uint16ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) {
|
||||
switch parquetType {
|
||||
case parquet.Type_INT32:
|
||||
return int32(value.(uint16)), nil
|
||||
case parquet.Type_INT64:
|
||||
return int64(value.(uint16)), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("uint16 cannot be converted to parquet type %v", parquetType)
|
||||
}
|
||||
|
||||
func uint32ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) {
|
||||
switch parquetType {
|
||||
case parquet.Type_INT32:
|
||||
return int32(value.(uint32)), nil
|
||||
case parquet.Type_INT64:
|
||||
return int64(value.(uint32)), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("uint32 cannot be converted to parquet type %v", parquetType)
|
||||
}
|
||||
|
||||
func uint64ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) {
|
||||
switch parquetType {
|
||||
case parquet.Type_INT32:
|
||||
return int32(value.(uint64)), nil
|
||||
case parquet.Type_INT64:
|
||||
return int64(value.(uint64)), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("uint64 cannot be converted to parquet type %v", parquetType)
|
||||
}
|
||||
|
||||
func int8ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) {
|
||||
switch parquetType {
|
||||
case parquet.Type_INT32:
|
||||
return int32(value.(int8)), nil
|
||||
case parquet.Type_INT64:
|
||||
return int64(value.(int8)), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("int8 cannot be converted to parquet type %v", parquetType)
|
||||
}
|
||||
|
||||
func int16ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) {
|
||||
switch parquetType {
|
||||
case parquet.Type_INT32:
|
||||
return int32(value.(int16)), nil
|
||||
case parquet.Type_INT64:
|
||||
return int64(value.(int16)), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("int16 cannot be converted to parquet type %v", parquetType)
|
||||
}
|
||||
|
||||
func int32ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) {
|
||||
switch parquetType {
|
||||
case parquet.Type_INT32:
|
||||
return value.(int32), nil
|
||||
case parquet.Type_INT64:
|
||||
return int64(value.(int32)), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("int32 cannot be converted to parquet type %v", parquetType)
|
||||
}
|
||||
|
||||
func int64ToParquetValue(value interface{}, parquetType parquet.Type) (interface{}, error) {
|
||||
switch parquetType {
|
||||
case parquet.Type_INT32:
|
||||
return int32(value.(int64)), nil
|
||||
case parquet.Type_INT64:
|
||||
return value.(int64), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("int64 cannot be converted to parquet type %v", parquetType)
|
||||
}
|
||||
|
||||
func resultToParquetValueByConvertedValue(result gjson.Result, convertedType parquet.ConvertedType, parquetType parquet.Type) (value interface{}, err error) {
|
||||
if result.Type == gjson.Null {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
switch convertedType {
|
||||
case parquet.ConvertedType_UTF8:
|
||||
if value, err = resultToString(result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stringToParquetValue(value, parquetType)
|
||||
case parquet.ConvertedType_UINT_8:
|
||||
if value, err = resultToUint8(result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return uint8ToParquetValue(value, parquetType)
|
||||
case parquet.ConvertedType_UINT_16:
|
||||
if value, err = resultToUint16(result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return uint16ToParquetValue(value, parquetType)
|
||||
case parquet.ConvertedType_UINT_32:
|
||||
if value, err = resultToUint32(result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return uint32ToParquetValue(value, parquetType)
|
||||
case parquet.ConvertedType_UINT_64:
|
||||
if value, err = resultToUint64(result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return uint64ToParquetValue(value, parquetType)
|
||||
case parquet.ConvertedType_INT_8:
|
||||
if value, err = resultToInt8(result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return int8ToParquetValue(value, parquetType)
|
||||
case parquet.ConvertedType_INT_16:
|
||||
if value, err = resultToInt16(result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return int16ToParquetValue(value, parquetType)
|
||||
case parquet.ConvertedType_INT_32:
|
||||
if value, err = resultToInt32(result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return int32ToParquetValue(value, parquetType)
|
||||
case parquet.ConvertedType_INT_64:
|
||||
if value, err = resultToInt64(result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return int64ToParquetValue(value, parquetType)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unsupported converted type %v", convertedType)
|
||||
}
|
||||
|
||||
func resultToParquetValue(result gjson.Result, parquetType parquet.Type, convertedType *parquet.ConvertedType) (interface{}, error) {
|
||||
if convertedType != nil {
|
||||
return resultToParquetValueByConvertedValue(result, *convertedType, parquetType)
|
||||
}
|
||||
|
||||
if result.Type == gjson.Null {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
switch parquetType {
|
||||
case parquet.Type_BOOLEAN:
|
||||
return resultToBool(result)
|
||||
case parquet.Type_INT32:
|
||||
return resultToInt32(result)
|
||||
case parquet.Type_INT64:
|
||||
return resultToInt64(result)
|
||||
case parquet.Type_FLOAT:
|
||||
return resultToFloat(result)
|
||||
case parquet.Type_DOUBLE:
|
||||
return resultToDouble(result)
|
||||
case parquet.Type_INT96, parquet.Type_BYTE_ARRAY, parquet.Type_FIXED_LEN_BYTE_ARRAY:
|
||||
return resultToBytes(result)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unknown parquet type %v", parquetType)
|
||||
}
|
||||
|
||||
func resultToArray(result gjson.Result) ([]gjson.Result, error) {
|
||||
if result.Type == gjson.Null {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if result.Type != gjson.JSON || !result.IsArray() {
|
||||
return nil, fmt.Errorf("result is not Array but %v", result.Type)
|
||||
}
|
||||
|
||||
return result.Array(), nil
|
||||
}
|
||||
Reference in New Issue
Block a user