mirror of
https://github.com/minio/minio.git
synced 2025-11-25 03:56:17 -05:00
select: Support Parquet dates (#11928)
Pass each column's Parquet schema element along with its value to the parser so that dates can be supported. Fixes #11926
This commit is contained in:
@@ -67,10 +67,18 @@ func getColumns(
|
||||
if nameColumnMap == nil {
|
||||
nameColumnMap = make(map[string]*column)
|
||||
}
|
||||
var se *parquet.SchemaElement
|
||||
for _, schema := range schemaElements {
|
||||
if schema != nil && schema.Name == columnName {
|
||||
se = schema
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
nameColumnMap[columnName] = &column{
|
||||
name: columnName,
|
||||
metadata: meta,
|
||||
schema: se,
|
||||
schemaElements: schemaElements,
|
||||
rc: rc,
|
||||
thriftReader: thriftReader,
|
||||
@@ -95,6 +103,7 @@ type column struct {
|
||||
valueIndex int
|
||||
valueType parquet.Type
|
||||
metadata *parquet.ColumnMetaData
|
||||
schema *parquet.SchemaElement
|
||||
schemaElements []*parquet.SchemaElement
|
||||
nameIndexMap map[string]int
|
||||
dictPage *page
|
||||
@@ -140,14 +149,14 @@ func (column *column) readPage() {
|
||||
column.dataTable.Merge(page.DataTable)
|
||||
}
|
||||
|
||||
func (column *column) read() (value interface{}, valueType parquet.Type) {
|
||||
func (column *column) read() (value interface{}, valueType parquet.Type, cnv *parquet.SchemaElement) {
|
||||
if column.dataTable == nil {
|
||||
column.readPage()
|
||||
column.valueIndex = 0
|
||||
}
|
||||
|
||||
if column.endOfValues {
|
||||
return nil, column.metadata.GetType()
|
||||
return nil, column.metadata.GetType(), column.schema
|
||||
}
|
||||
|
||||
value = column.dataTable.Values[column.valueIndex]
|
||||
@@ -156,5 +165,5 @@ func (column *column) read() (value interface{}, valueType parquet.Type) {
|
||||
column.dataTable = nil
|
||||
}
|
||||
|
||||
return value, column.metadata.GetType()
|
||||
return value, column.metadata.GetType(), column.schema
|
||||
}
|
||||
|
||||
@@ -72,8 +72,9 @@ func fileMetadata(getReaderFunc GetReaderFunc) (*parquet.FileMetaData, error) {
|
||||
|
||||
// Value - denotes column value
|
||||
type Value struct {
|
||||
Value interface{}
|
||||
Type parquet.Type
|
||||
Value interface{}
|
||||
Type parquet.Type
|
||||
Schema *parquet.SchemaElement
|
||||
}
|
||||
|
||||
// MarshalJSON - encodes to JSON data
|
||||
@@ -144,8 +145,9 @@ func (reader *Reader) Read() (record *Record, err error) {
|
||||
|
||||
record = newRecord(reader.nameList)
|
||||
for name := range reader.columns {
|
||||
value, valueType := reader.columns[name].read()
|
||||
record.set(name, Value{value, valueType})
|
||||
col := reader.columns[name]
|
||||
value, valueType, schema := col.read()
|
||||
record.set(name, Value{Value: value, Type: valueType, Schema: schema})
|
||||
}
|
||||
|
||||
reader.rowIndex++
|
||||
|
||||
@@ -59,9 +59,9 @@ func TestReader(t *testing.T) {
|
||||
}
|
||||
|
||||
expectedRecords := []string{
|
||||
`map[one:{-1 DOUBLE} three:{true BOOLEAN} two:{[102 111 111] BYTE_ARRAY}]`,
|
||||
`map[one:{<nil> DOUBLE} three:{false BOOLEAN} two:{[98 97 114] BYTE_ARRAY}]`,
|
||||
`map[one:{2.5 DOUBLE} three:{true BOOLEAN} two:{[98 97 122] BYTE_ARRAY}]`,
|
||||
`map[one:{-1 DOUBLE SchemaElement({Type:DOUBLE TypeLength:<nil> RepetitionType:OPTIONAL Name:one NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} three:{true BOOLEAN SchemaElement({Type:BOOLEAN TypeLength:<nil> RepetitionType:OPTIONAL Name:three NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} two:{[102 111 111] BYTE_ARRAY SchemaElement({Type:BYTE_ARRAY TypeLength:<nil> RepetitionType:OPTIONAL Name:two NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})}]`,
|
||||
`map[one:{<nil> DOUBLE SchemaElement({Type:DOUBLE TypeLength:<nil> RepetitionType:OPTIONAL Name:one NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} three:{false BOOLEAN SchemaElement({Type:BOOLEAN TypeLength:<nil> RepetitionType:OPTIONAL Name:three NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} two:{[98 97 114] BYTE_ARRAY SchemaElement({Type:BYTE_ARRAY TypeLength:<nil> RepetitionType:OPTIONAL Name:two NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})}]`,
|
||||
`map[one:{2.5 DOUBLE SchemaElement({Type:DOUBLE TypeLength:<nil> RepetitionType:OPTIONAL Name:one NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} three:{true BOOLEAN SchemaElement({Type:BOOLEAN TypeLength:<nil> RepetitionType:OPTIONAL Name:three NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} two:{[98 97 122] BYTE_ARRAY SchemaElement({Type:BYTE_ARRAY TypeLength:<nil> RepetitionType:OPTIONAL Name:two NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})}]`,
|
||||
}
|
||||
|
||||
i := 0
|
||||
@@ -76,11 +76,11 @@ func TestReader(t *testing.T) {
|
||||
}
|
||||
|
||||
if i == len(expectedRecords) {
|
||||
t.Fatalf("read more than expected record count %v", len(expectedRecords))
|
||||
t.Errorf("read more than expected record count %v", len(expectedRecords))
|
||||
}
|
||||
|
||||
if record.String() != expectedRecords[i] {
|
||||
t.Fatalf("record%v: expected: %v, got: %v", i+1, expectedRecords[i], record.String())
|
||||
t.Errorf("record%v: expected: %v, got: %v", i+1, expectedRecords[i], record.String())
|
||||
}
|
||||
|
||||
i++
|
||||
|
||||
Reference in New Issue
Block a user