select: Support Parquet dates (#11928)

Pass each column's Parquet schema element through to the parser so that date values can be supported.

Fixes #11926
This commit is contained in:
Klaus Post
2021-04-03 17:25:19 +02:00
committed by GitHub
parent bf106453b8
commit dca7cf7200
7 changed files with 280 additions and 13 deletions

View File

@@ -67,10 +67,18 @@ func getColumns(
if nameColumnMap == nil {
nameColumnMap = make(map[string]*column)
}
var se *parquet.SchemaElement
for _, schema := range schemaElements {
if schema != nil && schema.Name == columnName {
se = schema
break
}
}
nameColumnMap[columnName] = &column{
name: columnName,
metadata: meta,
schema: se,
schemaElements: schemaElements,
rc: rc,
thriftReader: thriftReader,
@@ -95,6 +103,7 @@ type column struct {
valueIndex int
valueType parquet.Type
metadata *parquet.ColumnMetaData
schema *parquet.SchemaElement
schemaElements []*parquet.SchemaElement
nameIndexMap map[string]int
dictPage *page
@@ -140,14 +149,14 @@ func (column *column) readPage() {
column.dataTable.Merge(page.DataTable)
}
func (column *column) read() (value interface{}, valueType parquet.Type) {
func (column *column) read() (value interface{}, valueType parquet.Type, cnv *parquet.SchemaElement) {
if column.dataTable == nil {
column.readPage()
column.valueIndex = 0
}
if column.endOfValues {
return nil, column.metadata.GetType()
return nil, column.metadata.GetType(), column.schema
}
value = column.dataTable.Values[column.valueIndex]
@@ -156,5 +165,5 @@ func (column *column) read() (value interface{}, valueType parquet.Type) {
column.dataTable = nil
}
return value, column.metadata.GetType()
return value, column.metadata.GetType(), column.schema
}

View File

@@ -72,8 +72,9 @@ func fileMetadata(getReaderFunc GetReaderFunc) (*parquet.FileMetaData, error) {
// Value - denotes column value
type Value struct {
Value interface{}
Type parquet.Type
Value interface{}
Type parquet.Type
Schema *parquet.SchemaElement
}
// MarshalJSON - encodes to JSON data
@@ -144,8 +145,9 @@ func (reader *Reader) Read() (record *Record, err error) {
record = newRecord(reader.nameList)
for name := range reader.columns {
value, valueType := reader.columns[name].read()
record.set(name, Value{value, valueType})
col := reader.columns[name]
value, valueType, schema := col.read()
record.set(name, Value{Value: value, Type: valueType, Schema: schema})
}
reader.rowIndex++

View File

@@ -59,9 +59,9 @@ func TestReader(t *testing.T) {
}
expectedRecords := []string{
`map[one:{-1 DOUBLE} three:{true BOOLEAN} two:{[102 111 111] BYTE_ARRAY}]`,
`map[one:{<nil> DOUBLE} three:{false BOOLEAN} two:{[98 97 114] BYTE_ARRAY}]`,
`map[one:{2.5 DOUBLE} three:{true BOOLEAN} two:{[98 97 122] BYTE_ARRAY}]`,
`map[one:{-1 DOUBLE SchemaElement({Type:DOUBLE TypeLength:<nil> RepetitionType:OPTIONAL Name:one NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} three:{true BOOLEAN SchemaElement({Type:BOOLEAN TypeLength:<nil> RepetitionType:OPTIONAL Name:three NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} two:{[102 111 111] BYTE_ARRAY SchemaElement({Type:BYTE_ARRAY TypeLength:<nil> RepetitionType:OPTIONAL Name:two NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})}]`,
`map[one:{<nil> DOUBLE SchemaElement({Type:DOUBLE TypeLength:<nil> RepetitionType:OPTIONAL Name:one NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} three:{false BOOLEAN SchemaElement({Type:BOOLEAN TypeLength:<nil> RepetitionType:OPTIONAL Name:three NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} two:{[98 97 114] BYTE_ARRAY SchemaElement({Type:BYTE_ARRAY TypeLength:<nil> RepetitionType:OPTIONAL Name:two NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})}]`,
`map[one:{2.5 DOUBLE SchemaElement({Type:DOUBLE TypeLength:<nil> RepetitionType:OPTIONAL Name:one NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} three:{true BOOLEAN SchemaElement({Type:BOOLEAN TypeLength:<nil> RepetitionType:OPTIONAL Name:three NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})} two:{[98 97 122] BYTE_ARRAY SchemaElement({Type:BYTE_ARRAY TypeLength:<nil> RepetitionType:OPTIONAL Name:two NumChildren:<nil> ConvertedType:<nil> Scale:<nil> Precision:<nil> FieldID:<nil> LogicalType:<nil>})}]`,
}
i := 0
@@ -76,11 +76,11 @@ func TestReader(t *testing.T) {
}
if i == len(expectedRecords) {
t.Fatalf("read more than expected record count %v", len(expectedRecords))
t.Errorf("read more than expected record count %v", len(expectedRecords))
}
if record.String() != expectedRecords[i] {
t.Fatalf("record%v: expected: %v, got: %v", i+1, expectedRecords[i], record.String())
t.Errorf("record%v: expected: %v, got: %v", i+1, expectedRecords[i], record.String())
}
i++