mirror of
https://github.com/minio/minio.git
synced 2025-11-23 02:57:42 -05:00
Add archived parquet as int. package (#9912)
Since github.com/minio/parquet-go is archived add it as internal package.
This commit is contained in:
60
pkg/s3select/internal/parquet-go/encoding/rledict-encode.go
Normal file
60
pkg/s3select/internal/parquet-go/encoding/rledict-encode.go
Normal file
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package encoding
|
||||
|
||||
import (
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/common"
|
||||
"github.com/minio/minio/pkg/s3select/internal/parquet-go/gen-go/parquet"
|
||||
)
|
||||
|
||||
// RLEDictEncode encodes values specified in https://github.com/apache/parquet-format/blob/master/Encodings.md#dictionary-encoding-plain_dictionary--2-and-rle_dictionary--8 and returns dictionary page data and data page data.
|
||||
//
|
||||
// Dictionary page data contains PLAIN encodeed slice of uniquely fully defined non-nil values.
|
||||
// Data page data contains RLE/Bit-Packed Hybrid encoded indices of fully defined non-nil values.
|
||||
//
|
||||
// Supported Types: BOOLEAN, INT32, INT64, FLOAT, DOUBLE, BYTE_ARRAY
|
||||
func RLEDictEncode(values []interface{}, parquetType parquet.Type, bitWidth int32) (dictPageData, dataPageData []byte, dictValueCount int32, indexBitWidth uint8) {
|
||||
var definedValues []interface{}
|
||||
var indices []int32
|
||||
|
||||
valueIndexMap := make(map[interface{}]int32)
|
||||
j := 0
|
||||
for i := 0; i < len(values); i = j {
|
||||
for j = i; j < len(values); j++ {
|
||||
value := values[j]
|
||||
if value == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
index, found := valueIndexMap[value]
|
||||
if !found {
|
||||
index = int32(len(definedValues))
|
||||
definedValues = append(definedValues, value)
|
||||
valueIndexMap[value] = index
|
||||
}
|
||||
|
||||
indices = append(indices, index)
|
||||
}
|
||||
}
|
||||
|
||||
indexBitWidth = uint8(common.BitWidth(uint64(indices[len(indices)-1])))
|
||||
|
||||
dictPageData = PlainEncode(common.ToSliceValue(definedValues, parquetType), parquetType)
|
||||
dataPageData = RLEBitPackedHybridEncode(indices, int32(indexBitWidth), parquet.Type_INT32)
|
||||
|
||||
return dictPageData, dataPageData, int32(len(definedValues)), indexBitWidth
|
||||
}
|
||||
Reference in New Issue
Block a user