select: Add more compression formats (#13142)

Support Zstandard, LZ4, S2, and snappy as additional 
compression formats for S3 Select.
This commit is contained in:
Klaus Post
2021-09-06 09:09:53 -07:00
committed by GitHub
parent 42b1d92b2a
commit b2c92cdaaa
6 changed files with 62 additions and 30 deletions

View File

@@ -31,12 +31,16 @@ import (
"strings"
"sync"
"github.com/klauspost/compress/s2"
"github.com/klauspost/compress/zstd"
gzip "github.com/klauspost/pgzip"
"github.com/minio/minio/internal/s3select/csv"
"github.com/minio/minio/internal/s3select/json"
"github.com/minio/minio/internal/s3select/parquet"
"github.com/minio/minio/internal/s3select/simdj"
"github.com/minio/minio/internal/s3select/sql"
"github.com/minio/simdjson-go"
"github.com/pierrec/lz4"
)
type recordReader interface {
@@ -57,8 +61,13 @@ type CompressionType string
const (
noneType CompressionType = "none"
gzipType CompressionType = "gzip"
bzip2Type CompressionType = "bzip2"
gzipType CompressionType = "GZIP"
bzip2Type CompressionType = "BZIP2"
zstdType CompressionType = "ZSTD"
lz4Type CompressionType = "LZ4"
s2Type CompressionType = "S2"
snappyType CompressionType = "SNAPPY"
)
const (
@@ -87,13 +96,13 @@ func (c *CompressionType) UnmarshalXML(d *xml.Decoder, start xml.StartElement) e
return errMalformedXML(err)
}
parsedType := CompressionType(strings.ToLower(s))
if s == "" {
parsedType := CompressionType(strings.ToUpper(s))
if s == "" || parsedType == "NONE" {
parsedType = noneType
}
switch parsedType {
case noneType, gzipType, bzip2Type:
case noneType, gzipType, bzip2Type, snappyType, s2Type, zstdType, lz4Type:
default:
return errInvalidCompressionFormat(fmt.Errorf("invalid compression format '%v'", s))
}
@@ -127,7 +136,7 @@ func (input *InputSerialization) UnmarshalXML(d *xml.Decoder, start xml.StartEle
}
// If no compression is specified, set to noneType
if parsedInput.CompressionType == CompressionType("") {
if parsedInput.CompressionType == "" {
parsedInput.CompressionType = noneType
}
@@ -309,7 +318,19 @@ func (s3Select *S3Select) Open(getReader func(offset, length int64) (io.ReadClos
rc.Close()
var stErr bzip2.StructuralError
if errors.As(err, &stErr) {
return errInvalidBZIP2CompressionFormat(err)
return errInvalidCompression(err, s3Select.Input.CompressionType)
}
// Test these compressor errors
errs := []error{
gzip.ErrHeader, gzip.ErrChecksum,
s2.ErrCorrupt, s2.ErrUnsupported, s2.ErrCRC,
zstd.ErrBlockTooSmall, zstd.ErrMagicMismatch, zstd.ErrWindowSizeExceeded, zstd.ErrUnknownDictionary, zstd.ErrWindowSizeTooSmall,
lz4.ErrInvalid, lz4.ErrBlockDependency,
}
for _, e := range errs {
if errors.Is(err, e) {
return errInvalidCompression(err, s3Select.Input.CompressionType)
}
}
return err
}