mirror of
https://github.com/minio/minio.git
synced 2025-11-07 12:52:58 -05:00
select: Add more compression formats (#13142)
Support Zstandard, LZ4, S2, and snappy as additional compression formats for S3 Select.
This commit is contained in:
@@ -31,12 +31,16 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/klauspost/compress/s2"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
gzip "github.com/klauspost/pgzip"
|
||||
"github.com/minio/minio/internal/s3select/csv"
|
||||
"github.com/minio/minio/internal/s3select/json"
|
||||
"github.com/minio/minio/internal/s3select/parquet"
|
||||
"github.com/minio/minio/internal/s3select/simdj"
|
||||
"github.com/minio/minio/internal/s3select/sql"
|
||||
"github.com/minio/simdjson-go"
|
||||
"github.com/pierrec/lz4"
|
||||
)
|
||||
|
||||
type recordReader interface {
|
||||
@@ -57,8 +61,13 @@ type CompressionType string
|
||||
|
||||
const (
|
||||
noneType CompressionType = "none"
|
||||
gzipType CompressionType = "gzip"
|
||||
bzip2Type CompressionType = "bzip2"
|
||||
gzipType CompressionType = "GZIP"
|
||||
bzip2Type CompressionType = "BZIP2"
|
||||
|
||||
zstdType CompressionType = "ZSTD"
|
||||
lz4Type CompressionType = "LZ4"
|
||||
s2Type CompressionType = "S2"
|
||||
snappyType CompressionType = "SNAPPY"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -87,13 +96,13 @@ func (c *CompressionType) UnmarshalXML(d *xml.Decoder, start xml.StartElement) e
|
||||
return errMalformedXML(err)
|
||||
}
|
||||
|
||||
parsedType := CompressionType(strings.ToLower(s))
|
||||
if s == "" {
|
||||
parsedType := CompressionType(strings.ToUpper(s))
|
||||
if s == "" || parsedType == "NONE" {
|
||||
parsedType = noneType
|
||||
}
|
||||
|
||||
switch parsedType {
|
||||
case noneType, gzipType, bzip2Type:
|
||||
case noneType, gzipType, bzip2Type, snappyType, s2Type, zstdType, lz4Type:
|
||||
default:
|
||||
return errInvalidCompressionFormat(fmt.Errorf("invalid compression format '%v'", s))
|
||||
}
|
||||
@@ -127,7 +136,7 @@ func (input *InputSerialization) UnmarshalXML(d *xml.Decoder, start xml.StartEle
|
||||
}
|
||||
|
||||
// If no compression is specified, set to noneType
|
||||
if parsedInput.CompressionType == CompressionType("") {
|
||||
if parsedInput.CompressionType == "" {
|
||||
parsedInput.CompressionType = noneType
|
||||
}
|
||||
|
||||
@@ -309,7 +318,19 @@ func (s3Select *S3Select) Open(getReader func(offset, length int64) (io.ReadClos
|
||||
rc.Close()
|
||||
var stErr bzip2.StructuralError
|
||||
if errors.As(err, &stErr) {
|
||||
return errInvalidBZIP2CompressionFormat(err)
|
||||
return errInvalidCompression(err, s3Select.Input.CompressionType)
|
||||
}
|
||||
// Test these compressor errors
|
||||
errs := []error{
|
||||
gzip.ErrHeader, gzip.ErrChecksum,
|
||||
s2.ErrCorrupt, s2.ErrUnsupported, s2.ErrCRC,
|
||||
zstd.ErrBlockTooSmall, zstd.ErrMagicMismatch, zstd.ErrWindowSizeExceeded, zstd.ErrUnknownDictionary, zstd.ErrWindowSizeTooSmall,
|
||||
lz4.ErrInvalid, lz4.ErrBlockDependency,
|
||||
}
|
||||
for _, e := range errs {
|
||||
if errors.Is(err, e) {
|
||||
return errInvalidCompression(err, s3Select.Input.CompressionType)
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user