mirror of
https://github.com/minio/minio.git
synced 2025-11-20 18:06:10 -05:00
Add object compression support (#6292)
Add support for streaming (golang/LZ77/snappy) compression.
This commit is contained in:
committed by
Nitish Tiwari
parent
5c765bc63e
commit
ce9d36d954
@@ -25,15 +25,18 @@ import (
|
||||
"net/http"
|
||||
"path"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
snappy "github.com/golang/snappy"
|
||||
"github.com/minio/minio/cmd/crypto"
|
||||
"github.com/minio/minio/cmd/logger"
|
||||
"github.com/minio/minio/pkg/dns"
|
||||
"github.com/minio/minio/pkg/ioutil"
|
||||
"github.com/minio/minio/pkg/wildcard"
|
||||
"github.com/skyrings/skyring-common/tools/uuid"
|
||||
)
|
||||
|
||||
@@ -301,6 +304,107 @@ func getRandomHostPort(records []dns.SrvRecord) (string, int) {
|
||||
return srvRecord.Host, srvRecord.Port
|
||||
}
|
||||
|
||||
// IsCompressed returns true if the object is marked as compressed.
|
||||
func (o ObjectInfo) IsCompressed() bool {
|
||||
_, ok := o.UserDefined[ReservedMetadataPrefix+"compression"]
|
||||
return ok
|
||||
}
|
||||
|
||||
// GetActualSize - read the decompressed size from the meta json.
|
||||
func (o ObjectInfo) GetActualSize() int64 {
|
||||
metadata := o.UserDefined
|
||||
sizeStr, ok := metadata[ReservedMetadataPrefix+"actual-size"]
|
||||
if ok {
|
||||
size, err := strconv.ParseInt(sizeStr, 10, 64)
|
||||
if err == nil {
|
||||
return size
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// Disabling compression for encrypted enabled requests.
|
||||
// Using compression and encryption together enables room for side channel attacks.
|
||||
// Eliminate non-compressible objects by extensions/content-types.
|
||||
func isCompressible(header http.Header, object string) bool {
|
||||
if hasServerSideEncryptionHeader(header) || excludeForCompression(header, object) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Eliminate the non-compressible objects.
|
||||
func excludeForCompression(header http.Header, object string) bool {
|
||||
objStr := object
|
||||
contentType := header.Get("Content-Type")
|
||||
if globalIsCompressionEnabled {
|
||||
// We strictly disable compression for standard extensions/content-types (`compressed`).
|
||||
if hasStringSuffixInSlice(objStr, standardExcludeCompressExtensions) || hasPattern(standardExcludeCompressContentTypes, contentType) {
|
||||
return true
|
||||
}
|
||||
// Filter compression includes.
|
||||
if len(globalCompressExtensions) > 0 || len(globalCompressMimeTypes) > 0 {
|
||||
extensions := globalCompressExtensions
|
||||
mimeTypes := globalCompressMimeTypes
|
||||
if hasStringSuffixInSlice(objStr, extensions) || hasPattern(mimeTypes, contentType) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Utility which returns if a string is present in the list.
|
||||
func hasStringSuffixInSlice(str string, list []string) bool {
|
||||
for _, v := range list {
|
||||
if strings.HasSuffix(str, v) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Returns true if any of the given wildcard patterns match the matchStr.
|
||||
func hasPattern(patterns []string, matchStr string) bool {
|
||||
for _, pattern := range patterns {
|
||||
if ok := wildcard.MatchSimple(pattern, matchStr); ok {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Returns the part file name which matches the partNumber and etag.
|
||||
func getPartFile(entries []string, partNumber int, etag string) string {
|
||||
for _, entry := range entries {
|
||||
if strings.HasPrefix(entry, fmt.Sprintf("%.5d.%s.", partNumber, etag)) {
|
||||
return entry
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Returs the compressed offset which should be skipped.
|
||||
func getCompressedOffsets(objectInfo ObjectInfo, offset int64) (int64, int64) {
|
||||
var compressedOffset int64
|
||||
var skipLength int64
|
||||
var cumulativeActualSize int64
|
||||
if len(objectInfo.Parts) > 0 {
|
||||
for _, part := range objectInfo.Parts {
|
||||
cumulativeActualSize += part.ActualSize
|
||||
if cumulativeActualSize <= offset {
|
||||
compressedOffset += part.Size
|
||||
} else {
|
||||
skipLength = cumulativeActualSize - part.ActualSize
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return compressedOffset, offset - skipLength
|
||||
}
|
||||
|
||||
// byBucketName is a collection satisfying sort.Interface.
|
||||
type byBucketName []BucketInfo
|
||||
|
||||
@@ -352,6 +456,7 @@ func NewGetObjectReader(rs *HTTPRangeSpec, oi ObjectInfo, cleanUpFns ...func())
|
||||
}()
|
||||
|
||||
isEncrypted := crypto.IsEncrypted(oi.UserDefined)
|
||||
isCompressed := oi.IsCompressed()
|
||||
var skipLen int64
|
||||
// Calculate range to read (different for
|
||||
// e.g. encrypted/compressed objects)
|
||||
@@ -407,6 +512,50 @@ func NewGetObjectReader(rs *HTTPRangeSpec, oi ObjectInfo, cleanUpFns ...func())
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
case isCompressed:
|
||||
// Read the decompressed size from the meta.json.
|
||||
actualSize := oi.GetActualSize()
|
||||
if actualSize < 0 {
|
||||
return nil, 0, 0, errInvalidDecompressedSize
|
||||
}
|
||||
off, length = int64(0), oi.Size
|
||||
decOff, decLength := int64(0), actualSize
|
||||
if rs != nil {
|
||||
off, length, err = rs.GetOffsetLength(actualSize)
|
||||
if err != nil {
|
||||
return nil, 0, 0, err
|
||||
}
|
||||
// Incase of range based queries on multiparts, the offset and length are reduced.
|
||||
off, decOff = getCompressedOffsets(oi, off)
|
||||
decLength = length
|
||||
length = oi.Size - off
|
||||
|
||||
// For negative length we read everything.
|
||||
if decLength < 0 {
|
||||
decLength = actualSize - decOff
|
||||
}
|
||||
|
||||
// Reply back invalid range if the input offset and length fall out of range.
|
||||
if decOff > actualSize || decOff+decLength > actualSize {
|
||||
return nil, 0, 0, errInvalidRange
|
||||
}
|
||||
}
|
||||
fn = func(inputReader io.Reader, _ http.Header, cFns ...func()) (r *GetObjectReader, err error) {
|
||||
// Decompression reader.
|
||||
snappyReader := snappy.NewReader(inputReader)
|
||||
// Apply the skipLen and limit on the
|
||||
// decompressed stream
|
||||
decReader := io.LimitReader(ioutil.NewSkipReader(snappyReader, decOff), decLength)
|
||||
oi.Size = decLength
|
||||
|
||||
// Assemble the GetObjectReader
|
||||
r = &GetObjectReader{
|
||||
ObjInfo: oi,
|
||||
pReader: decReader,
|
||||
cleanUpFns: append(cleanUpFns, cFns...),
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
default:
|
||||
off, length, err = rs.GetOffsetLength(oi.Size)
|
||||
|
||||
Reference in New Issue
Block a user