Add object compression support (#6292)

Add support for streaming (golang/LZ77/snappy) compression.
This commit is contained in:
Praveen raj Mani
2018-09-28 09:06:17 +05:30
committed by Nitish Tiwari
parent 5c765bc63e
commit ce9d36d954
57 changed files with 1321 additions and 173 deletions

View File

@@ -25,15 +25,18 @@ import (
"net/http"
"path"
"runtime"
"strconv"
"strings"
"sync"
"time"
"unicode/utf8"
snappy "github.com/golang/snappy"
"github.com/minio/minio/cmd/crypto"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/dns"
"github.com/minio/minio/pkg/ioutil"
"github.com/minio/minio/pkg/wildcard"
"github.com/skyrings/skyring-common/tools/uuid"
)
@@ -301,6 +304,107 @@ func getRandomHostPort(records []dns.SrvRecord) (string, int) {
return srvRecord.Host, srvRecord.Port
}
// IsCompressed returns true if the object is marked as compressed.
func (o ObjectInfo) IsCompressed() bool {
_, ok := o.UserDefined[ReservedMetadataPrefix+"compression"]
return ok
}
// GetActualSize - read the decompressed size from the meta json.
func (o ObjectInfo) GetActualSize() int64 {
metadata := o.UserDefined
sizeStr, ok := metadata[ReservedMetadataPrefix+"actual-size"]
if ok {
size, err := strconv.ParseInt(sizeStr, 10, 64)
if err == nil {
return size
}
}
return -1
}
// Disabling compression for encrypted enabled requests.
// Using compression and encryption together enables room for side channel attacks.
// Eliminate non-compressible objects by extensions/content-types.
func isCompressible(header http.Header, object string) bool {
if hasServerSideEncryptionHeader(header) || excludeForCompression(header, object) {
return false
}
return true
}
// Eliminate the non-compressible objects.
func excludeForCompression(header http.Header, object string) bool {
objStr := object
contentType := header.Get("Content-Type")
if globalIsCompressionEnabled {
// We strictly disable compression for standard extensions/content-types (`compressed`).
if hasStringSuffixInSlice(objStr, standardExcludeCompressExtensions) || hasPattern(standardExcludeCompressContentTypes, contentType) {
return true
}
// Filter compression includes.
if len(globalCompressExtensions) > 0 || len(globalCompressMimeTypes) > 0 {
extensions := globalCompressExtensions
mimeTypes := globalCompressMimeTypes
if hasStringSuffixInSlice(objStr, extensions) || hasPattern(mimeTypes, contentType) {
return false
}
return true
}
return false
}
return true
}
// Utility which returns if a string is present in the list.
func hasStringSuffixInSlice(str string, list []string) bool {
for _, v := range list {
if strings.HasSuffix(str, v) {
return true
}
}
return false
}
// Returns true if any of the given wildcard patterns match the matchStr.
func hasPattern(patterns []string, matchStr string) bool {
for _, pattern := range patterns {
if ok := wildcard.MatchSimple(pattern, matchStr); ok {
return true
}
}
return false
}
// Returns the part file name which matches the partNumber and etag.
func getPartFile(entries []string, partNumber int, etag string) string {
for _, entry := range entries {
if strings.HasPrefix(entry, fmt.Sprintf("%.5d.%s.", partNumber, etag)) {
return entry
}
}
return ""
}
// Returs the compressed offset which should be skipped.
func getCompressedOffsets(objectInfo ObjectInfo, offset int64) (int64, int64) {
var compressedOffset int64
var skipLength int64
var cumulativeActualSize int64
if len(objectInfo.Parts) > 0 {
for _, part := range objectInfo.Parts {
cumulativeActualSize += part.ActualSize
if cumulativeActualSize <= offset {
compressedOffset += part.Size
} else {
skipLength = cumulativeActualSize - part.ActualSize
break
}
}
}
return compressedOffset, offset - skipLength
}
// byBucketName is a collection satisfying sort.Interface.
type byBucketName []BucketInfo
@@ -352,6 +456,7 @@ func NewGetObjectReader(rs *HTTPRangeSpec, oi ObjectInfo, cleanUpFns ...func())
}()
isEncrypted := crypto.IsEncrypted(oi.UserDefined)
isCompressed := oi.IsCompressed()
var skipLen int64
// Calculate range to read (different for
// e.g. encrypted/compressed objects)
@@ -407,6 +512,50 @@ func NewGetObjectReader(rs *HTTPRangeSpec, oi ObjectInfo, cleanUpFns ...func())
}
return r, nil
}
case isCompressed:
// Read the decompressed size from the meta.json.
actualSize := oi.GetActualSize()
if actualSize < 0 {
return nil, 0, 0, errInvalidDecompressedSize
}
off, length = int64(0), oi.Size
decOff, decLength := int64(0), actualSize
if rs != nil {
off, length, err = rs.GetOffsetLength(actualSize)
if err != nil {
return nil, 0, 0, err
}
// Incase of range based queries on multiparts, the offset and length are reduced.
off, decOff = getCompressedOffsets(oi, off)
decLength = length
length = oi.Size - off
// For negative length we read everything.
if decLength < 0 {
decLength = actualSize - decOff
}
// Reply back invalid range if the input offset and length fall out of range.
if decOff > actualSize || decOff+decLength > actualSize {
return nil, 0, 0, errInvalidRange
}
}
fn = func(inputReader io.Reader, _ http.Header, cFns ...func()) (r *GetObjectReader, err error) {
// Decompression reader.
snappyReader := snappy.NewReader(inputReader)
// Apply the skipLen and limit on the
// decompressed stream
decReader := io.LimitReader(ioutil.NewSkipReader(snappyReader, decOff), decLength)
oi.Size = decLength
// Assemble the GetObjectReader
r = &GetObjectReader{
ObjInfo: oi,
pReader: decReader,
cleanUpFns: append(cleanUpFns, cFns...),
}
return r, nil
}
default:
off, length, err = rs.GetOffsetLength(oi.Size)