Switch to Snappy -> S2 compression (#8189)

Klaus Post 2019-09-25 23:08:24 -07:00 committed by Harshavardhana
parent be313f1758
commit ff726969aa
12 changed files with 224 additions and 160 deletions

View File

@@ -251,18 +251,18 @@ var (
 	// configuration must be present.
 	globalAutoEncryption bool
-	// Is compression include extensions/content-types set.
+	// Is compression include extensions/content-types set?
 	globalIsEnvCompression bool
-	// Is compression enabeld.
+	// Is compression enabled?
 	globalIsCompressionEnabled = false
 	// Include-list for compression.
-	globalCompressExtensions = []string{".txt", ".log", ".csv", ".json"}
-	globalCompressMimeTypes  = []string{"text/csv", "text/plain", "application/json"}
+	globalCompressExtensions = []string{".txt", ".log", ".csv", ".json", ".tar", ".xml", ".bin"}
+	globalCompressMimeTypes  = []string{"text/*", "application/json", "application/xml"}
 	// Some standard object extensions which we strictly dis-allow for compression.
-	standardExcludeCompressExtensions = []string{".gz", ".bz2", ".rar", ".zip", ".7z"}
+	standardExcludeCompressExtensions = []string{".gz", ".bz2", ".rar", ".zip", ".7z", ".xz", ".mp4", ".mkv", ".mov"}
 	// Some standard content-types which we strictly dis-allow for compression.
 	standardExcludeCompressContentTypes = []string{"video/*", "audio/*", "application/zip", "application/x-gzip", "application/x-zip-compressed", " application/x-compress", "application/x-spoon"}

View File

@@ -17,7 +17,6 @@
 package cmd

 import (
-	"archive/zip"
 	"bytes"
 	"context"
 	"encoding/json"
@@ -31,6 +30,7 @@ import (
 	"sync"
 	"time"

+	"github.com/klauspost/compress/zip"
 	"github.com/minio/minio/cmd/crypto"
 	"github.com/minio/minio/cmd/logger"
 	"github.com/minio/minio/pkg/event"

View File

@@ -33,7 +33,8 @@ import (
 	"time"
 	"unicode/utf8"

-	snappy "github.com/golang/snappy"
+	"github.com/klauspost/compress/s2"
+	"github.com/klauspost/readahead"
 	"github.com/minio/minio-go/v6/pkg/s3utils"
 	"github.com/minio/minio/cmd/crypto"
 	xhttp "github.com/minio/minio/cmd/http"
@@ -56,6 +57,12 @@ const (
 	minioMetaTmpBucket = minioMetaBucket + "/tmp"
 	// DNS separator (period), used for bucket name validation.
 	dnsDelimiter = "."
+	// On compressed files bigger than this;
+	compReadAheadSize = 100 << 20
+	// Read this many buffers ahead.
+	compReadAheadBuffers = 5
+	// Size of each buffer.
+	compReadAheadBufSize = 1 << 20
 )

 // isMinioBucket returns true if given bucket is a MinIO internal
@@ -337,6 +344,22 @@ func (o ObjectInfo) IsCompressed() bool {
 	return ok
 }

+// IsCompressedOK returns whether the object is compressed and can be decompressed.
+func (o ObjectInfo) IsCompressedOK() (bool, error) {
+	scheme, ok := o.UserDefined[ReservedMetadataPrefix+"compression"]
+	if !ok {
+		return false, nil
+	}
+	if crypto.IsEncrypted(o.UserDefined) {
+		return true, fmt.Errorf("compression %q and encryption enabled on same object", scheme)
+	}
+	switch scheme {
+	case compressionAlgorithmV1, compressionAlgorithmV2:
+		return true, nil
+	}
+	return true, fmt.Errorf("unknown compression scheme: %s", scheme)
+}
+
 // GetActualSize - read the decompressed size from the meta json.
 func (o ObjectInfo) GetActualSize() int64 {
 	metadata := o.UserDefined
@@ -364,29 +387,34 @@ func isCompressible(header http.Header, object string) bool {
 func excludeForCompression(header http.Header, object string) bool {
 	objStr := object
 	contentType := header.Get(xhttp.ContentType)
-	if globalIsCompressionEnabled {
-		// We strictly disable compression for standard extensions/content-types (`compressed`).
-		if hasStringSuffixInSlice(objStr, standardExcludeCompressExtensions) || hasPattern(standardExcludeCompressContentTypes, contentType) {
-			return true
-		}
-		// Filter compression includes.
-		if len(globalCompressExtensions) > 0 || len(globalCompressMimeTypes) > 0 {
-			extensions := globalCompressExtensions
-			mimeTypes := globalCompressMimeTypes
-			if hasStringSuffixInSlice(objStr, extensions) || hasPattern(mimeTypes, contentType) {
-				return false
-			}
-			return true
-		}
+	if !globalIsCompressionEnabled {
+		return true
+	}
+
+	// We strictly disable compression for standard extensions/content-types (`compressed`).
+	if hasStringSuffixInSlice(objStr, standardExcludeCompressExtensions) || hasPattern(standardExcludeCompressContentTypes, contentType) {
+		return true
+	}
+
+	// Filter compression includes.
+	if len(globalCompressExtensions) == 0 || len(globalCompressMimeTypes) == 0 {
 		return false
 	}
+
+	extensions := globalCompressExtensions
+	mimeTypes := globalCompressMimeTypes
+	if hasStringSuffixInSlice(objStr, extensions) || hasPattern(mimeTypes, contentType) {
+		return false
+	}
 	return true
 }
 // Utility which returns if a string is present in the list.
+// Comparison is case insensitive.
 func hasStringSuffixInSlice(str string, list []string) bool {
+	str = strings.ToLower(str)
 	for _, v := range list {
-		if strings.HasSuffix(str, v) {
+		if strings.HasSuffix(str, strings.ToLower(v)) {
 			return true
 		}
 	}
@@ -413,7 +441,7 @@ func getPartFile(entries []string, partNumber int, etag string) string {
 	return ""
 }

-// Returs the compressed offset which should be skipped.
+// Returns the compressed offset which should be skipped.
 func getCompressedOffsets(objectInfo ObjectInfo, offset int64) (int64, int64) {
 	var compressedOffset int64
 	var skipLength int64
@@ -494,7 +522,10 @@ func NewGetObjectReader(rs *HTTPRangeSpec, oi ObjectInfo, pcfn CheckCopyPrecondi
 	}()
 	isEncrypted := crypto.IsEncrypted(oi.UserDefined)
-	isCompressed := oi.IsCompressed()
+	isCompressed, err := oi.IsCompressedOK()
+	if err != nil {
+		return nil, 0, 0, err
+	}
 	var skipLen int64
 	// Calculate range to read (different for
 	// e.g. encrypted/compressed objects)
@@ -575,7 +606,7 @@ func NewGetObjectReader(rs *HTTPRangeSpec, oi ObjectInfo, pcfn CheckCopyPrecondi
 	if err != nil {
 		return nil, 0, 0, err
 	}
-	// Incase of range based queries on multiparts, the offset and length are reduced.
+	// In case of range based queries on multiparts, the offset and length are reduced.
 	off, decOff = getCompressedOffsets(oi, off)
 	decLength = length
 	length = oi.Size - off
@@ -602,10 +633,23 @@ func NewGetObjectReader(rs *HTTPRangeSpec, oi ObjectInfo, pcfn CheckCopyPrecondi
 		}
 	}
 	// Decompression reader.
-	snappyReader := snappy.NewReader(inputReader)
-	// Apply the skipLen and limit on the
-	// decompressed stream
-	decReader := io.LimitReader(ioutil.NewSkipReader(snappyReader, decOff), decLength)
+	s2Reader := s2.NewReader(inputReader)
+	// Apply the skipLen and limit on the decompressed stream.
+	err = s2Reader.Skip(decOff)
+	if err != nil {
+		return nil, err
+	}
+	decReader := io.LimitReader(s2Reader, decLength)
+	if decLength > compReadAheadSize {
+		rah, err := readahead.NewReaderSize(decReader, compReadAheadBuffers, compReadAheadBufSize)
+		if err == nil {
+			decReader = rah
+			cFns = append(cFns, func() {
+				rah.Close()
+			})
+		}
+	}
 	oi.Size = decLength

 	// Assemble the GetObjectReader
@@ -760,55 +804,29 @@ func CleanMinioInternalMetadataKeys(metadata map[string]string) map[string]string {
 	return newMeta
 }

-// snappyCompressReader compresses data as it reads
-// from the underlying io.Reader.
-type snappyCompressReader struct {
-	r      io.Reader
-	w      *snappy.Writer
-	closed bool
-	buf    bytes.Buffer
-}
-
-func newSnappyCompressReader(r io.Reader) *snappyCompressReader {
-	cr := &snappyCompressReader{r: r}
-	cr.w = snappy.NewBufferedWriter(&cr.buf)
-	return cr
-}
-
-func (cr *snappyCompressReader) Read(p []byte) (int, error) {
-	if cr.closed {
-		// if snappy writer is closed r has been completely read,
-		// return any remaining data in buf.
-		return cr.buf.Read(p)
-	}
-
-	// read from original using p as buffer
-	nr, readErr := cr.r.Read(p)
-
-	// write read bytes to snappy writer
-	nw, err := cr.w.Write(p[:nr])
-	if err != nil {
-		return 0, err
-	}
-	if nw != nr {
-		return 0, io.ErrShortWrite
-	}
-
-	// if last of data from reader, close snappy writer to flush
-	if readErr == io.EOF {
-		err := cr.w.Close()
-		cr.closed = true
-		if err != nil {
-			return 0, err
-		}
-	}
-
-	// read compressed bytes out of buf
-	n, err := cr.buf.Read(p)
-	if readErr != io.EOF && (err == nil || err == io.EOF) {
-		err = readErr
-	}
-	return n, err
+// newS2CompressReader will read data from r, compress it and return the compressed data as a Reader.
+// Use Close to ensure resources are released on incomplete streams.
+func newS2CompressReader(r io.Reader) io.ReadCloser {
+	pr, pw := io.Pipe()
+	comp := s2.NewWriter(pw)
+	// Copy input to compressor
+	go func() {
+		_, err := io.Copy(comp, r)
+		if err != nil {
+			comp.Close()
+			pw.CloseWithError(err)
+			return
+		}
+		// Close the stream.
+		err = comp.Close()
+		if err != nil {
+			pw.CloseWithError(err)
+			return
+		}
+		// Everything ok, do regular close.
+		pw.Close()
+	}()
+	return pr
 }

 // Returns error if the cancelCh has been closed (indicating that S3 client has disconnected)
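
For orientation, a minimal round-trip sketch of the pipe-backed reader added above (the `roundTrip` helper is illustrative only and not part of this commit; it assumes the same `cmd` package context and mirrors what `TestS2CompressReader` verifies below):

```go
// roundTrip pushes data through newS2CompressReader and decodes the
// result with s2.NewReader, returning the reconstructed bytes.
func roundTrip(data []byte) ([]byte, error) {
	rc := newS2CompressReader(bytes.NewReader(data))
	defer rc.Close() // releases the pipe goroutine on early exit

	var compressed bytes.Buffer
	if _, err := io.Copy(&compressed, rc); err != nil {
		return nil, err
	}

	var out bytes.Buffer
	if _, err := io.Copy(&out, s2.NewReader(&compressed)); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
```

Unlike the removed `snappyCompressReader`, which staged compressed output in an internal buffer, the pipe makes the compressing goroutine block until the consumer reads; callers that stop early must call `Close()` to unblock it.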

View File

@@ -1,5 +1,5 @@
 /*
- * MinIO Cloud Storage, (C) 2016 MinIO, Inc.
+ * MinIO Cloud Storage, (C) 2016-2019 MinIO, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,9 +21,11 @@ import (
 	"io"
 	"net/http"
 	"reflect"
+	"strconv"
 	"testing"

-	"github.com/golang/snappy"
+	"github.com/klauspost/compress/s2"
+	"github.com/minio/minio/cmd/crypto"
 )

 // Tests validate bucket name.
@@ -298,10 +300,11 @@ func TestIsCompressed(t *testing.T) {
 	testCases := []struct {
 		objInfo ObjectInfo
 		result  bool
+		err     bool
 	}{
 		{
 			objInfo: ObjectInfo{
-				UserDefined: map[string]string{"X-Minio-Internal-compression": "golang/snappy/LZ77",
+				UserDefined: map[string]string{"X-Minio-Internal-compression": compressionAlgorithmV1,
 					"content-type": "application/octet-stream",
 					"etag":         "b3ff3ef3789147152fbfbc50efba4bfd-2"},
 			},
@@ -309,7 +312,35 @@ func TestIsCompressed(t *testing.T) {
 		},
 		{
 			objInfo: ObjectInfo{
-				UserDefined: map[string]string{"X-Minio-Internal-XYZ": "golang/snappy/LZ77",
+				UserDefined: map[string]string{"X-Minio-Internal-compression": compressionAlgorithmV2,
+					"content-type": "application/octet-stream",
+					"etag":         "b3ff3ef3789147152fbfbc50efba4bfd-2"},
+			},
+			result: true,
+		},
+		{
+			objInfo: ObjectInfo{
+				UserDefined: map[string]string{"X-Minio-Internal-compression": "unknown/compression/type",
+					"content-type": "application/octet-stream",
+					"etag":         "b3ff3ef3789147152fbfbc50efba4bfd-2"},
+			},
+			result: true,
+			err:    true,
+		},
+		{
+			objInfo: ObjectInfo{
+				UserDefined: map[string]string{"X-Minio-Internal-compression": compressionAlgorithmV2,
+					"content-type": "application/octet-stream",
+					"etag":         "b3ff3ef3789147152fbfbc50efba4bfd-2",
+					crypto.SSEIV:   "yes",
+				},
+			},
+			result: true,
+			err:    true,
+		},
+		{
+			objInfo: ObjectInfo{
+				UserDefined: map[string]string{"X-Minio-Internal-XYZ": "klauspost/compress/s2",
 					"content-type": "application/octet-stream",
 					"etag":         "b3ff3ef3789147152fbfbc50efba4bfd-2"},
 			},
@@ -324,11 +355,21 @@ func TestIsCompressed(t *testing.T) {
 		},
 	}
 	for i, test := range testCases {
-		got := test.objInfo.IsCompressed()
-		if got != test.result {
-			t.Errorf("Test %d - expected %v but received %v",
-				i+1, test.result, got)
-		}
+		t.Run(strconv.Itoa(i), func(t *testing.T) {
+			got := test.objInfo.IsCompressed()
+			if got != test.result {
+				t.Errorf("IsCompressed: Expected %v but received %v",
+					test.result, got)
+			}
+			got, gErr := test.objInfo.IsCompressedOK()
+			if got != test.result {
+				t.Errorf("IsCompressedOK: Expected %v but received %v",
+					test.result, got)
+			}
+			if gErr != nil != test.err {
+				t.Errorf("IsCompressedOK: want error: %t, got error: %v", test.err, gErr)
+			}
+		})
 	}
 }
@@ -367,6 +408,13 @@ func TestExcludeForCompression(t *testing.T) {
 			},
 			result: false,
 		},
+		{
+			object: "object",
+			header: http.Header{
+				"Content-Type": []string{"text/something"},
+			},
+			result: false,
+		},
 	}
 	for i, test := range testCases {
 		globalIsCompressionEnabled = true
@@ -422,7 +470,7 @@ func TestGetActualSize(t *testing.T) {
 	}{
 		{
 			objInfo: ObjectInfo{
-				UserDefined: map[string]string{"X-Minio-Internal-compression": "golang/snappy/LZ77",
+				UserDefined: map[string]string{"X-Minio-Internal-compression": "klauspost/compress/s2",
 					"X-Minio-Internal-actual-size": "100000001",
 					"content-type":                 "application/octet-stream",
 					"etag":                         "b3ff3ef3789147152fbfbc50efba4bfd-2"},
@@ -441,7 +489,7 @@ func TestGetActualSize(t *testing.T) {
 		},
 		{
 			objInfo: ObjectInfo{
-				UserDefined: map[string]string{"X-Minio-Internal-compression": "golang/snappy/LZ77",
+				UserDefined: map[string]string{"X-Minio-Internal-compression": "klauspost/compress/s2",
 					"X-Minio-Internal-actual-size": "841",
 					"content-type":                 "application/octet-stream",
 					"etag":                         "b3ff3ef3789147152fbfbc50efba4bfd-2"},
@@ -451,7 +499,7 @@ func TestGetActualSize(t *testing.T) {
 		},
 		{
 			objInfo: ObjectInfo{
-				UserDefined: map[string]string{"X-Minio-Internal-compression": "golang/snappy/LZ77",
+				UserDefined: map[string]string{"X-Minio-Internal-compression": "klauspost/compress/s2",
 					"content-type": "application/octet-stream",
 					"etag":         "b3ff3ef3789147152fbfbc50efba4bfd-2"},
 				Parts: []ObjectPartInfo{},
@@ -540,7 +588,7 @@ func TestGetCompressedOffsets(t *testing.T) {
 	}
 }

-func TestSnappyCompressReader(t *testing.T) {
+func TestS2CompressReader(t *testing.T) {
 	tests := []struct {
 		name string
 		data []byte
@@ -554,7 +602,8 @@ func TestSnappyCompressReader(t *testing.T) {
 		t.Run(tt.name, func(t *testing.T) {
 			buf := make([]byte, 100) // make small buffer to ensure multiple reads are required for large case

-			r := newSnappyCompressReader(bytes.NewReader(tt.data))
+			r := newS2CompressReader(bytes.NewReader(tt.data))
+			defer r.Close()

 			var rdrBuf bytes.Buffer
 			_, err := io.CopyBuffer(&rdrBuf, r, buf)
@@ -563,7 +612,7 @@ func TestSnappyCompressReader(t *testing.T) {
 			}

 			var stdBuf bytes.Buffer
-			w := snappy.NewBufferedWriter(&stdBuf)
+			w := s2.NewWriter(&stdBuf)
 			_, err = io.CopyBuffer(w, bytes.NewReader(tt.data), buf)
 			if err != nil {
 				t.Fatal(err)
@@ -582,7 +631,7 @@ func TestSnappyCompressReader(t *testing.T) {
 			}

 			var decBuf bytes.Buffer
-			decRdr := snappy.NewReader(&rdrBuf)
+			decRdr := s2.NewReader(&rdrBuf)
 			_, err = io.Copy(&decBuf, decRdr)
 			if err != nil {
 				t.Fatal(err)

View File

@@ -61,6 +61,7 @@ var supportedHeadGetReqParams = map[string]string{
 const (
 	compressionAlgorithmV1 = "golang/snappy/LZ77"
+	compressionAlgorithmV2 = "klauspost/compress/s2"
 )

 // setHeadGetRespHeaders - set any requested parameters as response headers.
@@ -800,13 +801,15 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re
 	if isCompressed {
 		compressMetadata = make(map[string]string, 2)
 		// Preserving the compression metadata.
-		compressMetadata[ReservedMetadataPrefix+"compression"] = compressionAlgorithmV1
+		compressMetadata[ReservedMetadataPrefix+"compression"] = compressionAlgorithmV2
 		compressMetadata[ReservedMetadataPrefix+"actual-size"] = strconv.FormatInt(actualSize, 10)
 		// Remove all source encrypted related metadata to
 		// avoid copying them in target object.
 		crypto.RemoveInternalEntries(srcInfo.UserDefined)

-		reader = newSnappyCompressReader(gr)
+		s2c := newS2CompressReader(gr)
+		defer s2c.Close()
+		reader = s2c
 		length = -1
 	} else {
 		// Remove the metadata for remote calls.
@@ -1175,7 +1178,7 @@ func (api objectAPIHandlers) PutObjectHandler(w http.ResponseWriter, r *http.Req
 	if objectAPI.IsCompressionSupported() && isCompressible(r.Header, object) && size > 0 {
 		// Storing the compression metadata.
-		metadata[ReservedMetadataPrefix+"compression"] = compressionAlgorithmV1
+		metadata[ReservedMetadataPrefix+"compression"] = compressionAlgorithmV2
 		metadata[ReservedMetadataPrefix+"actual-size"] = strconv.FormatInt(size, 10)

 		actualReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize, globalCLIContext.StrictS3Compat)
@@ -1185,7 +1188,9 @@ func (api objectAPIHandlers) PutObjectHandler(w http.ResponseWriter, r *http.Req
 		}

 		// Set compression metrics.
-		reader = newSnappyCompressReader(actualReader)
+		s2c := newS2CompressReader(actualReader)
+		defer s2c.Close()
+		reader = s2c
 		size = -1   // Since compressed size is un-predictable.
 		md5hex = "" // Do not try to verify the content.
 		sha256hex = ""
@@ -1389,7 +1394,7 @@ func (api objectAPIHandlers) NewMultipartUploadHandler(w http.ResponseWriter, r
 	if objectAPI.IsCompressionSupported() && isCompressible(r.Header, object) {
 		// Storing the compression metadata.
-		metadata[ReservedMetadataPrefix+"compression"] = compressionAlgorithmV1
+		metadata[ReservedMetadataPrefix+"compression"] = compressionAlgorithmV2
 	}

 	opts, err = putOpts(ctx, r, bucket, object, metadata)
@@ -1632,7 +1637,9 @@ func (api objectAPIHandlers) CopyObjectPartHandler(w http.ResponseWriter, r *htt
 	isCompressed := compressPart
 	// Compress only if the compression is enabled during initial multipart.
 	if isCompressed {
-		reader = newSnappyCompressReader(gr)
+		s2c := newS2CompressReader(gr)
+		defer s2c.Close()
+		reader = s2c
 		length = -1
 	} else {
 		reader = gr
@@ -1872,7 +1879,9 @@ func (api objectAPIHandlers) PutObjectPartHandler(w http.ResponseWriter, r *http
 	}

 	// Set compression metrics.
-	reader = newSnappyCompressReader(actualReader)
+	s2c := newS2CompressReader(actualReader)
+	defer s2c.Close()
+	reader = s2c
 	size = -1   // Since compressed size is un-predictable.
 	md5hex = "" // Do not try to verify the content.
 	sha256hex = ""

View File

@@ -129,6 +129,10 @@ func setupTestReadDirGeneric(t *testing.T) (testResults []result) {
 // Test to read non-empty directory with symlinks.
 func setupTestReadDirSymlink(t *testing.T) (testResults []result) {
+	if runtime.GOOS == "windows" {
+		t.Log("symlinks not available on windows")
+		return nil
+	}
 	dir := mustSetupDir(t)
 	entries := []string{}
 	for i := 0; i < 10; i++ {

View File

@@ -20,16 +20,14 @@ import (
 	"bufio"
 	"bytes"
 	"crypto/tls"
+	"encoding/gob"
+	"encoding/hex"
+	"fmt"
 	"io"
 	"io/ioutil"
 	"net/url"
 	"path"
 	"strconv"
-
-	"encoding/gob"
-	"encoding/hex"
-	"fmt"
-
 	"strings"

 	"github.com/minio/minio/cmd/http"

View File

@@ -358,10 +358,12 @@ func (s *storageRESTServer) ReadFileStreamHandler(w http.ResponseWriter, r *http
 		return
 	}
 	defer rc.Close()
+
 	w.Header().Set(xhttp.ContentLength, strconv.Itoa(length))
+
 	io.Copy(w, rc)
 	w.(http.Flusher).Flush()
 }

 // readMetadata func provides the function types for reading leaf metadata.

View File

@@ -17,7 +17,6 @@
 package cmd

 import (
-	"archive/zip"
 	"context"
 	"encoding/json"
 	"fmt"
@@ -29,13 +28,12 @@ import (
 	"runtime"
 	"strconv"
 	"strings"
-	"sync"
 	"time"

-	humanize "github.com/dustin/go-humanize"
-	snappy "github.com/golang/snappy"
+	"github.com/dustin/go-humanize"
 	"github.com/gorilla/mux"
 	"github.com/gorilla/rpc/v2/json2"
+	"github.com/klauspost/compress/zip"
 	miniogopolicy "github.com/minio/minio-go/v6/pkg/policy"
 	"github.com/minio/minio-go/v6/pkg/s3utils"
 	"github.com/minio/minio-go/v6/pkg/set"
@@ -995,7 +993,7 @@ func (web *webAPIHandlers) Upload(w http.ResponseWriter, r *http.Request) {
 	}
 	if objectAPI.IsCompressionSupported() && isCompressible(r.Header, object) && size > 0 {
 		// Storing the compression metadata.
-		metadata[ReservedMetadataPrefix+"compression"] = compressionAlgorithmV1
+		metadata[ReservedMetadataPrefix+"compression"] = compressionAlgorithmV2
 		metadata[ReservedMetadataPrefix+"actual-size"] = strconv.FormatInt(size, 10)

 		actualReader, err := hash.NewReader(reader, size, "", "", actualSize, globalCLIContext.StrictS3Compat)
@@ -1006,7 +1004,9 @@ func (web *webAPIHandlers) Upload(w http.ResponseWriter, r *http.Request) {
 		// Set compression metrics.
 		size = -1 // Since compressed size is un-predictable.
-		reader = newSnappyCompressReader(actualReader)
+		s2c := newS2CompressReader(actualReader)
+		defer s2c.Close()
+		reader = s2c
 		hashReader, err = hash.NewReader(reader, size, "", "", actualSize, globalCLIContext.StrictS3Compat)
 		if err != nil {
 			writeWebErrorResponse(w, err)
@@ -1234,7 +1234,6 @@ func (web *webAPIHandlers) DownloadZip(w http.ResponseWriter, r *http.Request) {
 	ctx := newContext(r, w, "WebDownloadZip")
 	defer logger.AuditLog(w, r, "WebDownloadZip", mustGetClaimsFromToken(r))

-	var wg sync.WaitGroup
 	objectAPI := web.ObjectAPI()
 	if objectAPI == nil {
 		writeWebErrorResponse(w, errServerNotInitialized)
@@ -1306,7 +1305,6 @@ func (web *webAPIHandlers) DownloadZip(w http.ResponseWriter, r *http.Request) {
 	archive := zip.NewWriter(w)
 	defer archive.Close()

-	var length int64
 	for _, object := range args.Objects {
 		// Writes compressed object file to the response.
 		zipit := func(objectName string) error {
@@ -1318,58 +1316,28 @@ func (web *webAPIHandlers) DownloadZip(w http.ResponseWriter, r *http.Request) {
 			defer gr.Close()

 			info := gr.ObjInfo

-			var actualSize int64
 			if info.IsCompressed() {
-				// Read the decompressed size from the meta.json.
-				actualSize = info.GetActualSize()
-				// Set the info.Size to the actualSize.
-				info.Size = actualSize
+				// For reporting, set the file size to the uncompressed size.
+				info.Size = info.GetActualSize()
 			}
 			header := &zip.FileHeader{
-				Name:               strings.TrimPrefix(objectName, args.Prefix),
-				Method:             zip.Deflate,
-				UncompressedSize64: uint64(length),
-				UncompressedSize:   uint32(length),
+				Name:   strings.TrimPrefix(objectName, args.Prefix),
+				Method: zip.Deflate,
 			}
-			zipWriter, err := archive.CreateHeader(header)
+			if hasStringSuffixInSlice(info.Name, standardExcludeCompressExtensions) || hasPattern(standardExcludeCompressContentTypes, info.ContentType) {
+				// We strictly disable compression for standard extensions/content-types.
+				header.Method = zip.Store
+			}
+			writer, err := archive.CreateHeader(header)
 			if err != nil {
 				writeWebErrorResponse(w, errUnexpected)
 				return err
 			}
-			var writer io.Writer
-
-			if info.IsCompressed() {
-				// Open a pipe for compression
-				// Where compressWriter is actually passed to the getObject
-				decompressReader, compressWriter := io.Pipe()
-				snappyReader := snappy.NewReader(decompressReader)
-
-				// The limit is set to the actual size.
-				responseWriter := ioutil.LimitedWriter(zipWriter, 0, actualSize)
-				wg.Add(1) //For closures.
-				go func() {
-					defer wg.Done()
-					// Finally, writes to the client.
-					_, perr := io.Copy(responseWriter, snappyReader)
-
-					// Close the compressWriter if the data is read already.
-					// Closing the pipe, releases the writer passed to the getObject.
-					compressWriter.CloseWithError(perr)
-				}()
-				writer = compressWriter
-			} else {
-				writer = zipWriter
-			}
 			httpWriter := ioutil.WriteOnClose(writer)

 			// Write object content to response body
 			if _, err = io.Copy(httpWriter, gr); err != nil {
 				httpWriter.Close()
-				if info.IsCompressed() {
-					// Wait for decompression go-routine to retire.
-					wg.Wait()
-				}
 				if !httpWriter.HasWritten() { // write error response only if no data or headers has been written to client yet
 					writeWebErrorResponse(w, err)
 				}
@@ -1382,10 +1350,6 @@ func (web *webAPIHandlers) DownloadZip(w http.ResponseWriter, r *http.Request) {
 				return err
 			}
 		}
-		if info.IsCompressed() {
-			// Wait for decompression go-routine to retire.
-			wg.Wait()
-		}

 		// Notify object accessed via a GET request.
 		sendEvent(eventArgs{

View File

@@ -1,6 +1,10 @@
 # Compression Guide [![Slack](https://slack.min.io/slack?type=svg)](https://slack.min.io)

-MinIO server allows streaming compression to ensure efficient disk space usage. Compression happens inflight, i.e objects are compressed before being written to disk(s). MinIO uses [`golang/snappy`](https://github.com/golang/snappy) streaming compression due to its stability and performance.
+MinIO server allows streaming compression to ensure efficient disk space usage. Compression happens inflight, i.e objects are compressed before being written to disk(s). MinIO uses [`klauspost/compress/s2`](https://github.com/klauspost/compress/tree/master/s2) streaming compression due to its stability and performance.
+
+This algorithm is specifically optimized for machine generated content. Write throughput is typically at least 300MB/s per CPU core. Decompression speed is typically at least 1GB/s.
+This means that in cases where raw IO is below these numbers compression will not only reduce disk usage but also help increase system throughput.
+Typically enabling compression on spinning disk systems will increase speed when the content can be compressed.

 ## Get Started
@@ -15,13 +19,26 @@ Compression can be enabled by updating the `compress` config settings for MinIO
 ```json
 "compress": {
         "enabled": true,
-        "extensions": [".txt",".log",".csv", ".json"],
-        "mime-types": ["text/csv","text/plain","application/json"]
+        "extensions": [".txt",".log",".csv", ".json", ".tar"],
+        "mime-types": ["text/*","application/json","application/xml"]
 }
 ```

 Since text, log, csv, json files are highly compressible, These extensions/mime-types are included by default for compression.
+
+Having compression enabled and no extensions or mime types will attempt to compress anything that isn't explicitly known to be already compressed content.
+
+Settings for enabling compression on all content, except for types listed below:
+
+```json
+"compress": {
+        "enabled": true,
+        "extensions": [],
+        "mime-types": []
+}
+```
+
+Incompressible content will be skipped with quite low CPU usage and storage overhead, typically at several GB/s.

 To update the configuration, use `mc admin config get` command to get the current configuration file for the minio cluster in json format, and save it locally.

 ```sh
@@ -69,6 +86,8 @@ export MINIO_COMPRESS_MIMETYPES="application/pdf"
 | `application/x-compress` |
 | `application/x-xz`       |

+All files with these extensions and mime types are excluded from compression, even if compression is enabled for all types.
+
 - MinIO does not support encryption with compression because compression and encryption together potentially enables room for side channel attacks like [`CRIME and BREACH`](https://blog.minio.io/c-e-compression-encryption-cb6b7f04a369)
 - MinIO does not support compression for Gateway (Azure/GCS/NAS) implementations.
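
To make the streaming model described in the guide concrete, here is a standalone sketch using only the `s2.NewWriter` API this commit adopts (a hedged illustration, not MinIO code; it stream-compresses stdin to stdout):

```go
package main

import (
	"io"
	"log"
	"os"

	"github.com/klauspost/compress/s2"
)

func main() {
	// Objects are compressed in flight in much the same way: data
	// streams through the S2 writer without buffering the whole input.
	enc := s2.NewWriter(os.Stdout)
	if _, err := io.Copy(enc, os.Stdin); err != nil {
		log.Fatal(err)
	}
	// Close flushes any remaining buffered data to the output.
	if err := enc.Close(); err != nil {
		log.Fatal(err)
	}
}
```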

go.mod
View File

@@ -21,7 +21,6 @@ require (
 	github.com/fatih/color v1.7.0
 	github.com/fatih/structs v1.1.0
 	github.com/go-sql-driver/mysql v1.4.1
-	github.com/golang/snappy v0.0.1
 	github.com/gomodule/redigo v2.0.0+incompatible
 	github.com/gorilla/handlers v1.4.0
 	github.com/gorilla/mux v1.7.0
@@ -29,7 +28,7 @@ require (
 	github.com/hashicorp/vault v1.1.0
 	github.com/inconshreveable/go-update v0.0.0-20160112193335-8152e7eb6ccf
 	github.com/json-iterator/go v1.1.7
-	github.com/klauspost/compress v1.5.0
+	github.com/klauspost/compress v1.8.3
 	github.com/klauspost/pgzip v1.2.1
 	github.com/klauspost/readahead v1.3.0
 	github.com/klauspost/reedsolomon v1.9.1

go.sum
View File

@@ -340,6 +340,8 @@ github.com/klauspost/compress v1.3.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.5.0 h1:iDac0ZKbmSA4PRrRuXXjZL8C7UoJan8oBYxXkMzEQrI=
 github.com/klauspost/compress v1.5.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
+github.com/klauspost/compress v1.8.3 h1:CkLseiEYMM/fRb0RIg9mXB+Iwgmle+U9KGFu+JCO4Ec=
+github.com/klauspost/compress v1.8.3/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/cpuid v0.0.0-20160106104451-349c67577817/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/cpuid v1.2.1 h1:vJi+O/nMdFt0vqm8NZBI6wzALWdA2X+egi0ogNyrC/w=