Add support for caching multipart in writethrough mode (#13507)

This commit is contained in:
Poorna K 2021-11-01 11:11:58 -04:00 committed by GitHub
parent 6d53e3c2d7
commit 15dcacc1fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 975 additions and 91 deletions

View File

@ -29,6 +29,8 @@ import (
"io/ioutil" "io/ioutil"
"net/http" "net/http"
"os" "os"
"path"
"strconv"
"strings" "strings"
"sync" "sync"
"sync/atomic" "sync/atomic"
@ -39,6 +41,7 @@ import (
"github.com/minio/minio/internal/crypto" "github.com/minio/minio/internal/crypto"
"github.com/minio/minio/internal/disk" "github.com/minio/minio/internal/disk"
"github.com/minio/minio/internal/fips" "github.com/minio/minio/internal/fips"
"github.com/minio/minio/internal/hash"
xhttp "github.com/minio/minio/internal/http" xhttp "github.com/minio/minio/internal/http"
xioutil "github.com/minio/minio/internal/ioutil" xioutil "github.com/minio/minio/internal/ioutil"
"github.com/minio/minio/internal/kms" "github.com/minio/minio/internal/kms"
@ -48,13 +51,18 @@ import (
const ( const (
// cache.json object metadata for cached objects. // cache.json object metadata for cached objects.
cacheMetaJSONFile = "cache.json" cacheMetaJSONFile = "cache.json"
cacheDataFile = "part.1" cacheDataFile = "part.1"
cacheMetaVersion = "1.0.0" cacheDataFilePrefix = "part"
cacheExpiryDays = 90 * time.Hour * 24 // defaults to 90 days
cacheMetaVersion = "1.0.0"
cacheExpiryDays = 90 * time.Hour * 24 // defaults to 90 days
// SSECacheEncrypted is the metadata key indicating that the object // SSECacheEncrypted is the metadata key indicating that the object
// is a cache entry encrypted with cache KMS master key in globalCacheKMS. // is a cache entry encrypted with cache KMS master key in globalCacheKMS.
SSECacheEncrypted = "X-Minio-Internal-Encrypted-Cache" SSECacheEncrypted = "X-Minio-Internal-Encrypted-Cache"
cacheMultipartDir = "multipart"
cacheStaleUploadCleanupInterval = time.Hour * 24
cacheStaleUploadExpiry = time.Hour * 24
) )
// CacheChecksumInfoV1 - carries checksums of individual blocks on disk. // CacheChecksumInfoV1 - carries checksums of individual blocks on disk.
@ -78,6 +86,11 @@ type cacheMeta struct {
Hits int `json:"hits,omitempty"` Hits int `json:"hits,omitempty"`
Bucket string `json:"bucket,omitempty"` Bucket string `json:"bucket,omitempty"`
Object string `json:"object,omitempty"` Object string `json:"object,omitempty"`
// for multipart upload
PartNumbers []int `json:"partNums,omitempty"` // Part Numbers
PartETags []string `json:"partETags,omitempty"` // Part ETags
PartSizes []int64 `json:"partSizes,omitempty"` // Part Sizes
PartActualSizes []int64 `json:"partASizes,omitempty"` // Part ActualSizes (compression)
} }
// RangeInfo has the range, file and range length information for a cached range. // RangeInfo has the range, file and range length information for a cached range.
@ -104,13 +117,13 @@ func (m *cacheMeta) ToObjectInfo(bucket, object string) (o ObjectInfo) {
CacheStatus: CacheHit, CacheStatus: CacheHit,
CacheLookupStatus: CacheHit, CacheLookupStatus: CacheHit,
} }
meta := cloneMSS(m.Meta)
// We set file info only if its valid. // We set file info only if its valid.
o.Size = m.Stat.Size o.Size = m.Stat.Size
o.ETag = extractETag(m.Meta) o.ETag = extractETag(meta)
o.ContentType = m.Meta["content-type"] o.ContentType = meta["content-type"]
o.ContentEncoding = m.Meta["content-encoding"] o.ContentEncoding = meta["content-encoding"]
if storageClass, ok := m.Meta[xhttp.AmzStorageClass]; ok { if storageClass, ok := meta[xhttp.AmzStorageClass]; ok {
o.StorageClass = storageClass o.StorageClass = storageClass
} else { } else {
o.StorageClass = globalMinioDefaultStorageClass o.StorageClass = globalMinioDefaultStorageClass
@ -119,20 +132,26 @@ func (m *cacheMeta) ToObjectInfo(bucket, object string) (o ObjectInfo) {
t time.Time t time.Time
e error e error
) )
if exp, ok := m.Meta["expires"]; ok { if exp, ok := meta["expires"]; ok {
if t, e = time.Parse(http.TimeFormat, exp); e == nil { if t, e = time.Parse(http.TimeFormat, exp); e == nil {
o.Expires = t.UTC() o.Expires = t.UTC()
} }
} }
if mtime, ok := m.Meta["last-modified"]; ok { if mtime, ok := meta["last-modified"]; ok {
if t, e = time.Parse(http.TimeFormat, mtime); e == nil { if t, e = time.Parse(http.TimeFormat, mtime); e == nil {
o.ModTime = t.UTC() o.ModTime = t.UTC()
} }
} }
o.Parts = make([]ObjectPartInfo, len(m.PartNumbers))
for i := range m.PartNumbers {
o.Parts[i].Number = m.PartNumbers[i]
o.Parts[i].Size = m.PartSizes[i]
o.Parts[i].ETag = m.PartETags[i]
o.Parts[i].ActualSize = m.PartActualSizes[i]
}
// etag/md5Sum has already been extracted. We need to // etag/md5Sum has already been extracted. We need to
// remove to avoid it from appearing as part of user-defined metadata // remove to avoid it from appearing as part of user-defined metadata
o.UserDefined = cleanMetadata(m.Meta) o.UserDefined = cleanMetadata(meta)
return o return o
} }
@ -142,16 +161,18 @@ type diskCache struct {
online uint32 // ref: https://golang.org/pkg/sync/atomic/#pkg-note-BUG online uint32 // ref: https://golang.org/pkg/sync/atomic/#pkg-note-BUG
purgeRunning int32 purgeRunning int32
triggerGC chan struct{} triggerGC chan struct{}
dir string // caching directory dir string // caching directory
stats CacheDiskStats // disk cache stats for prometheus stats CacheDiskStats // disk cache stats for prometheus
quotaPct int // max usage in % quotaPct int // max usage in %
pool sync.Pool pool sync.Pool
after int // minimum accesses before an object is cached. after int // minimum accesses before an object is cached.
lowWatermark int lowWatermark int
highWatermark int highWatermark int
enableRange bool enableRange bool
commitWriteback bool commitWriteback bool
commitWritethrough bool
retryWritebackCh chan ObjectInfo retryWritebackCh chan ObjectInfo
// nsMutex namespace lock // nsMutex namespace lock
nsMutex *nsLockMap nsMutex *nsLockMap
@ -170,15 +191,17 @@ func newDiskCache(ctx context.Context, dir string, config cache.Config) (*diskCa
return nil, fmt.Errorf("Unable to initialize '%s' dir, %w", dir, err) return nil, fmt.Errorf("Unable to initialize '%s' dir, %w", dir, err)
} }
cache := diskCache{ cache := diskCache{
dir: dir, dir: dir,
triggerGC: make(chan struct{}, 1), triggerGC: make(chan struct{}, 1),
stats: CacheDiskStats{Dir: dir}, stats: CacheDiskStats{Dir: dir},
quotaPct: quotaPct, quotaPct: quotaPct,
after: config.After, after: config.After,
lowWatermark: config.WatermarkLow, lowWatermark: config.WatermarkLow,
highWatermark: config.WatermarkHigh, highWatermark: config.WatermarkHigh,
enableRange: config.Range, enableRange: config.Range,
commitWriteback: config.CommitWriteback, commitWriteback: config.CacheCommitMode == CommitWriteBack,
commitWritethrough: config.CacheCommitMode == CommitWriteThrough,
retryWritebackCh: make(chan ObjectInfo, 10000), retryWritebackCh: make(chan ObjectInfo, 10000),
online: 1, online: 1,
pool: sync.Pool{ pool: sync.Pool{
@ -190,6 +213,7 @@ func newDiskCache(ctx context.Context, dir string, config cache.Config) (*diskCa
nsMutex: newNSLock(false), nsMutex: newNSLock(false),
} }
go cache.purgeWait(ctx) go cache.purgeWait(ctx)
go cache.cleanupStaleUploads(ctx)
if cache.commitWriteback { if cache.commitWriteback {
go cache.scanCacheWritebackFailures(ctx) go cache.scanCacheWritebackFailures(ctx)
} }
@ -303,16 +327,11 @@ func (c *diskCache) purge(ctx context.Context) {
// ignore error we know what value we are passing. // ignore error we know what value we are passing.
scorer, _ := newFileScorer(toFree, time.Now().Unix(), 100) scorer, _ := newFileScorer(toFree, time.Now().Unix(), 100)
// this function returns FileInfo for cached range files and cache data file. // this function returns FileInfo for cached range files.
fiStatFn := func(ranges map[string]string, dataFile, pathPrefix string) map[string]os.FileInfo { fiStatRangesFn := func(ranges map[string]string, pathPrefix string) map[string]os.FileInfo {
fm := make(map[string]os.FileInfo) fm := make(map[string]os.FileInfo)
fname := pathJoin(pathPrefix, dataFile)
if fi, err := os.Stat(fname); err == nil {
fm[fname] = fi
}
for _, rngFile := range ranges { for _, rngFile := range ranges {
fname = pathJoin(pathPrefix, rngFile) fname := pathJoin(pathPrefix, rngFile)
if fi, err := os.Stat(fname); err == nil { if fi, err := os.Stat(fname); err == nil {
fm[fname] = fi fm[fname] = fi
} }
@ -320,6 +339,26 @@ func (c *diskCache) purge(ctx context.Context) {
return fm return fm
} }
// this function returns most recent Atime among cached part files.
lastAtimeFn := func(partNums []int, pathPrefix string) time.Time {
lastATime := timeSentinel
for _, pnum := range partNums {
fname := pathJoin(pathPrefix, fmt.Sprintf("%s.%d", cacheDataFilePrefix, pnum))
if fi, err := os.Stat(fname); err == nil {
if atime.Get(fi).After(lastATime) {
lastATime = atime.Get(fi)
}
}
}
if len(partNums) == 0 {
fname := pathJoin(pathPrefix, cacheDataFile)
if fi, err := os.Stat(fname); err == nil {
lastATime = atime.Get(fi)
}
}
return lastATime
}
filterFn := func(name string, typ os.FileMode) error { filterFn := func(name string, typ os.FileMode) error {
if name == minioMetaBucket { if name == minioMetaBucket {
// Proceed to next file. // Proceed to next file.
@ -334,9 +373,10 @@ func (c *diskCache) purge(ctx context.Context) {
// Proceed to next file. // Proceed to next file.
return nil return nil
} }
// get last access time of cache part files
// stat all cached file ranges and cacheDataFile. lastAtime := lastAtimeFn(meta.PartNumbers, pathJoin(c.dir, name))
cachedFiles := fiStatFn(meta.Ranges, cacheDataFile, pathJoin(c.dir, name)) // stat all cached file ranges.
cachedRngFiles := fiStatRangesFn(meta.Ranges, pathJoin(c.dir, name))
objInfo := meta.ToObjectInfo("", "") objInfo := meta.ToObjectInfo("", "")
// prevent gc from clearing un-synced commits. This metadata is present when // prevent gc from clearing un-synced commits. This metadata is present when
// cache writeback commit setting is enabled. // cache writeback commit setting is enabled.
@ -345,7 +385,27 @@ func (c *diskCache) purge(ctx context.Context) {
return nil return nil
} }
cc := cacheControlOpts(objInfo) cc := cacheControlOpts(objInfo)
for fname, fi := range cachedFiles { switch {
case cc != nil:
if cc.isStale(objInfo.ModTime) {
if err = removeAll(cacheDir); err != nil {
logger.LogIf(ctx, err)
}
scorer.adjustSaveBytes(-objInfo.Size)
// break early if sufficient disk space reclaimed.
if c.diskUsageLow() {
// if we found disk usage is already low, we return nil filtering is complete.
return errDoneForNow
}
}
case lastAtime != timeSentinel:
// cached multipart or single part
objInfo.AccTime = lastAtime
objInfo.Name = pathJoin(c.dir, name, cacheDataFile)
scorer.addFileWithObjInfo(objInfo, numHits)
}
for fname, fi := range cachedRngFiles {
if cc != nil { if cc != nil {
if cc.isStale(objInfo.ModTime) { if cc.isStale(objInfo.ModTime) {
if err = removeAll(fname); err != nil { if err = removeAll(fname); err != nil {
@ -365,7 +425,7 @@ func (c *diskCache) purge(ctx context.Context) {
} }
// clean up stale cache.json files for objects that never got cached but access count was maintained in cache.json // clean up stale cache.json files for objects that never got cached but access count was maintained in cache.json
fi, err := os.Stat(pathJoin(cacheDir, cacheMetaJSONFile)) fi, err := os.Stat(pathJoin(cacheDir, cacheMetaJSONFile))
if err != nil || (fi.ModTime().Before(expiry) && len(cachedFiles) == 0) { if err != nil || (fi.ModTime().Before(expiry) && len(cachedRngFiles) == 0) {
removeAll(cacheDir) removeAll(cacheDir)
scorer.adjustSaveBytes(-fi.Size()) scorer.adjustSaveBytes(-fi.Size())
// Proceed to next file. // Proceed to next file.
@ -581,8 +641,11 @@ func (c *diskCache) saveMetadata(ctx context.Context, bucket, object string, met
if m.Meta == nil { if m.Meta == nil {
m.Meta = make(map[string]string) m.Meta = make(map[string]string)
} }
if etag, ok := meta["etag"]; ok { // save etag in m.Meta if missing
m.Meta["etag"] = etag if _, ok := m.Meta["etag"]; !ok {
if etag, ok := meta["etag"]; ok {
m.Meta["etag"] = etag
}
} }
} }
m.Hits++ m.Hits++
@ -639,7 +702,7 @@ func (c *diskCache) bitrotWriteToCache(cachePath, fileName string, reader io.Rea
} }
hashBytes := h.Sum(nil) hashBytes := h.Sum(nil)
// compute md5Hash of original data stream if writeback commit to cache // compute md5Hash of original data stream if writeback commit to cache
if c.commitWriteback { if c.commitWriteback || c.commitWritethrough {
if _, err = md5Hash.Write((*bufp)[:n]); err != nil { if _, err = md5Hash.Write((*bufp)[:n]); err != nil {
return 0, "", err return 0, "", err
} }
@ -655,8 +718,9 @@ func (c *diskCache) bitrotWriteToCache(cachePath, fileName string, reader io.Rea
break break
} }
} }
md5sumCurr := md5Hash.Sum(nil)
return bytesWritten, base64.StdEncoding.EncodeToString(md5Hash.Sum(nil)), nil return bytesWritten, base64.StdEncoding.EncodeToString(md5sumCurr), nil
} }
func newCacheEncryptReader(content io.Reader, bucket, object string, metadata map[string]string) (r io.Reader, err error) { func newCacheEncryptReader(content io.Reader, bucket, object string, metadata map[string]string) (r io.Reader, err error) {
@ -897,7 +961,7 @@ func (c *diskCache) bitrotReadFromCache(ctx context.Context, filePath string, of
return err return err
} }
if _, err := io.Copy(writer, bytes.NewReader((*bufp)[blockOffset:blockOffset+blockLength])); err != nil { if _, err = io.Copy(writer, bytes.NewReader((*bufp)[blockOffset:blockOffset+blockLength])); err != nil {
if err != io.ErrClosedPipe { if err != io.ErrClosedPipe {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
return err return err
@ -950,19 +1014,78 @@ func (c *diskCache) Get(ctx context.Context, bucket, object string, rs *HTTPRang
gr, gerr := NewGetObjectReaderFromReader(bytes.NewBuffer(nil), objInfo, opts) gr, gerr := NewGetObjectReaderFromReader(bytes.NewBuffer(nil), objInfo, opts)
return gr, numHits, gerr return gr, numHits, gerr
} }
fn, off, length, nErr := NewGetObjectReader(rs, objInfo, opts) fn, startOffset, length, nErr := NewGetObjectReader(rs, objInfo, opts)
if nErr != nil { if nErr != nil {
return nil, numHits, nErr return nil, numHits, nErr
} }
filePath := pathJoin(cacheObjPath, cacheFile) var totalBytesRead int64
pr, pw := xioutil.WaitPipe() pr, pw := xioutil.WaitPipe()
go func() { if len(objInfo.Parts) > 0 {
err := c.bitrotReadFromCache(ctx, filePath, off, length, pw) // For negative length read everything.
if err != nil { if length < 0 {
removeAll(cacheObjPath) length = objInfo.Size - startOffset
} }
pw.CloseWithError(err)
}() // Reply back invalid range if the input offset and length fall out of range.
if startOffset > objInfo.Size || startOffset+length > objInfo.Size {
logger.LogIf(ctx, InvalidRange{startOffset, length, objInfo.Size}, logger.Application)
return nil, numHits, InvalidRange{startOffset, length, objInfo.Size}
}
// Get start part index and offset.
partIndex, partOffset, err := cacheObjectToPartOffset(objInfo, startOffset)
if err != nil {
return nil, numHits, InvalidRange{startOffset, length, objInfo.Size}
}
// Calculate endOffset according to length
endOffset := startOffset
if length > 0 {
endOffset += length - 1
}
// Get last part index to read given length.
lastPartIndex, _, err := cacheObjectToPartOffset(objInfo, endOffset)
if err != nil {
return nil, numHits, InvalidRange{startOffset, length, objInfo.Size}
}
go func() {
for ; partIndex <= lastPartIndex; partIndex++ {
if length == totalBytesRead {
break
}
partNumber := objInfo.Parts[partIndex].Number
// Save the current part name and size.
partSize := objInfo.Parts[partIndex].Size
partLength := partSize - partOffset
// partLength should be adjusted so that we don't write more data than what was requested.
if partLength > (length - totalBytesRead) {
partLength = length - totalBytesRead
}
filePath := pathJoin(cacheObjPath, fmt.Sprintf("part.%d", partNumber))
err := c.bitrotReadFromCache(ctx, filePath, partOffset, partLength, pw)
if err != nil {
removeAll(cacheObjPath)
pw.CloseWithError(err)
break
}
totalBytesRead += partLength
// partOffset will be valid only for the first part, hence reset it to 0 for
// the remaining parts.
partOffset = 0
} // End of read all parts loop.
pr.CloseWithError(err)
}()
} else {
go func() {
filePath := pathJoin(cacheObjPath, cacheFile)
err := c.bitrotReadFromCache(ctx, filePath, startOffset, length, pw)
if err != nil {
removeAll(cacheObjPath)
}
pw.CloseWithError(err)
}()
}
// Cleanup function to cause the go routine above to exit, in // Cleanup function to cause the go routine above to exit, in
// case of incomplete read. // case of incomplete read.
pipeCloser := func() { pr.CloseWithError(nil) } pipeCloser := func() { pr.CloseWithError(nil) }
@ -980,7 +1103,6 @@ func (c *diskCache) Get(ctx context.Context, bucket, object string, rs *HTTPRang
gr.ObjInfo.Size = objSize gr.ObjInfo.Size = objSize
} }
return gr, numHits, nil return gr, numHits, nil
} }
// Deletes the cached object // Deletes the cached object
@ -1040,3 +1162,397 @@ func (c *diskCache) scanCacheWritebackFailures(ctx context.Context) {
return return
} }
} }
// NewMultipartUpload caches multipart uploads when writethrough is MINIO_CACHE_COMMIT mode
// multiparts are saved in .minio.sys/multipart/cachePath/uploadID dir until finalized. Then the individual parts
// are moved from the upload dir to cachePath/ directory.
func (c *diskCache) NewMultipartUpload(ctx context.Context, bucket, object, uID string, opts ObjectOptions) (uploadID string, err error) {
uploadID = uID
if uploadID == "" {
return "", InvalidUploadID{
Bucket: bucket,
Object: object,
UploadID: uploadID,
}
}
cachePath := getMultipartCacheSHADir(c.dir, bucket, object)
uploadIDDir := path.Join(cachePath, uploadID)
if err := os.MkdirAll(uploadIDDir, 0777); err != nil {
return uploadID, err
}
metaPath := pathJoin(uploadIDDir, cacheMetaJSONFile)
f, err := os.OpenFile(metaPath, os.O_RDWR|os.O_CREATE, 0666)
if err != nil {
return uploadID, err
}
defer f.Close()
m := &cacheMeta{
Version: cacheMetaVersion,
Bucket: bucket,
Object: object,
}
if err := jsonLoad(f, m); err != nil && err != io.EOF {
return uploadID, err
}
m.Meta = opts.UserDefined
m.Meta[ReservedMetadataPrefix+"Encrypted-Multipart"] = ""
m.Checksum = CacheChecksumInfoV1{Algorithm: HighwayHash256S.String(), Blocksize: cacheBlkSize}
if c.commitWriteback {
m.Meta[writeBackStatusHeader] = CommitPending.String()
}
m.Stat.ModTime = UTCNow()
if globalCacheKMS != nil {
if _, err := newCacheEncryptMetadata(bucket, object, m.Meta); err != nil {
return uploadID, err
}
}
err = jsonSave(f, m)
return uploadID, err
}
// PutObjectPart caches part to cache multipart path.
func (c *diskCache) PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, data io.Reader, size int64, opts ObjectOptions) (partInfo PartInfo, err error) {
oi := PartInfo{}
if !c.diskSpaceAvailable(size) {
io.Copy(ioutil.Discard, data)
return oi, errDiskFull
}
cachePath := getMultipartCacheSHADir(c.dir, bucket, object)
uploadIDDir := path.Join(cachePath, uploadID)
partIDLock := c.NewNSLockFn(pathJoin(uploadIDDir, strconv.Itoa(partID)))
lkctx, err := partIDLock.GetLock(ctx, globalOperationTimeout)
if err != nil {
return oi, err
}
ctx = lkctx.Context()
defer partIDLock.Unlock(lkctx.Cancel)
meta, _, _, err := c.statCache(ctx, uploadIDDir)
// Case where object not yet cached
if err != nil {
return oi, err
}
if !c.diskSpaceAvailable(size) {
return oi, errDiskFull
}
reader := data
var actualSize = uint64(size)
if globalCacheKMS != nil {
reader, err = newCachePartEncryptReader(ctx, bucket, object, partID, data, size, meta.Meta)
if err != nil {
return oi, err
}
actualSize, _ = sio.EncryptedSize(uint64(size))
}
n, md5sum, err := c.bitrotWriteToCache(uploadIDDir, fmt.Sprintf("part.%d", partID), reader, actualSize)
if IsErr(err, baseErrs...) {
// take the cache drive offline
c.setOffline()
}
if err != nil {
return oi, err
}
if actualSize != uint64(n) {
return oi, IncompleteBody{Bucket: bucket, Object: object}
}
var md5hex string
if md5bytes, err := base64.StdEncoding.DecodeString(md5sum); err == nil {
md5hex = hex.EncodeToString(md5bytes)
}
pInfo := PartInfo{
PartNumber: partID,
ETag: md5hex,
Size: n,
ActualSize: int64(actualSize),
LastModified: UTCNow(),
}
return pInfo, nil
}
// SavePartMetadata saves part upload metadata to uploadID directory on disk cache
func (c *diskCache) SavePartMetadata(ctx context.Context, bucket, object, uploadID string, partID int, pinfo PartInfo) error {
cachePath := getMultipartCacheSHADir(c.dir, bucket, object)
uploadDir := path.Join(cachePath, uploadID)
// acquire a write lock at upload path to update cache.json
uploadLock := c.NewNSLockFn(uploadDir)
ulkctx, err := uploadLock.GetLock(ctx, globalOperationTimeout)
if err != nil {
return err
}
defer uploadLock.Unlock(ulkctx.Cancel)
metaPath := pathJoin(uploadDir, cacheMetaJSONFile)
f, err := os.OpenFile(metaPath, os.O_RDWR, 0666)
if err != nil {
return err
}
defer f.Close()
m := &cacheMeta{}
if err := jsonLoad(f, m); err != nil && err != io.EOF {
return err
}
var key []byte
var objectEncryptionKey crypto.ObjectKey
if globalCacheKMS != nil {
// Calculating object encryption key
key, err = decryptObjectInfo(key, bucket, object, m.Meta)
if err != nil {
return err
}
copy(objectEncryptionKey[:], key)
pinfo.ETag = hex.EncodeToString(objectEncryptionKey.SealETag([]byte(pinfo.ETag)))
}
pIdx := cacheObjPartIndex(m, partID)
if pIdx == -1 {
m.PartActualSizes = append(m.PartActualSizes, pinfo.ActualSize)
m.PartNumbers = append(m.PartNumbers, pinfo.PartNumber)
m.PartETags = append(m.PartETags, pinfo.ETag)
m.PartSizes = append(m.PartSizes, pinfo.Size)
} else {
m.PartActualSizes[pIdx] = pinfo.ActualSize
m.PartNumbers[pIdx] = pinfo.PartNumber
m.PartETags[pIdx] = pinfo.ETag
m.PartSizes[pIdx] = pinfo.Size
}
return jsonSave(f, m)
}
// newCachePartEncryptReader returns encrypted cache part reader, with part data encrypted with part encryption key
func newCachePartEncryptReader(ctx context.Context, bucket, object string, partID int, content io.Reader, size int64, metadata map[string]string) (r io.Reader, err error) {
var key []byte
var objectEncryptionKey, partEncryptionKey crypto.ObjectKey
// Calculating object encryption key
key, err = decryptObjectInfo(key, bucket, object, metadata)
if err != nil {
return nil, err
}
copy(objectEncryptionKey[:], key)
partEnckey := objectEncryptionKey.DerivePartKey(uint32(partID))
copy(partEncryptionKey[:], partEnckey[:])
wantSize := int64(-1)
if size >= 0 {
info := ObjectInfo{Size: size}
wantSize = info.EncryptedSize()
}
hReader, err := hash.NewReader(content, wantSize, "", "", size)
if err != nil {
return nil, err
}
pReader := NewPutObjReader(hReader)
content, err = pReader.WithEncryption(hReader, &partEncryptionKey)
if err != nil {
return nil, err
}
reader, err := sio.EncryptReader(content, sio.Config{Key: partEncryptionKey[:], MinVersion: sio.Version20, CipherSuites: fips.CipherSuitesDARE()})
if err != nil {
return nil, crypto.ErrInvalidCustomerKey
}
return reader, nil
}
// uploadIDExists returns error if uploadID is not being cached.
func (c *diskCache) uploadIDExists(bucket, object, uploadID string) (err error) {
mpartCachePath := getMultipartCacheSHADir(c.dir, bucket, object)
uploadIDDir := path.Join(mpartCachePath, uploadID)
if _, err := os.Stat(uploadIDDir); err != nil {
return err
}
return nil
}
// CompleteMultipartUpload completes multipart upload on cache. The parts and cache.json are moved from the temporary location in
// .minio.sys/multipart/cacheSHA/.. to cacheSHA path after part verification succeeds.
func (c *diskCache) CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, uploadedParts []CompletePart, roi ObjectInfo, opts ObjectOptions) (oi ObjectInfo, err error) {
cachePath := getCacheSHADir(c.dir, bucket, object)
cLock := c.NewNSLockFn(cachePath)
lkctx, err := cLock.GetLock(ctx, globalOperationTimeout)
if err != nil {
return oi, err
}
ctx = lkctx.Context()
defer cLock.Unlock(lkctx.Cancel)
mpartCachePath := getMultipartCacheSHADir(c.dir, bucket, object)
uploadIDDir := path.Join(mpartCachePath, uploadID)
uploadMeta, _, _, uerr := c.statCache(ctx, uploadIDDir)
if uerr != nil {
return oi, errUploadIDNotFound
}
// Case where object not yet cached
// Calculate full object size.
var objectSize int64
// Calculate consolidated actual size.
var objectActualSize int64
var partETags []string
partETags, err = decryptCachePartETags(uploadMeta)
if err != nil {
return oi, err
}
for i, pi := range uploadedParts {
pIdx := cacheObjPartIndex(uploadMeta, pi.PartNumber)
if pIdx == -1 {
invp := InvalidPart{
PartNumber: pi.PartNumber,
GotETag: pi.ETag,
}
return oi, invp
}
pi.ETag = canonicalizeETag(pi.ETag)
if partETags[pIdx] != pi.ETag {
invp := InvalidPart{
PartNumber: pi.PartNumber,
ExpETag: partETags[pIdx],
GotETag: pi.ETag,
}
return oi, invp
}
// All parts except the last part has to be atleast 5MB.
if (i < len(uploadedParts)-1) && !isMinAllowedPartSize(uploadMeta.PartActualSizes[pIdx]) {
return oi, PartTooSmall{
PartNumber: pi.PartNumber,
PartSize: uploadMeta.PartActualSizes[pIdx],
PartETag: pi.ETag,
}
}
// Save for total object size.
objectSize += uploadMeta.PartSizes[pIdx]
// Save the consolidated actual size.
objectActualSize += uploadMeta.PartActualSizes[pIdx]
}
uploadMeta.Stat.Size = objectSize
uploadMeta.Stat.ModTime = roi.ModTime
// if encrypted - make sure ETag updated
uploadMeta.Meta["etag"] = roi.ETag
uploadMeta.Meta[ReservedMetadataPrefix+"actual-size"] = strconv.FormatInt(objectActualSize, 10)
var cpartETags []string
var cpartNums []int
var cpartSizes, cpartActualSizes []int64
for _, pi := range uploadedParts {
pIdx := cacheObjPartIndex(uploadMeta, pi.PartNumber)
if pIdx != -1 {
cpartETags = append(cpartETags, uploadMeta.PartETags[pIdx])
cpartNums = append(cpartNums, uploadMeta.PartNumbers[pIdx])
cpartSizes = append(cpartSizes, uploadMeta.PartSizes[pIdx])
cpartActualSizes = append(cpartActualSizes, uploadMeta.PartActualSizes[pIdx])
}
}
uploadMeta.PartETags = cpartETags
uploadMeta.PartSizes = cpartSizes
uploadMeta.PartActualSizes = cpartActualSizes
uploadMeta.PartNumbers = cpartNums
uploadMeta.Hits++
metaPath := pathJoin(uploadIDDir, cacheMetaJSONFile)
f, err := os.OpenFile(metaPath, os.O_RDWR|os.O_CREATE, 0666)
if err != nil {
return oi, err
}
defer f.Close()
jsonSave(f, uploadMeta)
for _, pi := range uploadedParts {
part := fmt.Sprintf("part.%d", pi.PartNumber)
renameAll(pathJoin(uploadIDDir, part), pathJoin(cachePath, part))
}
renameAll(pathJoin(uploadIDDir, cacheMetaJSONFile), pathJoin(cachePath, cacheMetaJSONFile))
removeAll(uploadIDDir) // clean up any unused parts in the uploadIDDir
return uploadMeta.ToObjectInfo(bucket, object), nil
}
func (c *diskCache) AbortUpload(bucket, object, uploadID string) (err error) {
mpartCachePath := getMultipartCacheSHADir(c.dir, bucket, object)
uploadDir := path.Join(mpartCachePath, uploadID)
return removeAll(uploadDir)
}
// cacheObjPartIndex - returns the index of matching object part number.
func cacheObjPartIndex(m *cacheMeta, partNumber int) int {
for i, part := range m.PartNumbers {
if partNumber == part {
return i
}
}
return -1
}
// cacheObjectToPartOffset calculates part index and part offset for requested offset for content on cache.
func cacheObjectToPartOffset(objInfo ObjectInfo, offset int64) (partIndex int, partOffset int64, err error) {
if offset == 0 {
// Special case - if offset is 0, then partIndex and partOffset are always 0.
return 0, 0, nil
}
partOffset = offset
// Seek until object offset maps to a particular part offset.
for i, part := range objInfo.Parts {
partIndex = i
// Offset is smaller than size we have reached the proper part offset.
if partOffset < part.Size {
return partIndex, partOffset, nil
}
// Continue to towards the next part.
partOffset -= part.Size
}
// Offset beyond the size of the object return InvalidRange.
return 0, 0, InvalidRange{}
}
// get path of on-going multipart caching
func getMultipartCacheSHADir(dir, bucket, object string) string {
return pathJoin(dir, minioMetaBucket, cacheMultipartDir, getSHA256Hash([]byte(pathJoin(bucket, object))))
}
// clean up stale cache multipart uploads according to cleanup interval.
func (c *diskCache) cleanupStaleUploads(ctx context.Context) {
if !c.commitWritethrough {
return
}
timer := time.NewTimer(cacheStaleUploadCleanupInterval)
defer timer.Stop()
for {
select {
case <-ctx.Done():
return
case <-timer.C:
// Reset for the next interval
timer.Reset(cacheStaleUploadCleanupInterval)
now := time.Now()
readDirFn(pathJoin(c.dir, minioMetaBucket, cacheMultipartDir), func(shaDir string, typ os.FileMode) error {
return readDirFn(pathJoin(c.dir, minioMetaBucket, cacheMultipartDir, shaDir), func(uploadIDDir string, typ os.FileMode) error {
uploadIDPath := pathJoin(c.dir, minioMetaBucket, cacheMultipartDir, shaDir, uploadIDDir)
fi, err := os.Stat(uploadIDPath)
if err != nil {
return nil
}
if now.Sub(fi.ModTime()) > cacheStaleUploadExpiry {
removeAll(uploadIDPath)
}
return nil
})
})
}
}
}

View File

@ -246,6 +246,9 @@ func decryptCacheObjectETag(info *ObjectInfo) error {
if globalCacheKMS == nil { if globalCacheKMS == nil {
return errKMSNotConfigured return errKMSNotConfigured
} }
if len(info.Parts) > 0 { // multipart ETag is not encrypted since it is not md5sum
return nil
}
keyID, kmsKey, sealedKey, err := crypto.S3.ParseMetadata(info.UserDefined) keyID, kmsKey, sealedKey, err := crypto.S3.ParseMetadata(info.UserDefined)
if err != nil { if err != nil {
return err return err
@ -271,6 +274,43 @@ func decryptCacheObjectETag(info *ObjectInfo) error {
return nil return nil
} }
// decryptCacheObjectETag tries to decrypt the ETag saved in encrypted format using the cache KMS
func decryptCachePartETags(c *cacheMeta) ([]string, error) {
var partETags []string
encrypted := crypto.S3.IsEncrypted(c.Meta) && isCacheEncrypted(c.Meta)
switch {
case encrypted:
if globalCacheKMS == nil {
return partETags, errKMSNotConfigured
}
keyID, kmsKey, sealedKey, err := crypto.S3.ParseMetadata(c.Meta)
if err != nil {
return partETags, err
}
extKey, err := globalCacheKMS.DecryptKey(keyID, kmsKey, kms.Context{c.Bucket: path.Join(c.Bucket, c.Object)})
if err != nil {
return partETags, err
}
var objectKey crypto.ObjectKey
if err = objectKey.Unseal(extKey, sealedKey, crypto.S3.String(), c.Bucket, c.Object); err != nil {
return partETags, err
}
for i := range c.PartETags {
etagStr := tryDecryptETag(objectKey[:], c.PartETags[i], false)
// backend ETag was hex encoded before encrypting, so hex decode to get actual ETag
etag, err := hex.DecodeString(etagStr)
if err != nil {
return []string{}, err
}
partETags = append(partETags, string(etag))
}
return partETags, nil
default:
return c.PartETags, nil
}
}
func isMetadataSame(m1, m2 map[string]string) bool { func isMetadataSame(m1, m2 map[string]string) bool {
if m1 == nil && m2 == nil { if m1 == nil && m2 == nil {
return true return true
@ -506,3 +546,38 @@ func bytesToClear(total, free int64, quotaPct, lowWatermark, highWatermark uint6
lowWMUsage := total * (int64)(lowWatermark*quotaPct) / (100 * 100) lowWMUsage := total * (int64)(lowWatermark*quotaPct) / (100 * 100)
return (uint64)(math.Min(float64(quotaAllowed), math.Max(0.0, float64(used-lowWMUsage)))) return (uint64)(math.Min(float64(quotaAllowed), math.Max(0.0, float64(used-lowWMUsage))))
} }
type multiWriter struct {
backendWriter io.Writer
cacheWriter *io.PipeWriter
pipeClosed bool
}
// multiWriter writes to backend and cache - if cache write
// fails close the pipe, but continue writing to the backend
func (t *multiWriter) Write(p []byte) (n int, err error) {
n, err = t.backendWriter.Write(p)
if err == nil && n != len(p) {
err = io.ErrShortWrite
return
}
if err != nil {
if !t.pipeClosed {
t.cacheWriter.CloseWithError(err)
}
return
}
// ignore errors writing to cache
if !t.pipeClosed {
_, cerr := t.cacheWriter.Write(p)
if cerr != nil {
t.pipeClosed = true
t.cacheWriter.CloseWithError(cerr)
}
}
return len(p), nil
}
func cacheMultiWriter(w1 io.Writer, w2 *io.PipeWriter) io.Writer {
return &multiWriter{backendWriter: w1, cacheWriter: w2}
}

View File

@ -59,6 +59,13 @@ const (
CommitFailed cacheCommitStatus = "failed" CommitFailed cacheCommitStatus = "failed"
) )
const (
// CommitWriteBack allows staging and write back of cached content for single object uploads
CommitWriteBack string = "writeback"
// CommitWriteThrough allows caching multipart uploads to disk synchronously
CommitWriteThrough string = "writethrough"
)
// String returns string representation of status // String returns string representation of status
func (s cacheCommitStatus) String() string { func (s cacheCommitStatus) String() string {
return string(s) return string(s)
@ -80,6 +87,13 @@ type CacheObjectLayer interface {
DeleteObjects(ctx context.Context, bucket string, objects []ObjectToDelete, opts ObjectOptions) ([]DeletedObject, []error) DeleteObjects(ctx context.Context, bucket string, objects []ObjectToDelete, opts ObjectOptions) ([]DeletedObject, []error)
PutObject(ctx context.Context, bucket, object string, data *PutObjReader, opts ObjectOptions) (objInfo ObjectInfo, err error) PutObject(ctx context.Context, bucket, object string, data *PutObjReader, opts ObjectOptions) (objInfo ObjectInfo, err error)
CopyObject(ctx context.Context, srcBucket, srcObject, destBucket, destObject string, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (objInfo ObjectInfo, err error) CopyObject(ctx context.Context, srcBucket, srcObject, destBucket, destObject string, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (objInfo ObjectInfo, err error)
// Multipart operations.
NewMultipartUpload(ctx context.Context, bucket, object string, opts ObjectOptions) (uploadID string, err error)
PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, data *PutObjReader, opts ObjectOptions) (info PartInfo, err error)
AbortMultipartUpload(ctx context.Context, bucket, object, uploadID string, opts ObjectOptions) error
CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, uploadedParts []CompletePart, opts ObjectOptions) (objInfo ObjectInfo, err error)
CopyObjectPart(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject, uploadID string, partID int, startOffset int64, length int64, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (pi PartInfo, e error)
// Storage operations. // Storage operations.
StorageInfo(ctx context.Context) CacheStorageInfo StorageInfo(ctx context.Context) CacheStorageInfo
CacheStats() *CacheStats CacheStats() *CacheStats
@ -94,21 +108,26 @@ type cacheObjects struct {
// number of accesses after which to cache an object // number of accesses after which to cache an object
after int after int
// commit objects in async manner // commit objects in async manner
commitWriteback bool commitWriteback bool
commitWritethrough bool
// if true migration is in progress from v1 to v2 // if true migration is in progress from v1 to v2
migrating bool migrating bool
// mutex to protect migration bool
migMutex sync.Mutex
// retry queue for writeback cache mode to reattempt upload to backend // retry queue for writeback cache mode to reattempt upload to backend
wbRetryCh chan ObjectInfo wbRetryCh chan ObjectInfo
// Cache stats // Cache stats
cacheStats *CacheStats cacheStats *CacheStats
InnerGetObjectNInfoFn func(ctx context.Context, bucket, object string, rs *HTTPRangeSpec, h http.Header, lockType LockType, opts ObjectOptions) (gr *GetObjectReader, err error) InnerGetObjectNInfoFn func(ctx context.Context, bucket, object string, rs *HTTPRangeSpec, h http.Header, lockType LockType, opts ObjectOptions) (gr *GetObjectReader, err error)
InnerGetObjectInfoFn func(ctx context.Context, bucket, object string, opts ObjectOptions) (objInfo ObjectInfo, err error) InnerGetObjectInfoFn func(ctx context.Context, bucket, object string, opts ObjectOptions) (objInfo ObjectInfo, err error)
InnerDeleteObjectFn func(ctx context.Context, bucket, object string, opts ObjectOptions) (objInfo ObjectInfo, err error) InnerDeleteObjectFn func(ctx context.Context, bucket, object string, opts ObjectOptions) (objInfo ObjectInfo, err error)
InnerPutObjectFn func(ctx context.Context, bucket, object string, data *PutObjReader, opts ObjectOptions) (objInfo ObjectInfo, err error) InnerPutObjectFn func(ctx context.Context, bucket, object string, data *PutObjReader, opts ObjectOptions) (objInfo ObjectInfo, err error)
InnerCopyObjectFn func(ctx context.Context, srcBucket, srcObject, destBucket, destObject string, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (objInfo ObjectInfo, err error) InnerCopyObjectFn func(ctx context.Context, srcBucket, srcObject, destBucket, destObject string, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (objInfo ObjectInfo, err error)
InnerNewMultipartUploadFn func(ctx context.Context, bucket, object string, opts ObjectOptions) (uploadID string, err error)
InnerPutObjectPartFn func(ctx context.Context, bucket, object, uploadID string, partID int, data *PutObjReader, opts ObjectOptions) (info PartInfo, err error)
InnerAbortMultipartUploadFn func(ctx context.Context, bucket, object, uploadID string, opts ObjectOptions) error
InnerCompleteMultipartUploadFn func(ctx context.Context, bucket, object, uploadID string, uploadedParts []CompletePart, opts ObjectOptions) (objInfo ObjectInfo, err error)
InnerCopyObjectPartFn func(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject, uploadID string, partID int, startOffset int64, length int64, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (pi PartInfo, e error)
} }
func (c *cacheObjects) incHitsToMeta(ctx context.Context, dcache *diskCache, bucket, object string, size int64, eTag string, rs *HTTPRangeSpec) error { func (c *cacheObjects) incHitsToMeta(ctx context.Context, dcache *diskCache, bucket, object string, size int64, eTag string, rs *HTTPRangeSpec) error {
@ -488,8 +507,6 @@ func (c *cacheObjects) CacheStats() (cs *CacheStats) {
// skipCache() returns true if cache migration is in progress // skipCache() returns true if cache migration is in progress
func (c *cacheObjects) skipCache() bool { func (c *cacheObjects) skipCache() bool {
c.migMutex.Lock()
defer c.migMutex.Unlock()
return c.migrating return c.migrating
} }
@ -619,8 +636,6 @@ func (c *cacheObjects) migrateCacheFromV1toV2(ctx context.Context) {
} }
// update migration status // update migration status
c.migMutex.Lock()
defer c.migMutex.Unlock()
c.migrating = false c.migrating = false
logStartupMessage(color.Blue("Cache migration completed successfully.")) logStartupMessage(color.Blue("Cache migration completed successfully."))
} }
@ -759,13 +774,14 @@ func newServerCacheObjects(ctx context.Context, config cache.Config) (CacheObjec
return nil, err return nil, err
} }
c := &cacheObjects{ c := &cacheObjects{
cache: cache, cache: cache,
exclude: config.Exclude, exclude: config.Exclude,
after: config.After, after: config.After,
migrating: migrateSw, migrating: migrateSw,
migMutex: sync.Mutex{}, commitWriteback: config.CacheCommitMode == CommitWriteBack,
commitWriteback: config.CommitWriteback, commitWritethrough: config.CacheCommitMode == CommitWriteThrough,
cacheStats: newCacheStats(),
cacheStats: newCacheStats(),
InnerGetObjectInfoFn: func(ctx context.Context, bucket, object string, opts ObjectOptions) (ObjectInfo, error) { InnerGetObjectInfoFn: func(ctx context.Context, bucket, object string, opts ObjectOptions) (ObjectInfo, error) {
return newObjectLayerFn().GetObjectInfo(ctx, bucket, object, opts) return newObjectLayerFn().GetObjectInfo(ctx, bucket, object, opts)
}, },
@ -781,6 +797,21 @@ func newServerCacheObjects(ctx context.Context, config cache.Config) (CacheObjec
InnerCopyObjectFn: func(ctx context.Context, srcBucket, srcObject, destBucket, destObject string, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (objInfo ObjectInfo, err error) { InnerCopyObjectFn: func(ctx context.Context, srcBucket, srcObject, destBucket, destObject string, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (objInfo ObjectInfo, err error) {
return newObjectLayerFn().CopyObject(ctx, srcBucket, srcObject, destBucket, destObject, srcInfo, srcOpts, dstOpts) return newObjectLayerFn().CopyObject(ctx, srcBucket, srcObject, destBucket, destObject, srcInfo, srcOpts, dstOpts)
}, },
InnerNewMultipartUploadFn: func(ctx context.Context, bucket, object string, opts ObjectOptions) (uploadID string, err error) {
return newObjectLayerFn().NewMultipartUpload(ctx, bucket, object, opts)
},
InnerPutObjectPartFn: func(ctx context.Context, bucket, object, uploadID string, partID int, data *PutObjReader, opts ObjectOptions) (info PartInfo, err error) {
return newObjectLayerFn().PutObjectPart(ctx, bucket, object, uploadID, partID, data, opts)
},
InnerAbortMultipartUploadFn: func(ctx context.Context, bucket, object, uploadID string, opts ObjectOptions) error {
return newObjectLayerFn().AbortMultipartUpload(ctx, bucket, object, uploadID, opts)
},
InnerCompleteMultipartUploadFn: func(ctx context.Context, bucket, object, uploadID string, uploadedParts []CompletePart, opts ObjectOptions) (objInfo ObjectInfo, err error) {
return newObjectLayerFn().CompleteMultipartUpload(ctx, bucket, object, uploadID, uploadedParts, opts)
},
InnerCopyObjectPartFn: func(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject, uploadID string, partID int, startOffset int64, length int64, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (pi PartInfo, e error) {
return newObjectLayerFn().CopyObjectPart(ctx, srcBucket, srcObject, dstBucket, dstObject, uploadID, partID, startOffset, length, srcInfo, srcOpts, dstOpts)
},
} }
c.cacheStats.GetDiskStats = func() []CacheDiskStats { c.cacheStats.GetDiskStats = func() []CacheDiskStats {
cacheDiskStats := make([]CacheDiskStats, len(c.cache)) cacheDiskStats := make([]CacheDiskStats, len(c.cache))
@ -859,3 +890,247 @@ func (c *cacheObjects) queuePendingWriteback(ctx context.Context) {
} }
} }
} }
// NewMultipartUpload - Starts a new multipart upload operation to backend - if writethrough mode is enabled, starts caching the multipart.
func (c *cacheObjects) NewMultipartUpload(ctx context.Context, bucket, object string, opts ObjectOptions) (uploadID string, err error) {
newMultipartUploadFn := c.InnerNewMultipartUploadFn
dcache, err := c.getCacheToLoc(ctx, bucket, object)
if err != nil {
// disk cache could not be located,execute backend call.
return newMultipartUploadFn(ctx, bucket, object, opts)
}
if c.skipCache() {
return newMultipartUploadFn(ctx, bucket, object, opts)
}
if opts.ServerSideEncryption != nil { // avoid caching encrypted objects
dcache.Delete(ctx, bucket, object)
return newMultipartUploadFn(ctx, bucket, object, opts)
}
// skip cache for objects with locks
objRetention := objectlock.GetObjectRetentionMeta(opts.UserDefined)
legalHold := objectlock.GetObjectLegalHoldMeta(opts.UserDefined)
if objRetention.Mode.Valid() || legalHold.Status.Valid() {
dcache.Delete(ctx, bucket, object)
return newMultipartUploadFn(ctx, bucket, object, opts)
}
// fetch from backend if cache exclude pattern or cache-control
// directive set to exclude
if c.isCacheExclude(bucket, object) {
dcache.Delete(ctx, bucket, object)
return newMultipartUploadFn(ctx, bucket, object, opts)
}
if !c.commitWritethrough {
return newMultipartUploadFn(ctx, bucket, object, opts)
}
// perform multipart upload on backend and cache simultaneously
uploadID, err = newMultipartUploadFn(ctx, bucket, object, opts)
dcache.NewMultipartUpload(GlobalContext, bucket, object, uploadID, opts)
return uploadID, err
}
// PutObjectPart streams part to cache concurrently if writethrough mode is enabled. Otherwise redirects the call to remote
func (c *cacheObjects) PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, data *PutObjReader, opts ObjectOptions) (info PartInfo, err error) {
putObjectPartFn := c.InnerPutObjectPartFn
dcache, err := c.getCacheToLoc(ctx, bucket, object)
if err != nil {
// disk cache could not be located,execute backend call.
return putObjectPartFn(ctx, bucket, object, uploadID, partID, data, opts)
}
if !c.commitWritethrough {
return putObjectPartFn(ctx, bucket, object, uploadID, partID, data, opts)
}
if c.skipCache() {
return putObjectPartFn(ctx, bucket, object, uploadID, partID, data, opts)
}
size := data.Size()
// avoid caching part if space unavailable
if !dcache.diskSpaceAvailable(size) {
return putObjectPartFn(ctx, bucket, object, uploadID, partID, data, opts)
}
if opts.ServerSideEncryption != nil {
dcache.Delete(ctx, bucket, object)
return putObjectPartFn(ctx, bucket, object, uploadID, partID, data, opts)
}
// skip cache for objects with locks
objRetention := objectlock.GetObjectRetentionMeta(opts.UserDefined)
legalHold := objectlock.GetObjectLegalHoldMeta(opts.UserDefined)
if objRetention.Mode.Valid() || legalHold.Status.Valid() {
dcache.Delete(ctx, bucket, object)
return putObjectPartFn(ctx, bucket, object, uploadID, partID, data, opts)
}
// fetch from backend if cache exclude pattern or cache-control
// directive set to exclude
if c.isCacheExclude(bucket, object) {
dcache.Delete(ctx, bucket, object)
return putObjectPartFn(ctx, bucket, object, uploadID, partID, data, opts)
}
info = PartInfo{}
// Initialize pipe to stream data to backend
pipeReader, pipeWriter := io.Pipe()
hashReader, err := hash.NewReader(pipeReader, size, "", "", data.ActualSize())
if err != nil {
return
}
// Initialize pipe to stream data to cache
rPipe, wPipe := io.Pipe()
pinfoCh := make(chan PartInfo)
cinfoCh := make(chan PartInfo)
errorCh := make(chan error)
go func() {
info, err = putObjectPartFn(ctx, bucket, object, uploadID, partID, NewPutObjReader(hashReader), opts)
if err != nil {
close(pinfoCh)
pipeReader.CloseWithError(err)
rPipe.CloseWithError(err)
errorCh <- err
return
}
close(errorCh)
pinfoCh <- info
}()
go func() {
pinfo, perr := dcache.PutObjectPart(GlobalContext, bucket, object, uploadID, partID, rPipe, data.Size(), opts)
if perr != nil {
rPipe.CloseWithError(perr)
close(cinfoCh)
// clean up upload
dcache.AbortUpload(bucket, object, uploadID)
return
}
cinfoCh <- pinfo
}()
mwriter := cacheMultiWriter(pipeWriter, wPipe)
_, err = io.Copy(mwriter, data)
pipeWriter.Close()
wPipe.Close()
if err != nil {
err = <-errorCh
return PartInfo{}, err
}
info = <-pinfoCh
cachedInfo := <-cinfoCh
if info.PartNumber == cachedInfo.PartNumber {
cachedInfo.ETag = info.ETag
cachedInfo.LastModified = info.LastModified
dcache.SavePartMetadata(GlobalContext, bucket, object, uploadID, partID, cachedInfo)
}
return info, err
}
// CopyObjectPart behaves similar to PutObjectPart - caches part to upload dir if writethrough mode is enabled.
func (c *cacheObjects) CopyObjectPart(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject, uploadID string, partID int, startOffset int64, length int64, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (pi PartInfo, e error) {
copyObjectPartFn := c.InnerCopyObjectPartFn
dcache, err := c.getCacheToLoc(ctx, dstBucket, dstObject)
if err != nil {
// disk cache could not be located,execute backend call.
return copyObjectPartFn(ctx, srcBucket, srcObject, dstBucket, dstObject, uploadID, partID, startOffset, length, srcInfo, srcOpts, dstOpts)
}
if !c.commitWritethrough {
return copyObjectPartFn(ctx, srcBucket, srcObject, dstBucket, dstObject, uploadID, partID, startOffset, length, srcInfo, srcOpts, dstOpts)
}
if err := dcache.uploadIDExists(dstBucket, dstObject, uploadID); err != nil {
return copyObjectPartFn(ctx, srcBucket, srcObject, dstBucket, dstObject, uploadID, partID, startOffset, length, srcInfo, srcOpts, dstOpts)
}
partInfo, err := copyObjectPartFn(ctx, srcBucket, srcObject, dstBucket, dstObject, uploadID, partID, startOffset, length, srcInfo, srcOpts, dstOpts)
if err != nil {
return pi, toObjectErr(err, dstBucket, dstObject)
}
go func() {
isSuffixLength := false
if startOffset < 0 {
isSuffixLength = true
}
rs := &HTTPRangeSpec{
IsSuffixLength: isSuffixLength,
Start: startOffset,
End: startOffset + length,
}
// fill cache in the background
bReader, bErr := c.InnerGetObjectNInfoFn(GlobalContext, srcBucket, srcObject, rs, http.Header{}, readLock, ObjectOptions{})
if bErr != nil {
return
}
defer bReader.Close()
// avoid cache overwrite if another background routine filled cache
dcache.PutObjectPart(GlobalContext, dstBucket, dstObject, uploadID, partID, bReader, length, ObjectOptions{UserDefined: getMetadata(bReader.ObjInfo)})
}()
// Success.
return partInfo, nil
}
// CompleteMultipartUpload - completes multipart upload operation on the backend. If writethrough mode is enabled, this also
// finalizes the upload saved in cache multipart dir.
func (c *cacheObjects) CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, uploadedParts []CompletePart, opts ObjectOptions) (oi ObjectInfo, err error) {
completeMultipartUploadFn := c.InnerCompleteMultipartUploadFn
if !c.commitWritethrough {
return completeMultipartUploadFn(ctx, bucket, object, uploadID, uploadedParts, opts)
}
dcache, err := c.getCacheToLoc(ctx, bucket, object)
if err != nil {
// disk cache could not be located,execute backend call.
return completeMultipartUploadFn(ctx, bucket, object, uploadID, uploadedParts, opts)
}
// perform multipart upload on backend and cache simultaneously
oi, err = completeMultipartUploadFn(ctx, bucket, object, uploadID, uploadedParts, opts)
if err == nil {
// fill cache in the background
go func() {
_, err := dcache.CompleteMultipartUpload(ctx, bucket, object, uploadID, uploadedParts, oi, opts)
if err != nil {
// fill cache in the background
bReader, bErr := c.InnerGetObjectNInfoFn(GlobalContext, bucket, object, nil, http.Header{}, readLock, ObjectOptions{})
if bErr != nil {
return
}
defer bReader.Close()
oi, _, err := dcache.Stat(GlobalContext, bucket, object)
// avoid cache overwrite if another background routine filled cache
if err != nil || oi.ETag != bReader.ObjInfo.ETag {
dcache.Put(GlobalContext, bucket, object, bReader, bReader.ObjInfo.Size, nil, ObjectOptions{UserDefined: getMetadata(bReader.ObjInfo)}, false)
}
}
}()
}
return
}
// AbortMultipartUpload - aborts multipart upload on backend and cache.
func (c *cacheObjects) AbortMultipartUpload(ctx context.Context, bucket, object, uploadID string, opts ObjectOptions) error {
abortMultipartUploadFn := c.InnerAbortMultipartUploadFn
if !c.commitWritethrough {
return abortMultipartUploadFn(ctx, bucket, object, uploadID, opts)
}
dcache, err := c.getCacheToLoc(ctx, bucket, object)
if err != nil {
// disk cache could not be located,execute backend call.
return abortMultipartUploadFn(ctx, bucket, object, uploadID, opts)
}
if err = dcache.uploadIDExists(bucket, object, uploadID); err != nil {
return toObjectErr(err, bucket, object, uploadID)
}
// execute backend operation
err = abortMultipartUploadFn(ctx, bucket, object, uploadID, opts)
if err != nil {
return err
}
// abort multipart upload on cache
go dcache.AbortUpload(bucket, object, uploadID)
return nil
}

View File

@ -2283,6 +2283,9 @@ func (api objectAPIHandlers) NewMultipartUploadHandler(w http.ResponseWriter, r
return return
} }
newMultipartUpload := objectAPI.NewMultipartUpload newMultipartUpload := objectAPI.NewMultipartUpload
if api.CacheAPI() != nil {
newMultipartUpload = api.CacheAPI().NewMultipartUpload
}
uploadID, err := newMultipartUpload(ctx, bucket, object, opts) uploadID, err := newMultipartUpload(ctx, bucket, object, opts)
if err != nil { if err != nil {
@ -2597,9 +2600,13 @@ func (api objectAPIHandlers) CopyObjectPartHandler(w http.ResponseWriter, r *htt
} }
srcInfo.PutObjReader = pReader srcInfo.PutObjReader = pReader
copyObjectPart := objectAPI.CopyObjectPart
if api.CacheAPI() != nil {
copyObjectPart = api.CacheAPI().CopyObjectPart
}
// Copy source object to destination, if source and destination // Copy source object to destination, if source and destination
// object is same then only metadata is updated. // object is same then only metadata is updated.
partInfo, err := objectAPI.CopyObjectPart(ctx, srcBucket, srcObject, dstBucket, dstObject, uploadID, partID, partInfo, err := copyObjectPart(ctx, srcBucket, srcObject, dstBucket, dstObject, uploadID, partID,
startOffset, length, srcInfo, srcOpts, dstOpts) startOffset, length, srcInfo, srcOpts, dstOpts)
if err != nil { if err != nil {
writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL) writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
@ -2854,6 +2861,9 @@ func (api objectAPIHandlers) PutObjectPartHandler(w http.ResponseWriter, r *http
} }
putObjectPart := objectAPI.PutObjectPart putObjectPart := objectAPI.PutObjectPart
if api.CacheAPI() != nil {
putObjectPart = api.CacheAPI().PutObjectPart
}
partInfo, err := putObjectPart(ctx, bucket, object, uploadID, partID, pReader, opts) partInfo, err := putObjectPart(ctx, bucket, object, uploadID, partID, pReader, opts)
if err != nil { if err != nil {
@ -2907,6 +2917,9 @@ func (api objectAPIHandlers) AbortMultipartUploadHandler(w http.ResponseWriter,
return return
} }
abortMultipartUpload := objectAPI.AbortMultipartUpload abortMultipartUpload := objectAPI.AbortMultipartUpload
if api.CacheAPI() != nil {
abortMultipartUpload = api.CacheAPI().AbortMultipartUpload
}
if s3Error := checkRequestAuthType(ctx, r, policy.AbortMultipartUploadAction, bucket, object); s3Error != ErrNone { if s3Error := checkRequestAuthType(ctx, r, policy.AbortMultipartUploadAction, bucket, object); s3Error != ErrNone {
writeErrorResponse(ctx, w, errorCodes.ToAPIErr(s3Error), r.URL) writeErrorResponse(ctx, w, errorCodes.ToAPIErr(s3Error), r.URL)
@ -3191,7 +3204,9 @@ func (api objectAPIHandlers) CompleteMultipartUploadHandler(w http.ResponseWrite
s3MD5 := getCompleteMultipartMD5(originalCompleteParts) s3MD5 := getCompleteMultipartMD5(originalCompleteParts)
completeMultiPartUpload := objectAPI.CompleteMultipartUpload completeMultiPartUpload := objectAPI.CompleteMultipartUpload
if api.CacheAPI() != nil {
completeMultiPartUpload = api.CacheAPI().CompleteMultipartUpload
}
// This code is specifically to handle the requirements for slow // This code is specifically to handle the requirements for slow
// complete multipart upload operations on FS mode. // complete multipart upload operations on FS mode.
writeErrorResponseWithoutXMLHeader := func(ctx context.Context, w http.ResponseWriter, err APIError, reqURL *url.URL) { writeErrorResponseWithoutXMLHeader := func(ctx context.Context, w http.ResponseWriter, err APIError, reqURL *url.URL) {

View File

@ -16,7 +16,7 @@ minio gateway <name> -h
MINIO_CACHE_WATERMARK_LOW: % of cache quota at which cache eviction stops MINIO_CACHE_WATERMARK_LOW: % of cache quota at which cache eviction stops
MINIO_CACHE_WATERMARK_HIGH: % of cache quota at which cache eviction starts MINIO_CACHE_WATERMARK_HIGH: % of cache quota at which cache eviction starts
MINIO_CACHE_RANGE: set to "on" or "off" caching of independent range requests per object, defaults to "on" MINIO_CACHE_RANGE: set to "on" or "off" caching of independent range requests per object, defaults to "on"
MINIO_CACHE_COMMIT: set to 'writeback' or 'writethrough' for upload caching
... ...
... ...
@ -87,6 +87,10 @@ master key to automatically encrypt all cached content.
Note that cache KMS master key is not recommended for use in production deployments. If the MinIO server/gateway machine is ever compromised, the cache KMS master key must also be treated as compromised. Note that cache KMS master key is not recommended for use in production deployments. If the MinIO server/gateway machine is ever compromised, the cache KMS master key must also be treated as compromised.
Support for external KMS to manage cache KMS keys is on the roadmap,and would be ideal for production use cases. Support for external KMS to manage cache KMS keys is on the roadmap,and would be ideal for production use cases.
- `MINIO_CACHE_COMMIT` setting of `writethrough` allows caching of multipart uploads synchronously if enabled. By default, single PUT operations are already cached on write without any special setting.
NOTE: `MINIO_CACHE_COMMIT` also has a value of `writeback` which allows staging single uploads in cache before committing to remote. However, for consistency reasons, staging uploads in the cache are not permitted for multipart uploads. Partially cached stale uploads older than 24 hours are automatically cleaned up.
> NOTE: Expiration happens automatically based on the configured interval as explained above, frequently accessed objects stay alive in cache for a significantly longer time. > NOTE: Expiration happens automatically based on the configured interval as explained above, frequently accessed objects stay alive in cache for a significantly longer time.
### Crash Recovery ### Crash Recovery

View File

@ -39,7 +39,7 @@ type Config struct {
WatermarkLow int `json:"watermark_low"` WatermarkLow int `json:"watermark_low"`
WatermarkHigh int `json:"watermark_high"` WatermarkHigh int `json:"watermark_high"`
Range bool `json:"range"` Range bool `json:"range"`
CommitWriteback bool `json:"-"` CacheCommitMode string `json:"commit_mode"`
} }
// UnmarshalJSON - implements JSON unmarshal interface for unmarshalling // UnmarshalJSON - implements JSON unmarshal interface for unmarshalling
@ -155,12 +155,11 @@ func parseCacheExcludes(excludes string) ([]string, error) {
return excludesSlice, nil return excludesSlice, nil
} }
func parseCacheCommitMode(commitStr string) (bool, error) { func parseCacheCommitMode(commitStr string) (string, error) {
switch strings.ToLower(commitStr) { switch strings.ToLower(commitStr) {
case "writeback": case "writeback", "writethrough":
return true, nil return strings.ToLower(commitStr), nil
case "writethrough": default:
return false, nil return "", config.ErrInvalidCacheCommitValue(nil).Msg("cache commit value must be `writeback` or `writethrough`")
} }
return false, config.ErrInvalidCacheCommitValue(nil).Msg("cache commit value must be `writeback` or `writethrough`")
} }

View File

@ -219,11 +219,11 @@ func LookupConfig(kvs config.KVS) (Config, error) {
cfg.Range = rng cfg.Range = rng
} }
if commit := env.Get(EnvCacheCommit, kvs.Get(Commit)); commit != "" { if commit := env.Get(EnvCacheCommit, kvs.Get(Commit)); commit != "" {
cfg.CommitWriteback, err = parseCacheCommitMode(commit) cfg.CacheCommitMode, err = parseCacheCommitMode(commit)
if err != nil { if err != nil {
return cfg, err return cfg, err
} }
if cfg.After > 0 && cfg.CommitWriteback { if cfg.After > 0 && cfg.CacheCommitMode != "" {
err := errors.New("cache after cannot be used with commit writeback") err := errors.New("cache after cannot be used with commit writeback")
return cfg, config.ErrInvalidCacheSetting(err) return cfg, config.ErrInvalidCacheSetting(err)
} }