mirror of https://github.com/minio/minio.git
Add support for caching multipart in writethrough mode (#13507)
This commit is contained in:
parent
6d53e3c2d7
commit
15dcacc1fc
|
@ -29,6 +29,8 @@ import (
|
|||
"io/ioutil"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
@ -39,6 +41,7 @@ import (
|
|||
"github.com/minio/minio/internal/crypto"
|
||||
"github.com/minio/minio/internal/disk"
|
||||
"github.com/minio/minio/internal/fips"
|
||||
"github.com/minio/minio/internal/hash"
|
||||
xhttp "github.com/minio/minio/internal/http"
|
||||
xioutil "github.com/minio/minio/internal/ioutil"
|
||||
"github.com/minio/minio/internal/kms"
|
||||
|
@ -50,11 +53,16 @@ const (
|
|||
// cache.json object metadata for cached objects.
|
||||
cacheMetaJSONFile = "cache.json"
|
||||
cacheDataFile = "part.1"
|
||||
cacheDataFilePrefix = "part"
|
||||
|
||||
cacheMetaVersion = "1.0.0"
|
||||
cacheExpiryDays = 90 * time.Hour * 24 // defaults to 90 days
|
||||
// SSECacheEncrypted is the metadata key indicating that the object
|
||||
// is a cache entry encrypted with cache KMS master key in globalCacheKMS.
|
||||
SSECacheEncrypted = "X-Minio-Internal-Encrypted-Cache"
|
||||
cacheMultipartDir = "multipart"
|
||||
cacheStaleUploadCleanupInterval = time.Hour * 24
|
||||
cacheStaleUploadExpiry = time.Hour * 24
|
||||
)
|
||||
|
||||
// CacheChecksumInfoV1 - carries checksums of individual blocks on disk.
|
||||
|
@ -78,6 +86,11 @@ type cacheMeta struct {
|
|||
Hits int `json:"hits,omitempty"`
|
||||
Bucket string `json:"bucket,omitempty"`
|
||||
Object string `json:"object,omitempty"`
|
||||
// for multipart upload
|
||||
PartNumbers []int `json:"partNums,omitempty"` // Part Numbers
|
||||
PartETags []string `json:"partETags,omitempty"` // Part ETags
|
||||
PartSizes []int64 `json:"partSizes,omitempty"` // Part Sizes
|
||||
PartActualSizes []int64 `json:"partASizes,omitempty"` // Part ActualSizes (compression)
|
||||
}
|
||||
|
||||
// RangeInfo has the range, file and range length information for a cached range.
|
||||
|
@ -104,13 +117,13 @@ func (m *cacheMeta) ToObjectInfo(bucket, object string) (o ObjectInfo) {
|
|||
CacheStatus: CacheHit,
|
||||
CacheLookupStatus: CacheHit,
|
||||
}
|
||||
|
||||
meta := cloneMSS(m.Meta)
|
||||
// We set file info only if its valid.
|
||||
o.Size = m.Stat.Size
|
||||
o.ETag = extractETag(m.Meta)
|
||||
o.ContentType = m.Meta["content-type"]
|
||||
o.ContentEncoding = m.Meta["content-encoding"]
|
||||
if storageClass, ok := m.Meta[xhttp.AmzStorageClass]; ok {
|
||||
o.ETag = extractETag(meta)
|
||||
o.ContentType = meta["content-type"]
|
||||
o.ContentEncoding = meta["content-encoding"]
|
||||
if storageClass, ok := meta[xhttp.AmzStorageClass]; ok {
|
||||
o.StorageClass = storageClass
|
||||
} else {
|
||||
o.StorageClass = globalMinioDefaultStorageClass
|
||||
|
@ -119,20 +132,26 @@ func (m *cacheMeta) ToObjectInfo(bucket, object string) (o ObjectInfo) {
|
|||
t time.Time
|
||||
e error
|
||||
)
|
||||
if exp, ok := m.Meta["expires"]; ok {
|
||||
if exp, ok := meta["expires"]; ok {
|
||||
if t, e = time.Parse(http.TimeFormat, exp); e == nil {
|
||||
o.Expires = t.UTC()
|
||||
}
|
||||
}
|
||||
if mtime, ok := m.Meta["last-modified"]; ok {
|
||||
if mtime, ok := meta["last-modified"]; ok {
|
||||
if t, e = time.Parse(http.TimeFormat, mtime); e == nil {
|
||||
o.ModTime = t.UTC()
|
||||
}
|
||||
}
|
||||
|
||||
o.Parts = make([]ObjectPartInfo, len(m.PartNumbers))
|
||||
for i := range m.PartNumbers {
|
||||
o.Parts[i].Number = m.PartNumbers[i]
|
||||
o.Parts[i].Size = m.PartSizes[i]
|
||||
o.Parts[i].ETag = m.PartETags[i]
|
||||
o.Parts[i].ActualSize = m.PartActualSizes[i]
|
||||
}
|
||||
// etag/md5Sum has already been extracted. We need to
|
||||
// remove to avoid it from appearing as part of user-defined metadata
|
||||
o.UserDefined = cleanMetadata(m.Meta)
|
||||
o.UserDefined = cleanMetadata(meta)
|
||||
return o
|
||||
}
|
||||
|
||||
|
@ -152,6 +171,8 @@ type diskCache struct {
|
|||
highWatermark int
|
||||
enableRange bool
|
||||
commitWriteback bool
|
||||
commitWritethrough bool
|
||||
|
||||
retryWritebackCh chan ObjectInfo
|
||||
// nsMutex namespace lock
|
||||
nsMutex *nsLockMap
|
||||
|
@ -178,7 +199,9 @@ func newDiskCache(ctx context.Context, dir string, config cache.Config) (*diskCa
|
|||
lowWatermark: config.WatermarkLow,
|
||||
highWatermark: config.WatermarkHigh,
|
||||
enableRange: config.Range,
|
||||
commitWriteback: config.CommitWriteback,
|
||||
commitWriteback: config.CacheCommitMode == CommitWriteBack,
|
||||
commitWritethrough: config.CacheCommitMode == CommitWriteThrough,
|
||||
|
||||
retryWritebackCh: make(chan ObjectInfo, 10000),
|
||||
online: 1,
|
||||
pool: sync.Pool{
|
||||
|
@ -190,6 +213,7 @@ func newDiskCache(ctx context.Context, dir string, config cache.Config) (*diskCa
|
|||
nsMutex: newNSLock(false),
|
||||
}
|
||||
go cache.purgeWait(ctx)
|
||||
go cache.cleanupStaleUploads(ctx)
|
||||
if cache.commitWriteback {
|
||||
go cache.scanCacheWritebackFailures(ctx)
|
||||
}
|
||||
|
@ -303,16 +327,11 @@ func (c *diskCache) purge(ctx context.Context) {
|
|||
// ignore error we know what value we are passing.
|
||||
scorer, _ := newFileScorer(toFree, time.Now().Unix(), 100)
|
||||
|
||||
// this function returns FileInfo for cached range files and cache data file.
|
||||
fiStatFn := func(ranges map[string]string, dataFile, pathPrefix string) map[string]os.FileInfo {
|
||||
// this function returns FileInfo for cached range files.
|
||||
fiStatRangesFn := func(ranges map[string]string, pathPrefix string) map[string]os.FileInfo {
|
||||
fm := make(map[string]os.FileInfo)
|
||||
fname := pathJoin(pathPrefix, dataFile)
|
||||
if fi, err := os.Stat(fname); err == nil {
|
||||
fm[fname] = fi
|
||||
}
|
||||
|
||||
for _, rngFile := range ranges {
|
||||
fname = pathJoin(pathPrefix, rngFile)
|
||||
fname := pathJoin(pathPrefix, rngFile)
|
||||
if fi, err := os.Stat(fname); err == nil {
|
||||
fm[fname] = fi
|
||||
}
|
||||
|
@ -320,6 +339,26 @@ func (c *diskCache) purge(ctx context.Context) {
|
|||
return fm
|
||||
}
|
||||
|
||||
// this function returns most recent Atime among cached part files.
|
||||
lastAtimeFn := func(partNums []int, pathPrefix string) time.Time {
|
||||
lastATime := timeSentinel
|
||||
for _, pnum := range partNums {
|
||||
fname := pathJoin(pathPrefix, fmt.Sprintf("%s.%d", cacheDataFilePrefix, pnum))
|
||||
if fi, err := os.Stat(fname); err == nil {
|
||||
if atime.Get(fi).After(lastATime) {
|
||||
lastATime = atime.Get(fi)
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(partNums) == 0 {
|
||||
fname := pathJoin(pathPrefix, cacheDataFile)
|
||||
if fi, err := os.Stat(fname); err == nil {
|
||||
lastATime = atime.Get(fi)
|
||||
}
|
||||
}
|
||||
return lastATime
|
||||
}
|
||||
|
||||
filterFn := func(name string, typ os.FileMode) error {
|
||||
if name == minioMetaBucket {
|
||||
// Proceed to next file.
|
||||
|
@ -334,9 +373,10 @@ func (c *diskCache) purge(ctx context.Context) {
|
|||
// Proceed to next file.
|
||||
return nil
|
||||
}
|
||||
|
||||
// stat all cached file ranges and cacheDataFile.
|
||||
cachedFiles := fiStatFn(meta.Ranges, cacheDataFile, pathJoin(c.dir, name))
|
||||
// get last access time of cache part files
|
||||
lastAtime := lastAtimeFn(meta.PartNumbers, pathJoin(c.dir, name))
|
||||
// stat all cached file ranges.
|
||||
cachedRngFiles := fiStatRangesFn(meta.Ranges, pathJoin(c.dir, name))
|
||||
objInfo := meta.ToObjectInfo("", "")
|
||||
// prevent gc from clearing un-synced commits. This metadata is present when
|
||||
// cache writeback commit setting is enabled.
|
||||
|
@ -345,7 +385,27 @@ func (c *diskCache) purge(ctx context.Context) {
|
|||
return nil
|
||||
}
|
||||
cc := cacheControlOpts(objInfo)
|
||||
for fname, fi := range cachedFiles {
|
||||
switch {
|
||||
case cc != nil:
|
||||
if cc.isStale(objInfo.ModTime) {
|
||||
if err = removeAll(cacheDir); err != nil {
|
||||
logger.LogIf(ctx, err)
|
||||
}
|
||||
scorer.adjustSaveBytes(-objInfo.Size)
|
||||
// break early if sufficient disk space reclaimed.
|
||||
if c.diskUsageLow() {
|
||||
// if we found disk usage is already low, we return nil filtering is complete.
|
||||
return errDoneForNow
|
||||
}
|
||||
}
|
||||
case lastAtime != timeSentinel:
|
||||
// cached multipart or single part
|
||||
objInfo.AccTime = lastAtime
|
||||
objInfo.Name = pathJoin(c.dir, name, cacheDataFile)
|
||||
scorer.addFileWithObjInfo(objInfo, numHits)
|
||||
}
|
||||
|
||||
for fname, fi := range cachedRngFiles {
|
||||
if cc != nil {
|
||||
if cc.isStale(objInfo.ModTime) {
|
||||
if err = removeAll(fname); err != nil {
|
||||
|
@ -365,7 +425,7 @@ func (c *diskCache) purge(ctx context.Context) {
|
|||
}
|
||||
// clean up stale cache.json files for objects that never got cached but access count was maintained in cache.json
|
||||
fi, err := os.Stat(pathJoin(cacheDir, cacheMetaJSONFile))
|
||||
if err != nil || (fi.ModTime().Before(expiry) && len(cachedFiles) == 0) {
|
||||
if err != nil || (fi.ModTime().Before(expiry) && len(cachedRngFiles) == 0) {
|
||||
removeAll(cacheDir)
|
||||
scorer.adjustSaveBytes(-fi.Size())
|
||||
// Proceed to next file.
|
||||
|
@ -581,10 +641,13 @@ func (c *diskCache) saveMetadata(ctx context.Context, bucket, object string, met
|
|||
if m.Meta == nil {
|
||||
m.Meta = make(map[string]string)
|
||||
}
|
||||
// save etag in m.Meta if missing
|
||||
if _, ok := m.Meta["etag"]; !ok {
|
||||
if etag, ok := meta["etag"]; ok {
|
||||
m.Meta["etag"] = etag
|
||||
}
|
||||
}
|
||||
}
|
||||
m.Hits++
|
||||
|
||||
m.Checksum = CacheChecksumInfoV1{Algorithm: HighwayHash256S.String(), Blocksize: cacheBlkSize}
|
||||
|
@ -639,7 +702,7 @@ func (c *diskCache) bitrotWriteToCache(cachePath, fileName string, reader io.Rea
|
|||
}
|
||||
hashBytes := h.Sum(nil)
|
||||
// compute md5Hash of original data stream if writeback commit to cache
|
||||
if c.commitWriteback {
|
||||
if c.commitWriteback || c.commitWritethrough {
|
||||
if _, err = md5Hash.Write((*bufp)[:n]); err != nil {
|
||||
return 0, "", err
|
||||
}
|
||||
|
@ -655,8 +718,9 @@ func (c *diskCache) bitrotWriteToCache(cachePath, fileName string, reader io.Rea
|
|||
break
|
||||
}
|
||||
}
|
||||
md5sumCurr := md5Hash.Sum(nil)
|
||||
|
||||
return bytesWritten, base64.StdEncoding.EncodeToString(md5Hash.Sum(nil)), nil
|
||||
return bytesWritten, base64.StdEncoding.EncodeToString(md5sumCurr), nil
|
||||
}
|
||||
|
||||
func newCacheEncryptReader(content io.Reader, bucket, object string, metadata map[string]string) (r io.Reader, err error) {
|
||||
|
@ -897,7 +961,7 @@ func (c *diskCache) bitrotReadFromCache(ctx context.Context, filePath string, of
|
|||
return err
|
||||
}
|
||||
|
||||
if _, err := io.Copy(writer, bytes.NewReader((*bufp)[blockOffset:blockOffset+blockLength])); err != nil {
|
||||
if _, err = io.Copy(writer, bytes.NewReader((*bufp)[blockOffset:blockOffset+blockLength])); err != nil {
|
||||
if err != io.ErrClosedPipe {
|
||||
logger.LogIf(ctx, err)
|
||||
return err
|
||||
|
@ -950,19 +1014,78 @@ func (c *diskCache) Get(ctx context.Context, bucket, object string, rs *HTTPRang
|
|||
gr, gerr := NewGetObjectReaderFromReader(bytes.NewBuffer(nil), objInfo, opts)
|
||||
return gr, numHits, gerr
|
||||
}
|
||||
fn, off, length, nErr := NewGetObjectReader(rs, objInfo, opts)
|
||||
fn, startOffset, length, nErr := NewGetObjectReader(rs, objInfo, opts)
|
||||
if nErr != nil {
|
||||
return nil, numHits, nErr
|
||||
}
|
||||
filePath := pathJoin(cacheObjPath, cacheFile)
|
||||
var totalBytesRead int64
|
||||
|
||||
pr, pw := xioutil.WaitPipe()
|
||||
if len(objInfo.Parts) > 0 {
|
||||
// For negative length read everything.
|
||||
if length < 0 {
|
||||
length = objInfo.Size - startOffset
|
||||
}
|
||||
|
||||
// Reply back invalid range if the input offset and length fall out of range.
|
||||
if startOffset > objInfo.Size || startOffset+length > objInfo.Size {
|
||||
logger.LogIf(ctx, InvalidRange{startOffset, length, objInfo.Size}, logger.Application)
|
||||
return nil, numHits, InvalidRange{startOffset, length, objInfo.Size}
|
||||
}
|
||||
// Get start part index and offset.
|
||||
partIndex, partOffset, err := cacheObjectToPartOffset(objInfo, startOffset)
|
||||
if err != nil {
|
||||
return nil, numHits, InvalidRange{startOffset, length, objInfo.Size}
|
||||
}
|
||||
// Calculate endOffset according to length
|
||||
endOffset := startOffset
|
||||
if length > 0 {
|
||||
endOffset += length - 1
|
||||
}
|
||||
|
||||
// Get last part index to read given length.
|
||||
lastPartIndex, _, err := cacheObjectToPartOffset(objInfo, endOffset)
|
||||
if err != nil {
|
||||
return nil, numHits, InvalidRange{startOffset, length, objInfo.Size}
|
||||
}
|
||||
go func() {
|
||||
err := c.bitrotReadFromCache(ctx, filePath, off, length, pw)
|
||||
for ; partIndex <= lastPartIndex; partIndex++ {
|
||||
if length == totalBytesRead {
|
||||
break
|
||||
}
|
||||
partNumber := objInfo.Parts[partIndex].Number
|
||||
// Save the current part name and size.
|
||||
partSize := objInfo.Parts[partIndex].Size
|
||||
partLength := partSize - partOffset
|
||||
// partLength should be adjusted so that we don't write more data than what was requested.
|
||||
if partLength > (length - totalBytesRead) {
|
||||
partLength = length - totalBytesRead
|
||||
}
|
||||
filePath := pathJoin(cacheObjPath, fmt.Sprintf("part.%d", partNumber))
|
||||
err := c.bitrotReadFromCache(ctx, filePath, partOffset, partLength, pw)
|
||||
if err != nil {
|
||||
removeAll(cacheObjPath)
|
||||
pw.CloseWithError(err)
|
||||
break
|
||||
}
|
||||
totalBytesRead += partLength
|
||||
// partOffset will be valid only for the first part, hence reset it to 0 for
|
||||
// the remaining parts.
|
||||
partOffset = 0
|
||||
} // End of read all parts loop.
|
||||
pr.CloseWithError(err)
|
||||
}()
|
||||
} else {
|
||||
go func() {
|
||||
filePath := pathJoin(cacheObjPath, cacheFile)
|
||||
err := c.bitrotReadFromCache(ctx, filePath, startOffset, length, pw)
|
||||
if err != nil {
|
||||
removeAll(cacheObjPath)
|
||||
}
|
||||
pw.CloseWithError(err)
|
||||
}()
|
||||
}
|
||||
|
||||
// Cleanup function to cause the go routine above to exit, in
|
||||
// case of incomplete read.
|
||||
pipeCloser := func() { pr.CloseWithError(nil) }
|
||||
|
@ -980,7 +1103,6 @@ func (c *diskCache) Get(ctx context.Context, bucket, object string, rs *HTTPRang
|
|||
gr.ObjInfo.Size = objSize
|
||||
}
|
||||
return gr, numHits, nil
|
||||
|
||||
}
|
||||
|
||||
// Deletes the cached object
|
||||
|
@ -1040,3 +1162,397 @@ func (c *diskCache) scanCacheWritebackFailures(ctx context.Context) {
|
|||
return
|
||||
}
|
||||
}
|
||||
|
||||
// NewMultipartUpload caches multipart uploads when writethrough is MINIO_CACHE_COMMIT mode
|
||||
// multiparts are saved in .minio.sys/multipart/cachePath/uploadID dir until finalized. Then the individual parts
|
||||
// are moved from the upload dir to cachePath/ directory.
|
||||
func (c *diskCache) NewMultipartUpload(ctx context.Context, bucket, object, uID string, opts ObjectOptions) (uploadID string, err error) {
|
||||
uploadID = uID
|
||||
if uploadID == "" {
|
||||
return "", InvalidUploadID{
|
||||
Bucket: bucket,
|
||||
Object: object,
|
||||
UploadID: uploadID,
|
||||
}
|
||||
}
|
||||
|
||||
cachePath := getMultipartCacheSHADir(c.dir, bucket, object)
|
||||
uploadIDDir := path.Join(cachePath, uploadID)
|
||||
if err := os.MkdirAll(uploadIDDir, 0777); err != nil {
|
||||
return uploadID, err
|
||||
}
|
||||
metaPath := pathJoin(uploadIDDir, cacheMetaJSONFile)
|
||||
|
||||
f, err := os.OpenFile(metaPath, os.O_RDWR|os.O_CREATE, 0666)
|
||||
if err != nil {
|
||||
return uploadID, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
m := &cacheMeta{
|
||||
Version: cacheMetaVersion,
|
||||
Bucket: bucket,
|
||||
Object: object,
|
||||
}
|
||||
if err := jsonLoad(f, m); err != nil && err != io.EOF {
|
||||
return uploadID, err
|
||||
}
|
||||
|
||||
m.Meta = opts.UserDefined
|
||||
m.Meta[ReservedMetadataPrefix+"Encrypted-Multipart"] = ""
|
||||
|
||||
m.Checksum = CacheChecksumInfoV1{Algorithm: HighwayHash256S.String(), Blocksize: cacheBlkSize}
|
||||
if c.commitWriteback {
|
||||
m.Meta[writeBackStatusHeader] = CommitPending.String()
|
||||
}
|
||||
m.Stat.ModTime = UTCNow()
|
||||
if globalCacheKMS != nil {
|
||||
if _, err := newCacheEncryptMetadata(bucket, object, m.Meta); err != nil {
|
||||
return uploadID, err
|
||||
}
|
||||
}
|
||||
err = jsonSave(f, m)
|
||||
return uploadID, err
|
||||
}
|
||||
|
||||
// PutObjectPart caches part to cache multipart path.
|
||||
func (c *diskCache) PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, data io.Reader, size int64, opts ObjectOptions) (partInfo PartInfo, err error) {
|
||||
oi := PartInfo{}
|
||||
if !c.diskSpaceAvailable(size) {
|
||||
io.Copy(ioutil.Discard, data)
|
||||
return oi, errDiskFull
|
||||
}
|
||||
cachePath := getMultipartCacheSHADir(c.dir, bucket, object)
|
||||
uploadIDDir := path.Join(cachePath, uploadID)
|
||||
|
||||
partIDLock := c.NewNSLockFn(pathJoin(uploadIDDir, strconv.Itoa(partID)))
|
||||
lkctx, err := partIDLock.GetLock(ctx, globalOperationTimeout)
|
||||
if err != nil {
|
||||
return oi, err
|
||||
}
|
||||
|
||||
ctx = lkctx.Context()
|
||||
defer partIDLock.Unlock(lkctx.Cancel)
|
||||
meta, _, _, err := c.statCache(ctx, uploadIDDir)
|
||||
// Case where object not yet cached
|
||||
if err != nil {
|
||||
return oi, err
|
||||
}
|
||||
|
||||
if !c.diskSpaceAvailable(size) {
|
||||
return oi, errDiskFull
|
||||
}
|
||||
reader := data
|
||||
var actualSize = uint64(size)
|
||||
if globalCacheKMS != nil {
|
||||
reader, err = newCachePartEncryptReader(ctx, bucket, object, partID, data, size, meta.Meta)
|
||||
if err != nil {
|
||||
return oi, err
|
||||
}
|
||||
actualSize, _ = sio.EncryptedSize(uint64(size))
|
||||
}
|
||||
n, md5sum, err := c.bitrotWriteToCache(uploadIDDir, fmt.Sprintf("part.%d", partID), reader, actualSize)
|
||||
if IsErr(err, baseErrs...) {
|
||||
// take the cache drive offline
|
||||
c.setOffline()
|
||||
}
|
||||
if err != nil {
|
||||
return oi, err
|
||||
}
|
||||
|
||||
if actualSize != uint64(n) {
|
||||
return oi, IncompleteBody{Bucket: bucket, Object: object}
|
||||
}
|
||||
var md5hex string
|
||||
if md5bytes, err := base64.StdEncoding.DecodeString(md5sum); err == nil {
|
||||
md5hex = hex.EncodeToString(md5bytes)
|
||||
}
|
||||
|
||||
pInfo := PartInfo{
|
||||
PartNumber: partID,
|
||||
ETag: md5hex,
|
||||
Size: n,
|
||||
ActualSize: int64(actualSize),
|
||||
LastModified: UTCNow(),
|
||||
}
|
||||
return pInfo, nil
|
||||
}
|
||||
|
||||
// SavePartMetadata saves part upload metadata to uploadID directory on disk cache
|
||||
func (c *diskCache) SavePartMetadata(ctx context.Context, bucket, object, uploadID string, partID int, pinfo PartInfo) error {
|
||||
cachePath := getMultipartCacheSHADir(c.dir, bucket, object)
|
||||
uploadDir := path.Join(cachePath, uploadID)
|
||||
|
||||
// acquire a write lock at upload path to update cache.json
|
||||
uploadLock := c.NewNSLockFn(uploadDir)
|
||||
ulkctx, err := uploadLock.GetLock(ctx, globalOperationTimeout)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer uploadLock.Unlock(ulkctx.Cancel)
|
||||
|
||||
metaPath := pathJoin(uploadDir, cacheMetaJSONFile)
|
||||
f, err := os.OpenFile(metaPath, os.O_RDWR, 0666)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
m := &cacheMeta{}
|
||||
if err := jsonLoad(f, m); err != nil && err != io.EOF {
|
||||
return err
|
||||
}
|
||||
var key []byte
|
||||
var objectEncryptionKey crypto.ObjectKey
|
||||
if globalCacheKMS != nil {
|
||||
// Calculating object encryption key
|
||||
key, err = decryptObjectInfo(key, bucket, object, m.Meta)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
copy(objectEncryptionKey[:], key)
|
||||
pinfo.ETag = hex.EncodeToString(objectEncryptionKey.SealETag([]byte(pinfo.ETag)))
|
||||
|
||||
}
|
||||
|
||||
pIdx := cacheObjPartIndex(m, partID)
|
||||
if pIdx == -1 {
|
||||
m.PartActualSizes = append(m.PartActualSizes, pinfo.ActualSize)
|
||||
m.PartNumbers = append(m.PartNumbers, pinfo.PartNumber)
|
||||
m.PartETags = append(m.PartETags, pinfo.ETag)
|
||||
m.PartSizes = append(m.PartSizes, pinfo.Size)
|
||||
} else {
|
||||
m.PartActualSizes[pIdx] = pinfo.ActualSize
|
||||
m.PartNumbers[pIdx] = pinfo.PartNumber
|
||||
m.PartETags[pIdx] = pinfo.ETag
|
||||
m.PartSizes[pIdx] = pinfo.Size
|
||||
}
|
||||
return jsonSave(f, m)
|
||||
}
|
||||
|
||||
// newCachePartEncryptReader returns encrypted cache part reader, with part data encrypted with part encryption key
|
||||
func newCachePartEncryptReader(ctx context.Context, bucket, object string, partID int, content io.Reader, size int64, metadata map[string]string) (r io.Reader, err error) {
|
||||
var key []byte
|
||||
var objectEncryptionKey, partEncryptionKey crypto.ObjectKey
|
||||
|
||||
// Calculating object encryption key
|
||||
key, err = decryptObjectInfo(key, bucket, object, metadata)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
copy(objectEncryptionKey[:], key)
|
||||
|
||||
partEnckey := objectEncryptionKey.DerivePartKey(uint32(partID))
|
||||
copy(partEncryptionKey[:], partEnckey[:])
|
||||
wantSize := int64(-1)
|
||||
if size >= 0 {
|
||||
info := ObjectInfo{Size: size}
|
||||
wantSize = info.EncryptedSize()
|
||||
}
|
||||
hReader, err := hash.NewReader(content, wantSize, "", "", size)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
pReader := NewPutObjReader(hReader)
|
||||
content, err = pReader.WithEncryption(hReader, &partEncryptionKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
reader, err := sio.EncryptReader(content, sio.Config{Key: partEncryptionKey[:], MinVersion: sio.Version20, CipherSuites: fips.CipherSuitesDARE()})
|
||||
if err != nil {
|
||||
return nil, crypto.ErrInvalidCustomerKey
|
||||
}
|
||||
return reader, nil
|
||||
}
|
||||
|
||||
// uploadIDExists returns error if uploadID is not being cached.
|
||||
func (c *diskCache) uploadIDExists(bucket, object, uploadID string) (err error) {
|
||||
mpartCachePath := getMultipartCacheSHADir(c.dir, bucket, object)
|
||||
uploadIDDir := path.Join(mpartCachePath, uploadID)
|
||||
if _, err := os.Stat(uploadIDDir); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// CompleteMultipartUpload completes multipart upload on cache. The parts and cache.json are moved from the temporary location in
|
||||
// .minio.sys/multipart/cacheSHA/.. to cacheSHA path after part verification succeeds.
|
||||
func (c *diskCache) CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, uploadedParts []CompletePart, roi ObjectInfo, opts ObjectOptions) (oi ObjectInfo, err error) {
|
||||
cachePath := getCacheSHADir(c.dir, bucket, object)
|
||||
cLock := c.NewNSLockFn(cachePath)
|
||||
lkctx, err := cLock.GetLock(ctx, globalOperationTimeout)
|
||||
if err != nil {
|
||||
return oi, err
|
||||
}
|
||||
|
||||
ctx = lkctx.Context()
|
||||
defer cLock.Unlock(lkctx.Cancel)
|
||||
mpartCachePath := getMultipartCacheSHADir(c.dir, bucket, object)
|
||||
uploadIDDir := path.Join(mpartCachePath, uploadID)
|
||||
|
||||
uploadMeta, _, _, uerr := c.statCache(ctx, uploadIDDir)
|
||||
if uerr != nil {
|
||||
return oi, errUploadIDNotFound
|
||||
}
|
||||
|
||||
// Case where object not yet cached
|
||||
// Calculate full object size.
|
||||
var objectSize int64
|
||||
|
||||
// Calculate consolidated actual size.
|
||||
var objectActualSize int64
|
||||
|
||||
var partETags []string
|
||||
partETags, err = decryptCachePartETags(uploadMeta)
|
||||
if err != nil {
|
||||
return oi, err
|
||||
}
|
||||
for i, pi := range uploadedParts {
|
||||
pIdx := cacheObjPartIndex(uploadMeta, pi.PartNumber)
|
||||
if pIdx == -1 {
|
||||
invp := InvalidPart{
|
||||
PartNumber: pi.PartNumber,
|
||||
GotETag: pi.ETag,
|
||||
}
|
||||
return oi, invp
|
||||
}
|
||||
pi.ETag = canonicalizeETag(pi.ETag)
|
||||
if partETags[pIdx] != pi.ETag {
|
||||
invp := InvalidPart{
|
||||
PartNumber: pi.PartNumber,
|
||||
ExpETag: partETags[pIdx],
|
||||
GotETag: pi.ETag,
|
||||
}
|
||||
return oi, invp
|
||||
}
|
||||
// All parts except the last part has to be atleast 5MB.
|
||||
if (i < len(uploadedParts)-1) && !isMinAllowedPartSize(uploadMeta.PartActualSizes[pIdx]) {
|
||||
return oi, PartTooSmall{
|
||||
PartNumber: pi.PartNumber,
|
||||
PartSize: uploadMeta.PartActualSizes[pIdx],
|
||||
PartETag: pi.ETag,
|
||||
}
|
||||
}
|
||||
|
||||
// Save for total object size.
|
||||
objectSize += uploadMeta.PartSizes[pIdx]
|
||||
|
||||
// Save the consolidated actual size.
|
||||
objectActualSize += uploadMeta.PartActualSizes[pIdx]
|
||||
|
||||
}
|
||||
uploadMeta.Stat.Size = objectSize
|
||||
uploadMeta.Stat.ModTime = roi.ModTime
|
||||
// if encrypted - make sure ETag updated
|
||||
|
||||
uploadMeta.Meta["etag"] = roi.ETag
|
||||
uploadMeta.Meta[ReservedMetadataPrefix+"actual-size"] = strconv.FormatInt(objectActualSize, 10)
|
||||
var cpartETags []string
|
||||
var cpartNums []int
|
||||
var cpartSizes, cpartActualSizes []int64
|
||||
for _, pi := range uploadedParts {
|
||||
pIdx := cacheObjPartIndex(uploadMeta, pi.PartNumber)
|
||||
if pIdx != -1 {
|
||||
cpartETags = append(cpartETags, uploadMeta.PartETags[pIdx])
|
||||
cpartNums = append(cpartNums, uploadMeta.PartNumbers[pIdx])
|
||||
cpartSizes = append(cpartSizes, uploadMeta.PartSizes[pIdx])
|
||||
cpartActualSizes = append(cpartActualSizes, uploadMeta.PartActualSizes[pIdx])
|
||||
}
|
||||
}
|
||||
uploadMeta.PartETags = cpartETags
|
||||
uploadMeta.PartSizes = cpartSizes
|
||||
uploadMeta.PartActualSizes = cpartActualSizes
|
||||
uploadMeta.PartNumbers = cpartNums
|
||||
uploadMeta.Hits++
|
||||
metaPath := pathJoin(uploadIDDir, cacheMetaJSONFile)
|
||||
|
||||
f, err := os.OpenFile(metaPath, os.O_RDWR|os.O_CREATE, 0666)
|
||||
if err != nil {
|
||||
return oi, err
|
||||
}
|
||||
defer f.Close()
|
||||
jsonSave(f, uploadMeta)
|
||||
for _, pi := range uploadedParts {
|
||||
part := fmt.Sprintf("part.%d", pi.PartNumber)
|
||||
renameAll(pathJoin(uploadIDDir, part), pathJoin(cachePath, part))
|
||||
}
|
||||
renameAll(pathJoin(uploadIDDir, cacheMetaJSONFile), pathJoin(cachePath, cacheMetaJSONFile))
|
||||
removeAll(uploadIDDir) // clean up any unused parts in the uploadIDDir
|
||||
return uploadMeta.ToObjectInfo(bucket, object), nil
|
||||
}
|
||||
|
||||
func (c *diskCache) AbortUpload(bucket, object, uploadID string) (err error) {
|
||||
mpartCachePath := getMultipartCacheSHADir(c.dir, bucket, object)
|
||||
uploadDir := path.Join(mpartCachePath, uploadID)
|
||||
return removeAll(uploadDir)
|
||||
}
|
||||
|
||||
// cacheObjPartIndex - returns the index of matching object part number.
|
||||
func cacheObjPartIndex(m *cacheMeta, partNumber int) int {
|
||||
for i, part := range m.PartNumbers {
|
||||
if partNumber == part {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// cacheObjectToPartOffset calculates part index and part offset for requested offset for content on cache.
|
||||
func cacheObjectToPartOffset(objInfo ObjectInfo, offset int64) (partIndex int, partOffset int64, err error) {
|
||||
if offset == 0 {
|
||||
// Special case - if offset is 0, then partIndex and partOffset are always 0.
|
||||
return 0, 0, nil
|
||||
}
|
||||
partOffset = offset
|
||||
// Seek until object offset maps to a particular part offset.
|
||||
for i, part := range objInfo.Parts {
|
||||
partIndex = i
|
||||
// Offset is smaller than size we have reached the proper part offset.
|
||||
if partOffset < part.Size {
|
||||
return partIndex, partOffset, nil
|
||||
}
|
||||
// Continue to towards the next part.
|
||||
partOffset -= part.Size
|
||||
}
|
||||
// Offset beyond the size of the object return InvalidRange.
|
||||
return 0, 0, InvalidRange{}
|
||||
}
|
||||
|
||||
// get path of on-going multipart caching
|
||||
func getMultipartCacheSHADir(dir, bucket, object string) string {
|
||||
return pathJoin(dir, minioMetaBucket, cacheMultipartDir, getSHA256Hash([]byte(pathJoin(bucket, object))))
|
||||
}
|
||||
|
||||
// clean up stale cache multipart uploads according to cleanup interval.
|
||||
func (c *diskCache) cleanupStaleUploads(ctx context.Context) {
|
||||
if !c.commitWritethrough {
|
||||
return
|
||||
}
|
||||
timer := time.NewTimer(cacheStaleUploadCleanupInterval)
|
||||
defer timer.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-timer.C:
|
||||
// Reset for the next interval
|
||||
timer.Reset(cacheStaleUploadCleanupInterval)
|
||||
now := time.Now()
|
||||
readDirFn(pathJoin(c.dir, minioMetaBucket, cacheMultipartDir), func(shaDir string, typ os.FileMode) error {
|
||||
return readDirFn(pathJoin(c.dir, minioMetaBucket, cacheMultipartDir, shaDir), func(uploadIDDir string, typ os.FileMode) error {
|
||||
uploadIDPath := pathJoin(c.dir, minioMetaBucket, cacheMultipartDir, shaDir, uploadIDDir)
|
||||
fi, err := os.Stat(uploadIDPath)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
if now.Sub(fi.ModTime()) > cacheStaleUploadExpiry {
|
||||
removeAll(uploadIDPath)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -246,6 +246,9 @@ func decryptCacheObjectETag(info *ObjectInfo) error {
|
|||
if globalCacheKMS == nil {
|
||||
return errKMSNotConfigured
|
||||
}
|
||||
if len(info.Parts) > 0 { // multipart ETag is not encrypted since it is not md5sum
|
||||
return nil
|
||||
}
|
||||
keyID, kmsKey, sealedKey, err := crypto.S3.ParseMetadata(info.UserDefined)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -271,6 +274,43 @@ func decryptCacheObjectETag(info *ObjectInfo) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// decryptCacheObjectETag tries to decrypt the ETag saved in encrypted format using the cache KMS
|
||||
func decryptCachePartETags(c *cacheMeta) ([]string, error) {
|
||||
var partETags []string
|
||||
encrypted := crypto.S3.IsEncrypted(c.Meta) && isCacheEncrypted(c.Meta)
|
||||
|
||||
switch {
|
||||
case encrypted:
|
||||
if globalCacheKMS == nil {
|
||||
return partETags, errKMSNotConfigured
|
||||
}
|
||||
keyID, kmsKey, sealedKey, err := crypto.S3.ParseMetadata(c.Meta)
|
||||
if err != nil {
|
||||
return partETags, err
|
||||
}
|
||||
extKey, err := globalCacheKMS.DecryptKey(keyID, kmsKey, kms.Context{c.Bucket: path.Join(c.Bucket, c.Object)})
|
||||
if err != nil {
|
||||
return partETags, err
|
||||
}
|
||||
var objectKey crypto.ObjectKey
|
||||
if err = objectKey.Unseal(extKey, sealedKey, crypto.S3.String(), c.Bucket, c.Object); err != nil {
|
||||
return partETags, err
|
||||
}
|
||||
for i := range c.PartETags {
|
||||
etagStr := tryDecryptETag(objectKey[:], c.PartETags[i], false)
|
||||
// backend ETag was hex encoded before encrypting, so hex decode to get actual ETag
|
||||
etag, err := hex.DecodeString(etagStr)
|
||||
if err != nil {
|
||||
return []string{}, err
|
||||
}
|
||||
partETags = append(partETags, string(etag))
|
||||
}
|
||||
return partETags, nil
|
||||
default:
|
||||
return c.PartETags, nil
|
||||
}
|
||||
}
|
||||
|
||||
func isMetadataSame(m1, m2 map[string]string) bool {
|
||||
if m1 == nil && m2 == nil {
|
||||
return true
|
||||
|
@ -506,3 +546,38 @@ func bytesToClear(total, free int64, quotaPct, lowWatermark, highWatermark uint6
|
|||
lowWMUsage := total * (int64)(lowWatermark*quotaPct) / (100 * 100)
|
||||
return (uint64)(math.Min(float64(quotaAllowed), math.Max(0.0, float64(used-lowWMUsage))))
|
||||
}
|
||||
|
||||
type multiWriter struct {
|
||||
backendWriter io.Writer
|
||||
cacheWriter *io.PipeWriter
|
||||
pipeClosed bool
|
||||
}
|
||||
|
||||
// multiWriter writes to backend and cache - if cache write
|
||||
// fails close the pipe, but continue writing to the backend
|
||||
func (t *multiWriter) Write(p []byte) (n int, err error) {
|
||||
n, err = t.backendWriter.Write(p)
|
||||
if err == nil && n != len(p) {
|
||||
err = io.ErrShortWrite
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
if !t.pipeClosed {
|
||||
t.cacheWriter.CloseWithError(err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// ignore errors writing to cache
|
||||
if !t.pipeClosed {
|
||||
_, cerr := t.cacheWriter.Write(p)
|
||||
if cerr != nil {
|
||||
t.pipeClosed = true
|
||||
t.cacheWriter.CloseWithError(cerr)
|
||||
}
|
||||
}
|
||||
return len(p), nil
|
||||
}
|
||||
func cacheMultiWriter(w1 io.Writer, w2 *io.PipeWriter) io.Writer {
|
||||
return &multiWriter{backendWriter: w1, cacheWriter: w2}
|
||||
}
|
||||
|
|
|
@ -59,6 +59,13 @@ const (
|
|||
CommitFailed cacheCommitStatus = "failed"
|
||||
)
|
||||
|
||||
const (
|
||||
// CommitWriteBack allows staging and write back of cached content for single object uploads
|
||||
CommitWriteBack string = "writeback"
|
||||
// CommitWriteThrough allows caching multipart uploads to disk synchronously
|
||||
CommitWriteThrough string = "writethrough"
|
||||
)
|
||||
|
||||
// String returns string representation of status
|
||||
func (s cacheCommitStatus) String() string {
|
||||
return string(s)
|
||||
|
@ -80,6 +87,13 @@ type CacheObjectLayer interface {
|
|||
DeleteObjects(ctx context.Context, bucket string, objects []ObjectToDelete, opts ObjectOptions) ([]DeletedObject, []error)
|
||||
PutObject(ctx context.Context, bucket, object string, data *PutObjReader, opts ObjectOptions) (objInfo ObjectInfo, err error)
|
||||
CopyObject(ctx context.Context, srcBucket, srcObject, destBucket, destObject string, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (objInfo ObjectInfo, err error)
|
||||
// Multipart operations.
|
||||
NewMultipartUpload(ctx context.Context, bucket, object string, opts ObjectOptions) (uploadID string, err error)
|
||||
PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, data *PutObjReader, opts ObjectOptions) (info PartInfo, err error)
|
||||
AbortMultipartUpload(ctx context.Context, bucket, object, uploadID string, opts ObjectOptions) error
|
||||
CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, uploadedParts []CompletePart, opts ObjectOptions) (objInfo ObjectInfo, err error)
|
||||
CopyObjectPart(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject, uploadID string, partID int, startOffset int64, length int64, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (pi PartInfo, e error)
|
||||
|
||||
// Storage operations.
|
||||
StorageInfo(ctx context.Context) CacheStorageInfo
|
||||
CacheStats() *CacheStats
|
||||
|
@ -95,10 +109,10 @@ type cacheObjects struct {
|
|||
after int
|
||||
// commit objects in async manner
|
||||
commitWriteback bool
|
||||
commitWritethrough bool
|
||||
|
||||
// if true migration is in progress from v1 to v2
|
||||
migrating bool
|
||||
// mutex to protect migration bool
|
||||
migMutex sync.Mutex
|
||||
// retry queue for writeback cache mode to reattempt upload to backend
|
||||
wbRetryCh chan ObjectInfo
|
||||
// Cache stats
|
||||
|
@ -109,6 +123,11 @@ type cacheObjects struct {
|
|||
InnerDeleteObjectFn func(ctx context.Context, bucket, object string, opts ObjectOptions) (objInfo ObjectInfo, err error)
|
||||
InnerPutObjectFn func(ctx context.Context, bucket, object string, data *PutObjReader, opts ObjectOptions) (objInfo ObjectInfo, err error)
|
||||
InnerCopyObjectFn func(ctx context.Context, srcBucket, srcObject, destBucket, destObject string, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (objInfo ObjectInfo, err error)
|
||||
InnerNewMultipartUploadFn func(ctx context.Context, bucket, object string, opts ObjectOptions) (uploadID string, err error)
|
||||
InnerPutObjectPartFn func(ctx context.Context, bucket, object, uploadID string, partID int, data *PutObjReader, opts ObjectOptions) (info PartInfo, err error)
|
||||
InnerAbortMultipartUploadFn func(ctx context.Context, bucket, object, uploadID string, opts ObjectOptions) error
|
||||
InnerCompleteMultipartUploadFn func(ctx context.Context, bucket, object, uploadID string, uploadedParts []CompletePart, opts ObjectOptions) (objInfo ObjectInfo, err error)
|
||||
InnerCopyObjectPartFn func(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject, uploadID string, partID int, startOffset int64, length int64, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (pi PartInfo, e error)
|
||||
}
|
||||
|
||||
func (c *cacheObjects) incHitsToMeta(ctx context.Context, dcache *diskCache, bucket, object string, size int64, eTag string, rs *HTTPRangeSpec) error {
|
||||
|
@ -488,8 +507,6 @@ func (c *cacheObjects) CacheStats() (cs *CacheStats) {
|
|||
|
||||
// skipCache() returns true if cache migration is in progress
|
||||
func (c *cacheObjects) skipCache() bool {
|
||||
c.migMutex.Lock()
|
||||
defer c.migMutex.Unlock()
|
||||
return c.migrating
|
||||
}
|
||||
|
||||
|
@ -619,8 +636,6 @@ func (c *cacheObjects) migrateCacheFromV1toV2(ctx context.Context) {
|
|||
}
|
||||
|
||||
// update migration status
|
||||
c.migMutex.Lock()
|
||||
defer c.migMutex.Unlock()
|
||||
c.migrating = false
|
||||
logStartupMessage(color.Blue("Cache migration completed successfully."))
|
||||
}
|
||||
|
@ -763,8 +778,9 @@ func newServerCacheObjects(ctx context.Context, config cache.Config) (CacheObjec
|
|||
exclude: config.Exclude,
|
||||
after: config.After,
|
||||
migrating: migrateSw,
|
||||
migMutex: sync.Mutex{},
|
||||
commitWriteback: config.CommitWriteback,
|
||||
commitWriteback: config.CacheCommitMode == CommitWriteBack,
|
||||
commitWritethrough: config.CacheCommitMode == CommitWriteThrough,
|
||||
|
||||
cacheStats: newCacheStats(),
|
||||
InnerGetObjectInfoFn: func(ctx context.Context, bucket, object string, opts ObjectOptions) (ObjectInfo, error) {
|
||||
return newObjectLayerFn().GetObjectInfo(ctx, bucket, object, opts)
|
||||
|
@ -781,6 +797,21 @@ func newServerCacheObjects(ctx context.Context, config cache.Config) (CacheObjec
|
|||
InnerCopyObjectFn: func(ctx context.Context, srcBucket, srcObject, destBucket, destObject string, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (objInfo ObjectInfo, err error) {
|
||||
return newObjectLayerFn().CopyObject(ctx, srcBucket, srcObject, destBucket, destObject, srcInfo, srcOpts, dstOpts)
|
||||
},
|
||||
InnerNewMultipartUploadFn: func(ctx context.Context, bucket, object string, opts ObjectOptions) (uploadID string, err error) {
|
||||
return newObjectLayerFn().NewMultipartUpload(ctx, bucket, object, opts)
|
||||
},
|
||||
InnerPutObjectPartFn: func(ctx context.Context, bucket, object, uploadID string, partID int, data *PutObjReader, opts ObjectOptions) (info PartInfo, err error) {
|
||||
return newObjectLayerFn().PutObjectPart(ctx, bucket, object, uploadID, partID, data, opts)
|
||||
},
|
||||
InnerAbortMultipartUploadFn: func(ctx context.Context, bucket, object, uploadID string, opts ObjectOptions) error {
|
||||
return newObjectLayerFn().AbortMultipartUpload(ctx, bucket, object, uploadID, opts)
|
||||
},
|
||||
InnerCompleteMultipartUploadFn: func(ctx context.Context, bucket, object, uploadID string, uploadedParts []CompletePart, opts ObjectOptions) (objInfo ObjectInfo, err error) {
|
||||
return newObjectLayerFn().CompleteMultipartUpload(ctx, bucket, object, uploadID, uploadedParts, opts)
|
||||
},
|
||||
InnerCopyObjectPartFn: func(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject, uploadID string, partID int, startOffset int64, length int64, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (pi PartInfo, e error) {
|
||||
return newObjectLayerFn().CopyObjectPart(ctx, srcBucket, srcObject, dstBucket, dstObject, uploadID, partID, startOffset, length, srcInfo, srcOpts, dstOpts)
|
||||
},
|
||||
}
|
||||
c.cacheStats.GetDiskStats = func() []CacheDiskStats {
|
||||
cacheDiskStats := make([]CacheDiskStats, len(c.cache))
|
||||
|
@ -859,3 +890,247 @@ func (c *cacheObjects) queuePendingWriteback(ctx context.Context) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NewMultipartUpload - Starts a new multipart upload operation to backend - if writethrough mode is enabled, starts caching the multipart.
|
||||
func (c *cacheObjects) NewMultipartUpload(ctx context.Context, bucket, object string, opts ObjectOptions) (uploadID string, err error) {
|
||||
newMultipartUploadFn := c.InnerNewMultipartUploadFn
|
||||
dcache, err := c.getCacheToLoc(ctx, bucket, object)
|
||||
if err != nil {
|
||||
// disk cache could not be located,execute backend call.
|
||||
return newMultipartUploadFn(ctx, bucket, object, opts)
|
||||
}
|
||||
if c.skipCache() {
|
||||
return newMultipartUploadFn(ctx, bucket, object, opts)
|
||||
}
|
||||
|
||||
if opts.ServerSideEncryption != nil { // avoid caching encrypted objects
|
||||
dcache.Delete(ctx, bucket, object)
|
||||
return newMultipartUploadFn(ctx, bucket, object, opts)
|
||||
}
|
||||
|
||||
// skip cache for objects with locks
|
||||
objRetention := objectlock.GetObjectRetentionMeta(opts.UserDefined)
|
||||
legalHold := objectlock.GetObjectLegalHoldMeta(opts.UserDefined)
|
||||
if objRetention.Mode.Valid() || legalHold.Status.Valid() {
|
||||
dcache.Delete(ctx, bucket, object)
|
||||
return newMultipartUploadFn(ctx, bucket, object, opts)
|
||||
}
|
||||
|
||||
// fetch from backend if cache exclude pattern or cache-control
|
||||
// directive set to exclude
|
||||
if c.isCacheExclude(bucket, object) {
|
||||
dcache.Delete(ctx, bucket, object)
|
||||
return newMultipartUploadFn(ctx, bucket, object, opts)
|
||||
}
|
||||
if !c.commitWritethrough {
|
||||
return newMultipartUploadFn(ctx, bucket, object, opts)
|
||||
}
|
||||
|
||||
// perform multipart upload on backend and cache simultaneously
|
||||
uploadID, err = newMultipartUploadFn(ctx, bucket, object, opts)
|
||||
dcache.NewMultipartUpload(GlobalContext, bucket, object, uploadID, opts)
|
||||
return uploadID, err
|
||||
}
|
||||
|
||||
// PutObjectPart streams part to cache concurrently if writethrough mode is enabled. Otherwise redirects the call to remote
|
||||
func (c *cacheObjects) PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, data *PutObjReader, opts ObjectOptions) (info PartInfo, err error) {
|
||||
putObjectPartFn := c.InnerPutObjectPartFn
|
||||
dcache, err := c.getCacheToLoc(ctx, bucket, object)
|
||||
if err != nil {
|
||||
// disk cache could not be located,execute backend call.
|
||||
return putObjectPartFn(ctx, bucket, object, uploadID, partID, data, opts)
|
||||
}
|
||||
|
||||
if !c.commitWritethrough {
|
||||
return putObjectPartFn(ctx, bucket, object, uploadID, partID, data, opts)
|
||||
}
|
||||
if c.skipCache() {
|
||||
return putObjectPartFn(ctx, bucket, object, uploadID, partID, data, opts)
|
||||
}
|
||||
size := data.Size()
|
||||
|
||||
// avoid caching part if space unavailable
|
||||
if !dcache.diskSpaceAvailable(size) {
|
||||
return putObjectPartFn(ctx, bucket, object, uploadID, partID, data, opts)
|
||||
}
|
||||
|
||||
if opts.ServerSideEncryption != nil {
|
||||
dcache.Delete(ctx, bucket, object)
|
||||
return putObjectPartFn(ctx, bucket, object, uploadID, partID, data, opts)
|
||||
}
|
||||
|
||||
// skip cache for objects with locks
|
||||
objRetention := objectlock.GetObjectRetentionMeta(opts.UserDefined)
|
||||
legalHold := objectlock.GetObjectLegalHoldMeta(opts.UserDefined)
|
||||
if objRetention.Mode.Valid() || legalHold.Status.Valid() {
|
||||
dcache.Delete(ctx, bucket, object)
|
||||
return putObjectPartFn(ctx, bucket, object, uploadID, partID, data, opts)
|
||||
}
|
||||
|
||||
// fetch from backend if cache exclude pattern or cache-control
|
||||
// directive set to exclude
|
||||
if c.isCacheExclude(bucket, object) {
|
||||
dcache.Delete(ctx, bucket, object)
|
||||
return putObjectPartFn(ctx, bucket, object, uploadID, partID, data, opts)
|
||||
}
|
||||
|
||||
info = PartInfo{}
|
||||
// Initialize pipe to stream data to backend
|
||||
pipeReader, pipeWriter := io.Pipe()
|
||||
hashReader, err := hash.NewReader(pipeReader, size, "", "", data.ActualSize())
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
// Initialize pipe to stream data to cache
|
||||
rPipe, wPipe := io.Pipe()
|
||||
pinfoCh := make(chan PartInfo)
|
||||
cinfoCh := make(chan PartInfo)
|
||||
|
||||
errorCh := make(chan error)
|
||||
go func() {
|
||||
info, err = putObjectPartFn(ctx, bucket, object, uploadID, partID, NewPutObjReader(hashReader), opts)
|
||||
if err != nil {
|
||||
close(pinfoCh)
|
||||
pipeReader.CloseWithError(err)
|
||||
rPipe.CloseWithError(err)
|
||||
errorCh <- err
|
||||
return
|
||||
}
|
||||
close(errorCh)
|
||||
pinfoCh <- info
|
||||
}()
|
||||
go func() {
|
||||
pinfo, perr := dcache.PutObjectPart(GlobalContext, bucket, object, uploadID, partID, rPipe, data.Size(), opts)
|
||||
if perr != nil {
|
||||
rPipe.CloseWithError(perr)
|
||||
close(cinfoCh)
|
||||
// clean up upload
|
||||
dcache.AbortUpload(bucket, object, uploadID)
|
||||
return
|
||||
}
|
||||
cinfoCh <- pinfo
|
||||
}()
|
||||
|
||||
mwriter := cacheMultiWriter(pipeWriter, wPipe)
|
||||
_, err = io.Copy(mwriter, data)
|
||||
pipeWriter.Close()
|
||||
wPipe.Close()
|
||||
|
||||
if err != nil {
|
||||
err = <-errorCh
|
||||
return PartInfo{}, err
|
||||
}
|
||||
info = <-pinfoCh
|
||||
cachedInfo := <-cinfoCh
|
||||
if info.PartNumber == cachedInfo.PartNumber {
|
||||
cachedInfo.ETag = info.ETag
|
||||
cachedInfo.LastModified = info.LastModified
|
||||
dcache.SavePartMetadata(GlobalContext, bucket, object, uploadID, partID, cachedInfo)
|
||||
}
|
||||
return info, err
|
||||
}
|
||||
|
||||
// CopyObjectPart behaves similar to PutObjectPart - caches part to upload dir if writethrough mode is enabled.
|
||||
func (c *cacheObjects) CopyObjectPart(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject, uploadID string, partID int, startOffset int64, length int64, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (pi PartInfo, e error) {
|
||||
copyObjectPartFn := c.InnerCopyObjectPartFn
|
||||
dcache, err := c.getCacheToLoc(ctx, dstBucket, dstObject)
|
||||
if err != nil {
|
||||
// disk cache could not be located,execute backend call.
|
||||
return copyObjectPartFn(ctx, srcBucket, srcObject, dstBucket, dstObject, uploadID, partID, startOffset, length, srcInfo, srcOpts, dstOpts)
|
||||
}
|
||||
|
||||
if !c.commitWritethrough {
|
||||
return copyObjectPartFn(ctx, srcBucket, srcObject, dstBucket, dstObject, uploadID, partID, startOffset, length, srcInfo, srcOpts, dstOpts)
|
||||
}
|
||||
if err := dcache.uploadIDExists(dstBucket, dstObject, uploadID); err != nil {
|
||||
return copyObjectPartFn(ctx, srcBucket, srcObject, dstBucket, dstObject, uploadID, partID, startOffset, length, srcInfo, srcOpts, dstOpts)
|
||||
}
|
||||
partInfo, err := copyObjectPartFn(ctx, srcBucket, srcObject, dstBucket, dstObject, uploadID, partID, startOffset, length, srcInfo, srcOpts, dstOpts)
|
||||
if err != nil {
|
||||
return pi, toObjectErr(err, dstBucket, dstObject)
|
||||
}
|
||||
go func() {
|
||||
isSuffixLength := false
|
||||
if startOffset < 0 {
|
||||
isSuffixLength = true
|
||||
}
|
||||
|
||||
rs := &HTTPRangeSpec{
|
||||
IsSuffixLength: isSuffixLength,
|
||||
Start: startOffset,
|
||||
End: startOffset + length,
|
||||
}
|
||||
// fill cache in the background
|
||||
bReader, bErr := c.InnerGetObjectNInfoFn(GlobalContext, srcBucket, srcObject, rs, http.Header{}, readLock, ObjectOptions{})
|
||||
if bErr != nil {
|
||||
return
|
||||
}
|
||||
defer bReader.Close()
|
||||
// avoid cache overwrite if another background routine filled cache
|
||||
dcache.PutObjectPart(GlobalContext, dstBucket, dstObject, uploadID, partID, bReader, length, ObjectOptions{UserDefined: getMetadata(bReader.ObjInfo)})
|
||||
}()
|
||||
// Success.
|
||||
return partInfo, nil
|
||||
}
|
||||
|
||||
// CompleteMultipartUpload - completes multipart upload operation on the backend. If writethrough mode is enabled, this also
|
||||
// finalizes the upload saved in cache multipart dir.
|
||||
func (c *cacheObjects) CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, uploadedParts []CompletePart, opts ObjectOptions) (oi ObjectInfo, err error) {
|
||||
completeMultipartUploadFn := c.InnerCompleteMultipartUploadFn
|
||||
if !c.commitWritethrough {
|
||||
return completeMultipartUploadFn(ctx, bucket, object, uploadID, uploadedParts, opts)
|
||||
}
|
||||
dcache, err := c.getCacheToLoc(ctx, bucket, object)
|
||||
if err != nil {
|
||||
// disk cache could not be located,execute backend call.
|
||||
return completeMultipartUploadFn(ctx, bucket, object, uploadID, uploadedParts, opts)
|
||||
}
|
||||
|
||||
// perform multipart upload on backend and cache simultaneously
|
||||
oi, err = completeMultipartUploadFn(ctx, bucket, object, uploadID, uploadedParts, opts)
|
||||
if err == nil {
|
||||
// fill cache in the background
|
||||
go func() {
|
||||
_, err := dcache.CompleteMultipartUpload(ctx, bucket, object, uploadID, uploadedParts, oi, opts)
|
||||
if err != nil {
|
||||
// fill cache in the background
|
||||
bReader, bErr := c.InnerGetObjectNInfoFn(GlobalContext, bucket, object, nil, http.Header{}, readLock, ObjectOptions{})
|
||||
if bErr != nil {
|
||||
return
|
||||
}
|
||||
defer bReader.Close()
|
||||
oi, _, err := dcache.Stat(GlobalContext, bucket, object)
|
||||
// avoid cache overwrite if another background routine filled cache
|
||||
if err != nil || oi.ETag != bReader.ObjInfo.ETag {
|
||||
dcache.Put(GlobalContext, bucket, object, bReader, bReader.ObjInfo.Size, nil, ObjectOptions{UserDefined: getMetadata(bReader.ObjInfo)}, false)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// AbortMultipartUpload - aborts multipart upload on backend and cache.
|
||||
func (c *cacheObjects) AbortMultipartUpload(ctx context.Context, bucket, object, uploadID string, opts ObjectOptions) error {
|
||||
abortMultipartUploadFn := c.InnerAbortMultipartUploadFn
|
||||
if !c.commitWritethrough {
|
||||
return abortMultipartUploadFn(ctx, bucket, object, uploadID, opts)
|
||||
}
|
||||
dcache, err := c.getCacheToLoc(ctx, bucket, object)
|
||||
if err != nil {
|
||||
// disk cache could not be located,execute backend call.
|
||||
return abortMultipartUploadFn(ctx, bucket, object, uploadID, opts)
|
||||
}
|
||||
if err = dcache.uploadIDExists(bucket, object, uploadID); err != nil {
|
||||
return toObjectErr(err, bucket, object, uploadID)
|
||||
}
|
||||
|
||||
// execute backend operation
|
||||
err = abortMultipartUploadFn(ctx, bucket, object, uploadID, opts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// abort multipart upload on cache
|
||||
go dcache.AbortUpload(bucket, object, uploadID)
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -2283,6 +2283,9 @@ func (api objectAPIHandlers) NewMultipartUploadHandler(w http.ResponseWriter, r
|
|||
return
|
||||
}
|
||||
newMultipartUpload := objectAPI.NewMultipartUpload
|
||||
if api.CacheAPI() != nil {
|
||||
newMultipartUpload = api.CacheAPI().NewMultipartUpload
|
||||
}
|
||||
|
||||
uploadID, err := newMultipartUpload(ctx, bucket, object, opts)
|
||||
if err != nil {
|
||||
|
@ -2597,9 +2600,13 @@ func (api objectAPIHandlers) CopyObjectPartHandler(w http.ResponseWriter, r *htt
|
|||
}
|
||||
|
||||
srcInfo.PutObjReader = pReader
|
||||
copyObjectPart := objectAPI.CopyObjectPart
|
||||
if api.CacheAPI() != nil {
|
||||
copyObjectPart = api.CacheAPI().CopyObjectPart
|
||||
}
|
||||
// Copy source object to destination, if source and destination
|
||||
// object is same then only metadata is updated.
|
||||
partInfo, err := objectAPI.CopyObjectPart(ctx, srcBucket, srcObject, dstBucket, dstObject, uploadID, partID,
|
||||
partInfo, err := copyObjectPart(ctx, srcBucket, srcObject, dstBucket, dstObject, uploadID, partID,
|
||||
startOffset, length, srcInfo, srcOpts, dstOpts)
|
||||
if err != nil {
|
||||
writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
|
||||
|
@ -2854,6 +2861,9 @@ func (api objectAPIHandlers) PutObjectPartHandler(w http.ResponseWriter, r *http
|
|||
}
|
||||
|
||||
putObjectPart := objectAPI.PutObjectPart
|
||||
if api.CacheAPI() != nil {
|
||||
putObjectPart = api.CacheAPI().PutObjectPart
|
||||
}
|
||||
|
||||
partInfo, err := putObjectPart(ctx, bucket, object, uploadID, partID, pReader, opts)
|
||||
if err != nil {
|
||||
|
@ -2907,6 +2917,9 @@ func (api objectAPIHandlers) AbortMultipartUploadHandler(w http.ResponseWriter,
|
|||
return
|
||||
}
|
||||
abortMultipartUpload := objectAPI.AbortMultipartUpload
|
||||
if api.CacheAPI() != nil {
|
||||
abortMultipartUpload = api.CacheAPI().AbortMultipartUpload
|
||||
}
|
||||
|
||||
if s3Error := checkRequestAuthType(ctx, r, policy.AbortMultipartUploadAction, bucket, object); s3Error != ErrNone {
|
||||
writeErrorResponse(ctx, w, errorCodes.ToAPIErr(s3Error), r.URL)
|
||||
|
@ -3191,7 +3204,9 @@ func (api objectAPIHandlers) CompleteMultipartUploadHandler(w http.ResponseWrite
|
|||
s3MD5 := getCompleteMultipartMD5(originalCompleteParts)
|
||||
|
||||
completeMultiPartUpload := objectAPI.CompleteMultipartUpload
|
||||
|
||||
if api.CacheAPI() != nil {
|
||||
completeMultiPartUpload = api.CacheAPI().CompleteMultipartUpload
|
||||
}
|
||||
// This code is specifically to handle the requirements for slow
|
||||
// complete multipart upload operations on FS mode.
|
||||
writeErrorResponseWithoutXMLHeader := func(ctx context.Context, w http.ResponseWriter, err APIError, reqURL *url.URL) {
|
||||
|
|
|
@ -16,7 +16,7 @@ minio gateway <name> -h
|
|||
MINIO_CACHE_WATERMARK_LOW: % of cache quota at which cache eviction stops
|
||||
MINIO_CACHE_WATERMARK_HIGH: % of cache quota at which cache eviction starts
|
||||
MINIO_CACHE_RANGE: set to "on" or "off" caching of independent range requests per object, defaults to "on"
|
||||
|
||||
MINIO_CACHE_COMMIT: set to 'writeback' or 'writethrough' for upload caching
|
||||
|
||||
...
|
||||
...
|
||||
|
@ -87,6 +87,10 @@ master key to automatically encrypt all cached content.
|
|||
Note that cache KMS master key is not recommended for use in production deployments. If the MinIO server/gateway machine is ever compromised, the cache KMS master key must also be treated as compromised.
|
||||
Support for external KMS to manage cache KMS keys is on the roadmap,and would be ideal for production use cases.
|
||||
|
||||
- `MINIO_CACHE_COMMIT` setting of `writethrough` allows caching of multipart uploads synchronously if enabled. By default, single PUT operations are already cached on write without any special setting.
|
||||
|
||||
NOTE: `MINIO_CACHE_COMMIT` also has a value of `writeback` which allows staging single uploads in cache before committing to remote. However, for consistency reasons, staging uploads in the cache are not permitted for multipart uploads. Partially cached stale uploads older than 24 hours are automatically cleaned up.
|
||||
|
||||
> NOTE: Expiration happens automatically based on the configured interval as explained above, frequently accessed objects stay alive in cache for a significantly longer time.
|
||||
|
||||
### Crash Recovery
|
||||
|
|
|
@ -39,7 +39,7 @@ type Config struct {
|
|||
WatermarkLow int `json:"watermark_low"`
|
||||
WatermarkHigh int `json:"watermark_high"`
|
||||
Range bool `json:"range"`
|
||||
CommitWriteback bool `json:"-"`
|
||||
CacheCommitMode string `json:"commit_mode"`
|
||||
}
|
||||
|
||||
// UnmarshalJSON - implements JSON unmarshal interface for unmarshalling
|
||||
|
@ -155,12 +155,11 @@ func parseCacheExcludes(excludes string) ([]string, error) {
|
|||
return excludesSlice, nil
|
||||
}
|
||||
|
||||
func parseCacheCommitMode(commitStr string) (bool, error) {
|
||||
func parseCacheCommitMode(commitStr string) (string, error) {
|
||||
switch strings.ToLower(commitStr) {
|
||||
case "writeback":
|
||||
return true, nil
|
||||
case "writethrough":
|
||||
return false, nil
|
||||
case "writeback", "writethrough":
|
||||
return strings.ToLower(commitStr), nil
|
||||
default:
|
||||
return "", config.ErrInvalidCacheCommitValue(nil).Msg("cache commit value must be `writeback` or `writethrough`")
|
||||
}
|
||||
return false, config.ErrInvalidCacheCommitValue(nil).Msg("cache commit value must be `writeback` or `writethrough`")
|
||||
}
|
||||
|
|
|
@ -219,11 +219,11 @@ func LookupConfig(kvs config.KVS) (Config, error) {
|
|||
cfg.Range = rng
|
||||
}
|
||||
if commit := env.Get(EnvCacheCommit, kvs.Get(Commit)); commit != "" {
|
||||
cfg.CommitWriteback, err = parseCacheCommitMode(commit)
|
||||
cfg.CacheCommitMode, err = parseCacheCommitMode(commit)
|
||||
if err != nil {
|
||||
return cfg, err
|
||||
}
|
||||
if cfg.After > 0 && cfg.CommitWriteback {
|
||||
if cfg.After > 0 && cfg.CacheCommitMode != "" {
|
||||
err := errors.New("cache after cannot be used with commit writeback")
|
||||
return cfg, config.ErrInvalidCacheSetting(err)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue