Avoid cache GC of writebacks before commit syncs (#13860)

Save part.1 for writebacks in a separate folder
and move it into the cache dir atomically while saving
the cache metadata. This avoids GC mistaking part.1
files for orphaned cache entries and purging them.

This PR also fixes the object size being overwritten during
retries in write-back mode.
Poorna K 2021-12-08 14:52:31 -08:00 committed by GitHub
parent e82a5c5c54
commit 0a66a6f1e5
4 changed files with 78 additions and 25 deletions
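
Distilled from the description above, a minimal sketch of the staging-then-rename pattern this commit adopts. All paths and helper names below are illustrative stand-ins, not MinIO's actual code; `os.Rename` plays the role that `renameAll` plays in the real patch:

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// finalizeWriteback moves a staged data file into the GC-visible cache
// directory. os.Rename is atomic on a single filesystem, so a concurrent
// GC pass sees either no part file or a fully committed one - never a
// half-written orphan it might purge.
func finalizeWriteback(stagingDir, cacheDir, dataFile string) error {
	if err := os.MkdirAll(cacheDir, 0o777); err != nil {
		return err
	}
	if err := os.Rename(filepath.Join(stagingDir, dataFile), filepath.Join(cacheDir, dataFile)); err != nil {
		return err
	}
	return os.RemoveAll(stagingDir) // staging dir is disposable once committed
}

func main() {
	staging, cache := "cache/.minio.sys/writeback/abc123", "cache/abc123"
	_ = os.MkdirAll(staging, 0o777)
	_ = os.WriteFile(filepath.Join(staging, "part.1"), []byte("data"), 0o644)
	fmt.Println(finalizeWriteback(staging, cache, "part.1")) // <nil> on success
}
```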


@@ -59,10 +59,13 @@ const (
 	cacheExpiryDays = 90 * time.Hour * 24 // defaults to 90 days
 	// SSECacheEncrypted is the metadata key indicating that the object
 	// is a cache entry encrypted with cache KMS master key in globalCacheKMS.
 	SSECacheEncrypted = "X-Minio-Internal-Encrypted-Cache"
 	cacheMultipartDir = "multipart"
+	cacheWritebackDir = "writeback"
 	cacheStaleUploadCleanupInterval = time.Hour * 24
 	cacheStaleUploadExpiry          = time.Hour * 24
+	cacheWBStaleUploadExpiry        = time.Hour * 24 * 7
 )

 // CacheChecksumInfoV1 - carries checksums of individual blocks on disk.
@@ -105,15 +108,15 @@ func (r *RangeInfo) Empty() bool {
 	return r.Range == "" && r.File == "" && r.Size == 0
 }

-func (m *cacheMeta) ToObjectInfo(bucket, object string) (o ObjectInfo) {
+func (m *cacheMeta) ToObjectInfo() (o ObjectInfo) {
 	if len(m.Meta) == 0 {
 		m.Meta = make(map[string]string)
 		m.Stat.ModTime = timeSentinel
 	}

 	o = ObjectInfo{
-		Bucket:            bucket,
-		Name:              object,
+		Bucket:            m.Bucket,
+		Name:              m.Object,
 		CacheStatus:       CacheHit,
 		CacheLookupStatus: CacheHit,
 	}
@@ -381,7 +384,7 @@ func (c *diskCache) purge(ctx context.Context) {
 		lastAtime := lastAtimeFn(meta.PartNumbers, pathJoin(c.dir, name))
 		// stat all cached file ranges.
 		cachedRngFiles := fiStatRangesFn(meta.Ranges, pathJoin(c.dir, name))
-		objInfo := meta.ToObjectInfo("", "")
+		objInfo := meta.ToObjectInfo()
 		// prevent gc from clearing un-synced commits. This metadata is present when
 		// cache writeback commit setting is enabled.
 		status, ok := objInfo.UserDefined[writeBackStatusHeader]
@@ -408,6 +411,9 @@ func (c *diskCache) purge(ctx context.Context) {
 		}

 		for fname, fi := range cachedRngFiles {
+			if fi == nil {
+				continue
+			}
 			if cc != nil {
 				if cc.isStale(objInfo.ModTime) {
 					removeAll(fname)
@@ -425,9 +431,11 @@ func (c *diskCache) purge(ctx context.Context) {
 		}
 		// clean up stale cache.json files for objects that never got cached but access count was maintained in cache.json
 		fi, err := os.Stat(pathJoin(cacheDir, cacheMetaJSONFile))
-		if err != nil || (fi.ModTime().Before(expiry) && len(cachedRngFiles) == 0) {
+		if err != nil || (fi != nil && fi.ModTime().Before(expiry) && len(cachedRngFiles) == 0) {
 			removeAll(cacheDir)
-			scorer.adjustSaveBytes(-fi.Size())
+			if fi != nil {
+				scorer.adjustSaveBytes(-fi.Size())
+			}
 			// Proceed to next file.
 			return nil
 		}
@@ -486,7 +494,7 @@ func (c *diskCache) Stat(ctx context.Context, bucket, object string) (oi ObjectI
 	if partial {
 		return oi, numHits, errFileNotFound
 	}
-	oi = meta.ToObjectInfo("", "")
+	oi = meta.ToObjectInfo()
 	oi.Bucket = bucket
 	oi.Name = object
@@ -521,7 +529,7 @@ func (c *diskCache) statRange(ctx context.Context, bucket, object string, rs *HT
 		return
 	}
-	oi = meta.ToObjectInfo("", "")
+	oi = meta.ToObjectInfo()
 	oi.Bucket = bucket
 	oi.Name = object
 	if !partial {
@@ -573,12 +581,16 @@ func (c *diskCache) statCache(ctx context.Context, cacheObjPath string) (meta *c
 	if _, err := os.Stat(pathJoin(cacheObjPath, cacheDataFile)); err == nil {
 		partial = false
 	}
+	if writebackInProgress(meta.Meta) {
+		partial = false
+	}
 	return meta, partial, meta.Hits, nil
 }

 // saves object metadata to disk cache
 // incHitsOnly is true if metadata update is incrementing only the hit counter
-func (c *diskCache) SaveMetadata(ctx context.Context, bucket, object string, meta map[string]string, actualSize int64, rs *HTTPRangeSpec, rsFileName string, incHitsOnly bool) error {
+// finalizeWB is true only if metadata update accompanied by moving part from temp location to cache dir.
+func (c *diskCache) SaveMetadata(ctx context.Context, bucket, object string, meta map[string]string, actualSize int64, rs *HTTPRangeSpec, rsFileName string, incHitsOnly, finalizeWB bool) error {
 	cachedPath := getCacheSHADir(c.dir, bucket, object)
 	cLock := c.NewNSLockFn(cachedPath)
 	lkctx, err := cLock.GetLock(ctx, globalOperationTimeout)
@@ -587,7 +599,18 @@ func (c *diskCache) SaveMetadata(ctx context.Context, bucket, object string, met
 	}
 	ctx = lkctx.Context()
 	defer cLock.Unlock(lkctx.Cancel)
-	return c.saveMetadata(ctx, bucket, object, meta, actualSize, rs, rsFileName, incHitsOnly)
+	if err = c.saveMetadata(ctx, bucket, object, meta, actualSize, rs, rsFileName, incHitsOnly); err != nil {
+		return err
+	}
+	// move part saved in writeback directory and cache.json atomically
+	if finalizeWB {
+		wbdir := getCacheWriteBackSHADir(c.dir, bucket, object)
+		if err = renameAll(pathJoin(wbdir, cacheDataFile), pathJoin(cachedPath, cacheDataFile)); err != nil {
+			return err
+		}
+		removeAll(wbdir) // cleanup writeback/shadir
+	}
+	return nil
 }

 // saves object metadata to disk cache
@@ -702,6 +725,11 @@ func getCacheSHADir(dir, bucket, object string) string {
 	return pathJoin(dir, getSHA256Hash([]byte(pathJoin(bucket, object))))
 }

+// returns temporary writeback cache location.
+func getCacheWriteBackSHADir(dir, bucket, object string) string {
+	return pathJoin(dir, minioMetaBucket, "writeback", getSHA256Hash([]byte(pathJoin(bucket, object))))
+}
+
 // Cache data to disk with bitrot checksum added for each block of 1MB
 func (c *diskCache) bitrotWriteToCache(cachePath, fileName string, reader io.Reader, size uint64) (int64, string, error) {
 	if err := os.MkdirAll(cachePath, 0777); err != nil {
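
For reference, a self-contained rendering of what `getCacheWriteBackSHADir` computes, under the assumptions that `minioMetaBucket` is `".minio.sys"` and `getSHA256Hash` is a hex-encoded SHA-256; both are hedged stand-ins here, not taken from this diff:

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"path"
)

// writebackDir mimics getCacheWriteBackSHADir: staged writeback data lives
// under <cacheDrive>/.minio.sys/writeback/<sha256(bucket/object)>, outside
// the per-object directories that the GC walks.
func writebackDir(dir, bucket, object string) string {
	sum := sha256.Sum256([]byte(path.Join(bucket, object)))
	return path.Join(dir, ".minio.sys", "writeback", hex.EncodeToString(sum[:]))
}

func main() {
	fmt.Println(writebackDir("/mnt/cache1", "photos", "2021/cat.png"))
}
```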
@@ -846,6 +874,11 @@ func (c *diskCache) put(ctx context.Context, bucket, object string, data io.Read
 	if !c.diskSpaceAvailable(size) {
 		return oi, errDiskFull
 	}
+	if writeback {
+		cachePath = getCacheWriteBackSHADir(c.dir, bucket, object)
+	}
 	if err := os.MkdirAll(cachePath, 0777); err != nil {
 		return oi, err
 	}
@@ -1131,6 +1164,9 @@ func (c *diskCache) Get(ctx context.Context, bucket, object string, rs *HTTPRang
 		}()
 	} else {
 		go func() {
+			if writebackInProgress(objInfo.UserDefined) {
+				cacheObjPath = getCacheWriteBackSHADir(c.dir, bucket, object)
+			}
 			filePath := pathJoin(cacheObjPath, cacheFile)
 			err := c.bitrotReadFromCache(ctx, filePath, startOffset, length, pw)
 			if err != nil {
@@ -1199,7 +1235,7 @@ func (c *diskCache) scanCacheWritebackFailures(ctx context.Context) {
 			return nil
 		}
-		objInfo := meta.ToObjectInfo("", "")
+		objInfo := meta.ToObjectInfo()
 		status, ok := objInfo.UserDefined[writeBackStatusHeader]
 		if !ok || status == CommitComplete.String() {
 			return nil
@@ -1497,6 +1533,8 @@ func (c *diskCache) CompleteMultipartUpload(ctx context.Context, bucket, object,
 	}
 	uploadMeta.Stat.Size = objectSize
 	uploadMeta.Stat.ModTime = roi.ModTime
+	uploadMeta.Bucket = bucket
+	uploadMeta.Object = object
 	// if encrypted - make sure ETag updated
 	uploadMeta.Meta["etag"] = roi.ETag
@@ -1532,7 +1570,7 @@ func (c *diskCache) CompleteMultipartUpload(ctx context.Context, bucket, object,
 	}
 	renameAll(pathJoin(uploadIDDir, cacheMetaJSONFile), pathJoin(cachePath, cacheMetaJSONFile))
 	removeAll(uploadIDDir) // clean up any unused parts in the uploadIDDir
-	return uploadMeta.ToObjectInfo(bucket, object), nil
+	return uploadMeta.ToObjectInfo(), nil
 }

 func (c *diskCache) AbortUpload(bucket, object, uploadID string) (err error) {
@@ -1579,9 +1617,6 @@ func getMultipartCacheSHADir(dir, bucket, object string) string {

 // clean up stale cache multipart uploads according to cleanup interval.
 func (c *diskCache) cleanupStaleUploads(ctx context.Context) {
-	if !c.commitWritethrough {
-		return
-	}
 	timer := time.NewTimer(cacheStaleUploadCleanupInterval)
 	defer timer.Stop()
 	for {
@@ -1605,6 +1640,22 @@ func (c *diskCache) cleanupStaleUploads(ctx context.Context) {
 					return nil
 				})
 			})
+			// clean up of writeback folder where cache.json no longer exists in the main c.dir/<sha256(bucket,object> path
+			// and if past upload expiry window.
+			readDirFn(pathJoin(c.dir, minioMetaBucket, cacheWritebackDir), func(shaDir string, typ os.FileMode) error {
+				wbdir := pathJoin(c.dir, minioMetaBucket, cacheWritebackDir, shaDir)
+				cachedir := pathJoin(c.dir, shaDir)
+				if _, err := os.Stat(cachedir); os.IsNotExist(err) {
+					fi, err := os.Stat(wbdir)
+					if err != nil {
+						return nil
+					}
+					if now.Sub(fi.ModTime()) > cacheWBStaleUploadExpiry {
+						return removeAll(wbdir)
+					}
+				}
+				return nil
+			})
 		}
 	}
 }
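
The cleanup rule added above, distilled into a stand-alone predicate: a staged writeback directory is purged only if its corresponding cache directory is gone and the staging directory has aged past the seven-day window. The helper name and paths below are hypothetical:

```go
package main

import (
	"fmt"
	"os"
	"time"
)

const wbStaleExpiry = 7 * 24 * time.Hour // mirrors cacheWBStaleUploadExpiry in this diff

// shouldPurgeWriteback reports whether a staged writeback dir is safe to
// delete: the cache dir owning its cache.json must be gone (the entry was
// never committed, or was already purged) and the staging dir must be
// older than the expiry window.
func shouldPurgeWriteback(wbDir, cacheDir string, now time.Time) bool {
	if _, err := os.Stat(cacheDir); !os.IsNotExist(err) {
		return false // cache entry still exists (or Stat failed for another reason)
	}
	fi, err := os.Stat(wbDir)
	if err != nil {
		return false
	}
	return now.Sub(fi.ModTime()) > wbStaleExpiry
}

func main() {
	fmt.Println(shouldPurgeWriteback("cache/.minio.sys/writeback/abc", "cache/abc", time.Now()))
}
```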


@@ -585,8 +585,8 @@ func cacheMultiWriter(w1 io.Writer, w2 *io.PipeWriter) io.Writer {
 	return &multiWriter{backendWriter: w1, cacheWriter: w2}
 }

-// skipETagVerification returns true if writeback commit is not complete
-func skipETagVerification(m map[string]string) bool {
+// writebackInProgress returns true if writeback commit is not complete
+func writebackInProgress(m map[string]string) bool {
 	if v, ok := m[writeBackStatusHeader]; ok {
 		switch cacheCommitStatus(v) {
 		case CommitPending, CommitFailed:
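
A self-contained sketch of the renamed helper; the metadata key and status strings are placeholders, since the real constants are defined elsewhere in the codebase:

```go
package main

import "fmt"

type cacheCommitStatus string

const (
	// Placeholder values; MinIO defines its own status constants.
	CommitPending  cacheCommitStatus = "pending"
	CommitFailed   cacheCommitStatus = "failed"
	CommitComplete cacheCommitStatus = "complete"
)

// Hypothetical metadata key standing in for writeBackStatusHeader.
const writeBackStatusHeader = "x-minio-internal-writeback-status"

// writebackInProgress reports whether the cached object's backend commit
// is still pending or has failed; callers skip ETag verification in that
// case because the backend copy may not exist yet.
func writebackInProgress(m map[string]string) bool {
	if v, ok := m[writeBackStatusHeader]; ok {
		switch cacheCommitStatus(v) {
		case CommitPending, CommitFailed:
			return true
		}
	}
	return false
}

func main() {
	meta := map[string]string{writeBackStatusHeader: string(CommitFailed)}
	fmt.Println(writebackInProgress(meta)) // true: serve from cache without ETag check
}
```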


@@ -132,7 +132,7 @@ type cacheObjects struct {
 func (c *cacheObjects) incHitsToMeta(ctx context.Context, dcache *diskCache, bucket, object string, size int64, eTag string, rs *HTTPRangeSpec) error {
 	metadata := map[string]string{"etag": eTag}
-	return dcache.SaveMetadata(ctx, bucket, object, metadata, size, rs, "", true)
+	return dcache.SaveMetadata(ctx, bucket, object, metadata, size, rs, "", true, false)
 }

 // Backend metadata could have changed through server side copy - reset cache metadata if that is the case
@@ -159,7 +159,7 @@ func (c *cacheObjects) updateMetadataIfChanged(ctx context.Context, dcache *disk
 		bkObjectInfo.ETag != cacheObjInfo.ETag ||
 		bkObjectInfo.ContentType != cacheObjInfo.ContentType ||
 		!bkObjectInfo.Expires.Equal(cacheObjInfo.Expires) {
-		return dcache.SaveMetadata(ctx, bucket, object, getMetadata(bkObjectInfo), bkObjectInfo.Size, nil, "", false)
+		return dcache.SaveMetadata(ctx, bucket, object, getMetadata(bkObjectInfo), bkObjectInfo.Size, nil, "", false, false)
 	}
 	return c.incHitsToMeta(ctx, dcache, bucket, object, cacheObjInfo.Size, cacheObjInfo.ETag, rs)
 }
@@ -277,7 +277,7 @@ func (c *cacheObjects) GetObjectNInfo(ctx context.Context, bucket, object string
 			return bReader, err
 		}
 		// serve cached content without ETag verification if writeback commit is not yet complete
-		if skipETagVerification(cacheReader.ObjInfo.UserDefined) {
+		if writebackInProgress(cacheReader.ObjInfo.UserDefined) {
 			return cacheReader, nil
 		}
 	}
@@ -427,7 +427,7 @@ func (c *cacheObjects) GetObjectInfo(ctx context.Context, bucket, object string,
 			return cachedObjInfo, nil
 		}
 		// serve cache metadata without ETag verification if writeback commit is not yet complete
-		if skipETagVerification(cachedObjInfo.UserDefined) {
+		if writebackInProgress(cachedObjInfo.UserDefined) {
 			return cachedObjInfo, nil
 		}
 	}
@@ -794,6 +794,7 @@ func (c *cacheObjects) uploadObject(ctx context.Context, oi ObjectInfo) {
 	opts.UserDefined[xhttp.ContentMD5] = oi.UserDefined["content-md5"]
 	objInfo, err := c.InnerPutObjectFn(ctx, oi.Bucket, oi.Name, NewPutObjReader(hashReader), opts)
 	wbCommitStatus := CommitComplete
+	size := objInfo.Size
 	if err != nil {
 		wbCommitStatus = CommitFailed
 	}
@@ -804,12 +805,13 @@ func (c *cacheObjects) uploadObject(ctx context.Context, oi ObjectInfo) {
 		retryCnt, _ = strconv.Atoi(meta[writeBackRetryHeader])
 		retryCnt++
 		meta[writeBackRetryHeader] = strconv.Itoa(retryCnt)
+		size = cReader.ObjInfo.Size
 	} else {
 		delete(meta, writeBackRetryHeader)
 	}
 	meta[writeBackStatusHeader] = wbCommitStatus.String()
 	meta["etag"] = oi.ETag
-	dcache.SaveMetadata(ctx, oi.Bucket, oi.Name, meta, objInfo.Size, nil, "", false)
+	dcache.SaveMetadata(ctx, oi.Bucket, oi.Name, meta, size, nil, "", false, wbCommitStatus == CommitComplete)
 	if retryCnt > 0 {
 		// slow down retries
 		time.Sleep(time.Second * time.Duration(retryCnt%10+1))
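
Why the size plumbing above matters: when the backend PutObject fails, objInfo is a zero value, so persisting objInfo.Size would stamp 0 over the cached object's real size on each retry. A sketch of the decision with simplified types (names hypothetical):

```go
package main

import "fmt"

type objectInfo struct{ Size int64 }

// pickWritebackSize mirrors the fix in this diff: on upload failure the
// returned objectInfo is a zero value, so we keep the size recorded on the
// cached copy instead of overwriting it with 0.
func pickWritebackSize(uploaded, cached objectInfo, uploadErr error) int64 {
	if uploadErr != nil {
		return cached.Size
	}
	return uploaded.Size
}

func main() {
	cached := objectInfo{Size: 1 << 20}
	fmt.Println(pickWritebackSize(objectInfo{}, cached, fmt.Errorf("backend down"))) // 1048576
}
```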


@@ -25,7 +25,7 @@ import (
 // Tests ToObjectInfo function.
 func TestCacheMetadataObjInfo(t *testing.T) {
 	m := cacheMeta{Meta: nil}
-	objInfo := m.ToObjectInfo("testbucket", "testobject")
+	objInfo := m.ToObjectInfo()
 	if objInfo.Size != 0 {
 		t.Fatal("Unexpected object info value for Size", objInfo.Size)
 	}