From 2ac54e5a7bf864983fa0106ab470ded2c32dbfd5 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 22 Mar 2022 12:39:45 -0700 Subject: [PATCH] ListObjects: Filter lifecycle expired objects (#14606) For ListObjects and ListObjectsV2 perform lifecycle checks on all objects before returning. This will filter out objects that are pending lifecycle expiration. Bonus: Cheaper server pool conflict resolution by not converting to FileInfo. --- cmd/erasure-server-pool.go | 17 +++++++++++ cmd/metacache-entries.go | 2 +- cmd/metacache-server-pool.go | 58 ++++++++++++++++++++++++++++++++---- cmd/metacache-set.go | 6 ++++ 4 files changed, 76 insertions(+), 7 deletions(-) diff --git a/cmd/erasure-server-pool.go b/cmd/erasure-server-pool.go index e427dc860..b13ba2bbb 100644 --- a/cmd/erasure-server-pool.go +++ b/cmd/erasure-server-pool.go @@ -33,6 +33,7 @@ import ( "github.com/minio/madmin-go" "github.com/minio/minio-go/v7/pkg/set" "github.com/minio/minio-go/v7/pkg/tags" + "github.com/minio/minio/internal/bucket/lifecycle" "github.com/minio/minio/internal/config/storageclass" "github.com/minio/minio/internal/logger" "github.com/minio/minio/internal/sync/errgroup" @@ -1152,6 +1153,14 @@ func maxKeysPlusOne(maxKeys int, addOne bool) int { func (z *erasureServerPools) ListObjects(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) { var loi ListObjectsInfo + // Automatically remove the object/version is an expiry lifecycle rule can be applied + lc, _ := globalLifecycleSys.Get(bucket) + if lc != nil { + if !lc.HasActiveRules(prefix, true) { + lc = nil + } + } + if len(prefix) > 0 && maxKeys == 1 && delimiter == "" && marker == "" { // Optimization for certain applications like // - Cohesity @@ -1162,6 +1171,13 @@ func (z *erasureServerPools) ListObjects(ctx context.Context, bucket, prefix, ma // to avoid the need for ListObjects(). objInfo, err := z.GetObjectInfo(ctx, bucket, prefix, ObjectOptions{NoLock: true}) if err == nil { + if lc != nil { + action := evalActionFromLifecycle(ctx, *lc, objInfo, false) + switch action { + case lifecycle.DeleteVersionAction, lifecycle.DeleteAction: + return loi, nil + } + } loi.Objects = append(loi.Objects, objInfo) return loi, nil } @@ -1176,6 +1192,7 @@ func (z *erasureServerPools) ListObjects(ctx context.Context, bucket, prefix, ma InclDeleted: false, AskDisks: globalAPIConfig.getListQuorum(), } + merged, err := z.listPath(ctx, &opts) if err != nil && err != io.EOF { if !isErrBucketNotFound(err) { diff --git a/cmd/metacache-entries.go b/cmd/metacache-entries.go index 30cd0fef0..f0f26534b 100644 --- a/cmd/metacache-entries.go +++ b/cmd/metacache-entries.go @@ -498,7 +498,7 @@ func (m *metaCacheEntriesSorted) fileInfoVersions(bucket, prefix, delimiter, aft return versions } -// fileInfoVersions converts the metadata to FileInfoVersions where possible. +// fileInfos converts the metadata to ObjectInfo where possible. // Metadata that cannot be decoded is skipped. func (m *metaCacheEntriesSorted) fileInfos(bucket, prefix, delimiter string) (objects []ObjectInfo) { objects = make([]ObjectInfo, 0, m.len()) diff --git a/cmd/metacache-server-pool.go b/cmd/metacache-server-pool.go index 7ff4db7ec..7d350630b 100644 --- a/cmd/metacache-server-pool.go +++ b/cmd/metacache-server-pool.go @@ -28,6 +28,7 @@ import ( "sync" "time" + "github.com/minio/minio/internal/bucket/lifecycle" "github.com/minio/minio/internal/logger" ) @@ -289,6 +290,14 @@ func (z *erasureServerPools) listMerged(ctx context.Context, o listPathOptions, } mu.Unlock() + // Do lifecycle filtering. + if o.lcFilter != nil { + filterIn := make(chan metaCacheEntry, 10) + go filterLifeCycle(ctx, o.Bucket, o.lcFilter, filterIn, results) + // Replace results. + results = filterIn + } + // Gather results to a single channel. err := mergeEntryChannels(ctx, inputs, results, func(existing, other *metaCacheEntry) (replace bool) { // Pick object over directory @@ -298,21 +307,20 @@ func (z *erasureServerPools) listMerged(ctx context.Context, o listPathOptions, if !existing.isDir() && other.isDir() { return false } - - eFIV, err := existing.fileInfo(o.Bucket) + eMeta, err := existing.xlmeta() if err != nil { return true } - oFIV, err := other.fileInfo(o.Bucket) + oMeta, err := other.xlmeta() if err != nil { return false } // Replace if modtime is newer - if !oFIV.ModTime.Equal(eFIV.ModTime) { - return oFIV.ModTime.After(eFIV.ModTime) + if !oMeta.latestModtime().Equal(oMeta.latestModtime()) { + return oMeta.latestModtime().After(eMeta.latestModtime()) } // Use NumVersions as a final tiebreaker. - return oFIV.NumVersions > eFIV.NumVersions + return len(oMeta.versions) > len(eMeta.versions) }) cancelList() @@ -346,6 +354,44 @@ func (z *erasureServerPools) listMerged(ctx context.Context, o listPathOptions, return nil } +// filterLifeCycle will filter out objects if the most recent +// version should be deleted by lifecycle. +// out will be closed when there are no more results. +// When 'in' is closed or the context is canceled the +// function closes 'out' and exits. +func filterLifeCycle(ctx context.Context, bucket string, lc *lifecycle.Lifecycle, in <-chan metaCacheEntry, out chan<- metaCacheEntry) { + defer close(out) + for { + var obj metaCacheEntry + var ok bool + select { + case <-ctx.Done(): + return + case obj, ok = <-in: + if !ok { + return + } + } + + fi, err := obj.fileInfo(bucket) + if err != nil { + continue + } + objInfo := fi.ToObjectInfo(bucket, obj.name) + action := evalActionFromLifecycle(ctx, *lc, objInfo, false) + switch action { + case lifecycle.DeleteVersionAction, lifecycle.DeleteAction: + // Skip this entry. + continue + } + select { + case <-ctx.Done(): + return + case out <- obj: + } + } +} + func (z *erasureServerPools) listAndSave(ctx context.Context, o *listPathOptions) (entries metaCacheEntriesSorted, err error) { // Use ID as the object name... o.pool = z.getAvailablePoolIdx(ctx, minioMetaBucket, o.ID, 10<<20) diff --git a/cmd/metacache-set.go b/cmd/metacache-set.go index addaf6460..3639c1888 100644 --- a/cmd/metacache-set.go +++ b/cmd/metacache-set.go @@ -32,6 +32,7 @@ import ( "time" jsoniter "github.com/json-iterator/go" + "github.com/minio/minio/internal/bucket/lifecycle" "github.com/minio/minio/internal/color" "github.com/minio/minio/internal/hash" "github.com/minio/minio/internal/logger" @@ -96,6 +97,11 @@ type listPathOptions struct { // pool and set of where the cache is located. pool, set int + + // lcFilter performs filtering based on lifecycle. + // This will filter out objects if the most recent version should be deleted by lifecycle. + // Is not transferred across request calls. + lcFilter *lifecycle.Lifecycle } func init() {