From 9af6c6ceeff5d63dc90c8e0fe356c4d91f8e6795 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Wed, 21 Jun 2023 13:23:20 -0700 Subject: [PATCH] under rebalance look for expired versions v/s remaining versions (#17482) A continuation of PR #17479 for rebalance behavior must also match the decommission behavior. Fixes bug where rebalance would ignore rebalancing object versions after one of the version returned "ObjectNotFound" --- cmd/erasure-server-pool-decom.go | 21 +++++++++------------ cmd/erasure-server-pool-rebalance.go | 26 +++++++++++++++++--------- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/cmd/erasure-server-pool-decom.go b/cmd/erasure-server-pool-decom.go index 7da089ad4..1b0ae30cc 100644 --- a/cmd/erasure-server-pool-decom.go +++ b/cmd/erasure-server-pool-decom.go @@ -722,20 +722,21 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool // to create the appropriate stack. versionsSorter(fivs.Versions).reverse() - var decommissionedCount int + var decommissioned, expired int for _, version := range fivs.Versions { // Apply lifecycle rules on the objects that are expired. if filterLifecycle(bi.Name, version.Name, version) { - decommissionedCount++ + expired++ + decommissioned++ continue } // any object with only single DEL marker we don't need // to decommission, just skip it, this also includes // any other versions that have already expired. - remainingVersions := len(fivs.Versions) - decommissionedCount + remainingVersions := len(fivs.Versions) - expired if version.Deleted && remainingVersions == 1 { - decommissionedCount++ + decommissioned++ continue } @@ -760,11 +761,7 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool SkipDecommissioned: true, // make sure we skip the decommissioned pool }) var failure bool - if err != nil { - if isErrObjectNotFound(err) || isErrVersionNotFound(err) { - // object deleted by the application, nothing to do here we move on. - continue - } + if err != nil && !isErrObjectNotFound(err) && !isErrVersionNotFound(err) { logger.LogIf(ctx, err) failure = true } @@ -773,7 +770,7 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool z.poolMetaMutex.Unlock() if !failure { // Success keep a count. - decommissionedCount++ + decommissioned++ } continue } @@ -848,11 +845,11 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool if failure { break // break out on first error } - decommissionedCount++ + decommissioned++ } // if all versions were decommissioned, then we can delete the object versions. - if decommissionedCount == len(fivs.Versions) { + if decommissioned == len(fivs.Versions) { stopFn := globalDecommissionMetrics.log(decomMetricDecommissionRemoveObject, idx, bi.Name, entry.name) _, err := set.DeleteObject(ctx, bi.Name, diff --git a/cmd/erasure-server-pool-rebalance.go b/cmd/erasure-server-pool-rebalance.go index 564ee5bcf..15a5de530 100644 --- a/cmd/erasure-server-pool-rebalance.go +++ b/cmd/erasure-server-pool-rebalance.go @@ -500,7 +500,7 @@ func (z *erasureServerPools) rebalanceBucket(ctx context.Context, bucket string, // to create the appropriate stack. versionsSorter(fivs.Versions).reverse() - var rebalanced int + var rebalanced, expired int for _, version := range fivs.Versions { // Skip transitioned objects for now. TBD if version.IsRemote() { @@ -510,12 +510,15 @@ func (z *erasureServerPools) rebalanceBucket(ctx context.Context, bucket string, // Apply lifecycle rules on the objects that are expired. if filterLifecycle(bucket, version.Name, version) { rebalanced++ + expired++ continue } - // Empty delete markers we can assume that they can be purged - // as there is no more data associated with them. - if version.Deleted && len(fivs.Versions) == 1 { + // any object with only single DEL marker we don't need + // to rebalance, just skip it, this also includes + // any other versions that have already expired. + remainingVersions := len(fivs.Versions) - expired + if version.Deleted && remainingVersions == 1 { rebalanced++ continue } @@ -550,9 +553,10 @@ func (z *erasureServerPools) rebalanceBucket(ctx context.Context, bucket string, continue } - var failure bool + var failure, ignore bool for try := 0; try < 3; try++ { // GetObjectReader.Close is called by rebalanceObject + stopFn := globalRebalanceMetrics.log(rebalanceMetricRebalanceObject, poolIdx, bucket, version.Name, version.VersionID) gr, err := set.GetObjectNInfo(ctx, bucket, encodeDirObject(version.Name), @@ -565,19 +569,21 @@ func (z *erasureServerPools) rebalanceBucket(ctx context.Context, bucket string, }) if isErrObjectNotFound(err) || isErrVersionNotFound(err) { // object deleted by the application, nothing to do here we move on. - return + ignore = true + stopFn(nil) + break } if err != nil { failure = true logger.LogIf(ctx, err) + stopFn(err) continue } - stopFn := globalRebalanceMetrics.log(rebalanceMetricRebalanceObject, poolIdx, bucket, version.Name) if err = z.rebalanceObject(ctx, bucket, gr); err != nil { - stopFn(err) failure = true logger.LogIf(ctx, err) + stopFn(err) continue } @@ -585,7 +591,9 @@ func (z *erasureServerPools) rebalanceBucket(ctx context.Context, bucket string, failure = false break } - + if ignore { + continue + } if failure { break // break out on first error }