fix: retry listing in decommissioning if it fails perpetually (#17682)

This commit is contained in:
Harshavardhana 2023-07-19 13:09:37 -07:00 committed by GitHub
parent c0a5bdaed9
commit bddd53d6d2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -696,7 +696,9 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool
// Check if bucket is object locked.
lr, _ := globalBucketObjectLockSys.Get(bi.Name)
for _, set := range pool.sets {
r := rand.New(rand.NewSource(time.Now().UnixNano()))
for setIdx, set := range pool.sets {
set := set
filterLifecycle := func(bucket, object string, fi FileInfo) bool {
@ -895,16 +897,26 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool
}
wk.Take()
go func() {
go func(setIdx int) {
defer wk.Give()
err := set.listObjectsToDecommission(ctx, bi,
func(entry metaCacheEntry) {
wk.Take()
go decommissionEntry(entry)
},
)
logger.LogIf(ctx, err)
}()
// We will perpetually retry listing if it fails, since we cannot
// possibly give up in this matter
for {
err := set.listObjectsToDecommission(ctx, bi,
func(entry metaCacheEntry) {
wk.Take()
go decommissionEntry(entry)
},
)
if err == nil {
break
}
setN := humanize.Ordinal(setIdx + 1)
retryDur := time.Duration(r.Float64() * float64(5*time.Second))
logger.LogOnceIf(ctx, fmt.Errorf("listing objects from %s set failed with %v, retrying in %v", setN, err, retryDur), "decom-listing-failed"+setN)
time.Sleep(retryDur)
}
}(setIdx)
}
wk.Wait()
return nil