mirror of
https://github.com/minio/minio.git
synced 2025-01-22 20:23:14 -05:00
retry disk replacement healing if listing fails (#13689)
listing can fail and it is allowed to be retried, instead of returning right away return an error at the end - heal the rest of the buckets and objects, and when we are retrying skip the buckets that are already marked done by using the tracked buckets. fixes #12972
This commit is contained in:
parent
81d19156e9
commit
17fd71164c
@ -18,12 +18,12 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
@ -412,7 +412,8 @@ func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerPools, bgSeq
|
||||
// So someone changed the drives underneath, healing tracker missing.
|
||||
tracker, err := loadHealingTracker(ctx, disk)
|
||||
if err != nil {
|
||||
logger.Info("Healing tracker missing on '%s', disk was swapped again on %s pool", disk, humanize.Ordinal(i+1))
|
||||
logger.Info("Healing tracker missing on '%s', disk was swapped again on %s pool",
|
||||
disk, humanize.Ordinal(i+1))
|
||||
tracker = newHealingTracker(disk)
|
||||
}
|
||||
|
||||
@ -434,16 +435,15 @@ func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerPools, bgSeq
|
||||
return
|
||||
}
|
||||
|
||||
err = z.serverPools[i].sets[setIndex].healErasureSet(ctx, buckets, tracker)
|
||||
err = z.serverPools[i].sets[setIndex].healErasureSet(ctx, tracker.QueuedBuckets, tracker)
|
||||
if err != nil {
|
||||
logger.LogIf(ctx, err)
|
||||
continue
|
||||
}
|
||||
|
||||
logger.Info("Healing disk '%s' on %s pool complete", disk, humanize.Ordinal(i+1))
|
||||
var buf bytes.Buffer
|
||||
tracker.printTo(&buf)
|
||||
logger.Info("Summary:\n%s", buf.String())
|
||||
logger.Info("Summary:\n")
|
||||
tracker.printTo(os.Stdout)
|
||||
logger.LogIf(ctx, tracker.delete(ctx))
|
||||
|
||||
// Only upon success pop the healed disk.
|
||||
|
@ -23,6 +23,7 @@ import (
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/minio/madmin-go"
|
||||
"github.com/minio/minio/internal/color"
|
||||
"github.com/minio/minio/internal/config/storageclass"
|
||||
@ -163,23 +164,20 @@ func mustGetHealSequence(ctx context.Context) *healSequence {
|
||||
}
|
||||
|
||||
// healErasureSet lists and heals all objects in a specific erasure set
|
||||
func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketInfo, tracker *healingTracker) error {
|
||||
func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string, tracker *healingTracker) error {
|
||||
bgSeq := mustGetHealSequence(ctx)
|
||||
buckets = append(buckets, BucketInfo{
|
||||
Name: pathJoin(minioMetaBucket, minioConfigPrefix),
|
||||
})
|
||||
|
||||
scanMode := globalHealConfig.ScanMode()
|
||||
|
||||
var retErr error
|
||||
// Heal all buckets with all objects
|
||||
for _, bucket := range buckets {
|
||||
if tracker.isHealed(bucket.Name) {
|
||||
if tracker.isHealed(bucket) {
|
||||
continue
|
||||
}
|
||||
var forwardTo string
|
||||
// If we resume to the same bucket, forward to last known item.
|
||||
if tracker.Bucket != "" {
|
||||
if tracker.Bucket == bucket.Name {
|
||||
if tracker.Bucket == bucket {
|
||||
forwardTo = tracker.Object
|
||||
} else {
|
||||
// Reset to where last bucket ended if resuming.
|
||||
@ -187,16 +185,18 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
||||
}
|
||||
}
|
||||
tracker.Object = ""
|
||||
tracker.Bucket = bucket.Name
|
||||
tracker.Bucket = bucket
|
||||
// Heal current bucket
|
||||
if _, err := er.HealBucket(ctx, bucket.Name, madmin.HealOpts{
|
||||
if _, err := er.HealBucket(ctx, bucket, madmin.HealOpts{
|
||||
ScanMode: scanMode,
|
||||
}); err != nil {
|
||||
logger.LogIf(ctx, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if serverDebugLog {
|
||||
console.Debugf(color.Green("healDisk:")+" healing bucket %s content on erasure set %d\n", bucket.Name, tracker.SetIndex+1)
|
||||
console.Debugf(color.Green("healDisk:")+" healing bucket %s content on %s erasure set\n",
|
||||
bucket, humanize.Ordinal(tracker.SetIndex+1))
|
||||
}
|
||||
|
||||
disks, _ := er.getOnlineDisksWithHealing()
|
||||
@ -204,7 +204,7 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
||||
// all disks are healing in this set, this is allowed
|
||||
// so we simply proceed to next bucket, marking the bucket
|
||||
// as done as there are no objects to heal.
|
||||
tracker.bucketDone(bucket.Name)
|
||||
tracker.bucketDone(bucket)
|
||||
logger.LogIf(ctx, tracker.update(ctx))
|
||||
continue
|
||||
}
|
||||
@ -221,7 +221,7 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
||||
// We might land at .metacache, .trash, .multipart
|
||||
// no need to heal them skip, only when bucket
|
||||
// is '.minio.sys'
|
||||
if bucket.Name == minioMetaBucket {
|
||||
if bucket == minioMetaBucket {
|
||||
if wildcard.Match("buckets/*/.metacache/*", entry.name) {
|
||||
return
|
||||
}
|
||||
@ -233,19 +233,25 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
||||
}
|
||||
}
|
||||
|
||||
fivs, err := entry.fileInfoVersions(bucket.Name)
|
||||
fivs, err := entry.fileInfoVersions(bucket)
|
||||
if err != nil {
|
||||
err := bgSeq.queueHealTask(healSource{
|
||||
bucket: bucket.Name,
|
||||
bucket: bucket,
|
||||
object: entry.name,
|
||||
versionID: "",
|
||||
}, madmin.HealItemObject)
|
||||
logger.LogIf(ctx, err)
|
||||
if err != nil {
|
||||
tracker.ItemsFailed++
|
||||
logger.LogIf(ctx, err)
|
||||
} else {
|
||||
tracker.ItemsHealed++
|
||||
}
|
||||
bgSeq.logHeal(madmin.HealItemObject)
|
||||
return
|
||||
}
|
||||
|
||||
for _, version := range fivs.Versions {
|
||||
if _, err := er.HealObject(ctx, bucket.Name, version.Name,
|
||||
if _, err := er.HealObject(ctx, bucket, version.Name,
|
||||
version.VersionID, madmin.HealOpts{
|
||||
ScanMode: scanMode,
|
||||
Remove: healDeleteDangling,
|
||||
@ -273,12 +279,12 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
||||
resolver := metadataResolutionParams{
|
||||
dirQuorum: 1,
|
||||
objQuorum: 1,
|
||||
bucket: bucket.Name,
|
||||
bucket: bucket,
|
||||
}
|
||||
|
||||
err := listPathRaw(ctx, listPathRawOptions{
|
||||
disks: disks,
|
||||
bucket: bucket.Name,
|
||||
bucket: bucket,
|
||||
recursive: true,
|
||||
forwardTo: forwardTo,
|
||||
minDisks: 1,
|
||||
@ -297,8 +303,12 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
// Set this such that when we return this function
|
||||
// we let the caller retry this disk again for the
|
||||
// buckets it failed to list.
|
||||
retErr = err
|
||||
logger.LogIf(ctx, err)
|
||||
return err
|
||||
continue
|
||||
}
|
||||
|
||||
select {
|
||||
@ -306,15 +316,14 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
logger.LogIf(ctx, err)
|
||||
tracker.bucketDone(bucket.Name)
|
||||
tracker.bucketDone(bucket)
|
||||
logger.LogIf(ctx, tracker.update(ctx))
|
||||
}
|
||||
}
|
||||
tracker.Object = ""
|
||||
tracker.Bucket = ""
|
||||
|
||||
return nil
|
||||
return retErr
|
||||
}
|
||||
|
||||
// healObject heals given object path in deep to fix bitrot.
|
||||
|
Loading…
x
Reference in New Issue
Block a user