mirror of
https://github.com/minio/minio.git
synced 2024-12-24 22:25:54 -05:00
retry disk replacement healing if listing fails (#13689)
Listing can fail, and it is allowed to be retried. Instead of returning right away when listing fails, record the error and return it at the end, so that the rest of the buckets and objects are still healed. When retrying, skip the buckets that are already marked done by using the tracked buckets. Fixes #12972
This commit is contained in:
parent
81d19156e9
commit
17fd71164c
@ -18,12 +18,12 @@
|
|||||||
package cmd
|
package cmd
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"os"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@ -412,7 +412,8 @@ func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerPools, bgSeq
|
|||||||
// So someone changed the drives underneath, healing tracker missing.
|
// So someone changed the drives underneath, healing tracker missing.
|
||||||
tracker, err := loadHealingTracker(ctx, disk)
|
tracker, err := loadHealingTracker(ctx, disk)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Info("Healing tracker missing on '%s', disk was swapped again on %s pool", disk, humanize.Ordinal(i+1))
|
logger.Info("Healing tracker missing on '%s', disk was swapped again on %s pool",
|
||||||
|
disk, humanize.Ordinal(i+1))
|
||||||
tracker = newHealingTracker(disk)
|
tracker = newHealingTracker(disk)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -434,16 +435,15 @@ func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerPools, bgSeq
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
err = z.serverPools[i].sets[setIndex].healErasureSet(ctx, buckets, tracker)
|
err = z.serverPools[i].sets[setIndex].healErasureSet(ctx, tracker.QueuedBuckets, tracker)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.LogIf(ctx, err)
|
logger.LogIf(ctx, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.Info("Healing disk '%s' on %s pool complete", disk, humanize.Ordinal(i+1))
|
logger.Info("Healing disk '%s' on %s pool complete", disk, humanize.Ordinal(i+1))
|
||||||
var buf bytes.Buffer
|
logger.Info("Summary:\n")
|
||||||
tracker.printTo(&buf)
|
tracker.printTo(os.Stdout)
|
||||||
logger.Info("Summary:\n%s", buf.String())
|
|
||||||
logger.LogIf(ctx, tracker.delete(ctx))
|
logger.LogIf(ctx, tracker.delete(ctx))
|
||||||
|
|
||||||
// Only upon success pop the healed disk.
|
// Only upon success pop the healed disk.
|
||||||
|
@ -23,6 +23,7 @@ import (
|
|||||||
"sort"
|
"sort"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/dustin/go-humanize"
|
||||||
"github.com/minio/madmin-go"
|
"github.com/minio/madmin-go"
|
||||||
"github.com/minio/minio/internal/color"
|
"github.com/minio/minio/internal/color"
|
||||||
"github.com/minio/minio/internal/config/storageclass"
|
"github.com/minio/minio/internal/config/storageclass"
|
||||||
@ -163,23 +164,20 @@ func mustGetHealSequence(ctx context.Context) *healSequence {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// healErasureSet lists and heals all objects in a specific erasure set
|
// healErasureSet lists and heals all objects in a specific erasure set
|
||||||
func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketInfo, tracker *healingTracker) error {
|
func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string, tracker *healingTracker) error {
|
||||||
bgSeq := mustGetHealSequence(ctx)
|
bgSeq := mustGetHealSequence(ctx)
|
||||||
buckets = append(buckets, BucketInfo{
|
|
||||||
Name: pathJoin(minioMetaBucket, minioConfigPrefix),
|
|
||||||
})
|
|
||||||
|
|
||||||
scanMode := globalHealConfig.ScanMode()
|
scanMode := globalHealConfig.ScanMode()
|
||||||
|
|
||||||
|
var retErr error
|
||||||
// Heal all buckets with all objects
|
// Heal all buckets with all objects
|
||||||
for _, bucket := range buckets {
|
for _, bucket := range buckets {
|
||||||
if tracker.isHealed(bucket.Name) {
|
if tracker.isHealed(bucket) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
var forwardTo string
|
var forwardTo string
|
||||||
// If we resume to the same bucket, forward to last known item.
|
// If we resume to the same bucket, forward to last known item.
|
||||||
if tracker.Bucket != "" {
|
if tracker.Bucket != "" {
|
||||||
if tracker.Bucket == bucket.Name {
|
if tracker.Bucket == bucket {
|
||||||
forwardTo = tracker.Object
|
forwardTo = tracker.Object
|
||||||
} else {
|
} else {
|
||||||
// Reset to where last bucket ended if resuming.
|
// Reset to where last bucket ended if resuming.
|
||||||
@ -187,16 +185,18 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
tracker.Object = ""
|
tracker.Object = ""
|
||||||
tracker.Bucket = bucket.Name
|
tracker.Bucket = bucket
|
||||||
// Heal current bucket
|
// Heal current bucket
|
||||||
if _, err := er.HealBucket(ctx, bucket.Name, madmin.HealOpts{
|
if _, err := er.HealBucket(ctx, bucket, madmin.HealOpts{
|
||||||
ScanMode: scanMode,
|
ScanMode: scanMode,
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
logger.LogIf(ctx, err)
|
logger.LogIf(ctx, err)
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if serverDebugLog {
|
if serverDebugLog {
|
||||||
console.Debugf(color.Green("healDisk:")+" healing bucket %s content on erasure set %d\n", bucket.Name, tracker.SetIndex+1)
|
console.Debugf(color.Green("healDisk:")+" healing bucket %s content on %s erasure set\n",
|
||||||
|
bucket, humanize.Ordinal(tracker.SetIndex+1))
|
||||||
}
|
}
|
||||||
|
|
||||||
disks, _ := er.getOnlineDisksWithHealing()
|
disks, _ := er.getOnlineDisksWithHealing()
|
||||||
@ -204,7 +204,7 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
|||||||
// all disks are healing in this set, this is allowed
|
// all disks are healing in this set, this is allowed
|
||||||
// so we simply proceed to next bucket, marking the bucket
|
// so we simply proceed to next bucket, marking the bucket
|
||||||
// as done as there are no objects to heal.
|
// as done as there are no objects to heal.
|
||||||
tracker.bucketDone(bucket.Name)
|
tracker.bucketDone(bucket)
|
||||||
logger.LogIf(ctx, tracker.update(ctx))
|
logger.LogIf(ctx, tracker.update(ctx))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@ -221,7 +221,7 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
|||||||
// We might land at .metacache, .trash, .multipart
|
// We might land at .metacache, .trash, .multipart
|
||||||
// no need to heal them skip, only when bucket
|
// no need to heal them skip, only when bucket
|
||||||
// is '.minio.sys'
|
// is '.minio.sys'
|
||||||
if bucket.Name == minioMetaBucket {
|
if bucket == minioMetaBucket {
|
||||||
if wildcard.Match("buckets/*/.metacache/*", entry.name) {
|
if wildcard.Match("buckets/*/.metacache/*", entry.name) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -233,19 +233,25 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fivs, err := entry.fileInfoVersions(bucket.Name)
|
fivs, err := entry.fileInfoVersions(bucket)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err := bgSeq.queueHealTask(healSource{
|
err := bgSeq.queueHealTask(healSource{
|
||||||
bucket: bucket.Name,
|
bucket: bucket,
|
||||||
object: entry.name,
|
object: entry.name,
|
||||||
versionID: "",
|
versionID: "",
|
||||||
}, madmin.HealItemObject)
|
}, madmin.HealItemObject)
|
||||||
|
if err != nil {
|
||||||
|
tracker.ItemsFailed++
|
||||||
logger.LogIf(ctx, err)
|
logger.LogIf(ctx, err)
|
||||||
|
} else {
|
||||||
|
tracker.ItemsHealed++
|
||||||
|
}
|
||||||
|
bgSeq.logHeal(madmin.HealItemObject)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, version := range fivs.Versions {
|
for _, version := range fivs.Versions {
|
||||||
if _, err := er.HealObject(ctx, bucket.Name, version.Name,
|
if _, err := er.HealObject(ctx, bucket, version.Name,
|
||||||
version.VersionID, madmin.HealOpts{
|
version.VersionID, madmin.HealOpts{
|
||||||
ScanMode: scanMode,
|
ScanMode: scanMode,
|
||||||
Remove: healDeleteDangling,
|
Remove: healDeleteDangling,
|
||||||
@ -273,12 +279,12 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
|||||||
resolver := metadataResolutionParams{
|
resolver := metadataResolutionParams{
|
||||||
dirQuorum: 1,
|
dirQuorum: 1,
|
||||||
objQuorum: 1,
|
objQuorum: 1,
|
||||||
bucket: bucket.Name,
|
bucket: bucket,
|
||||||
}
|
}
|
||||||
|
|
||||||
err := listPathRaw(ctx, listPathRawOptions{
|
err := listPathRaw(ctx, listPathRawOptions{
|
||||||
disks: disks,
|
disks: disks,
|
||||||
bucket: bucket.Name,
|
bucket: bucket,
|
||||||
recursive: true,
|
recursive: true,
|
||||||
forwardTo: forwardTo,
|
forwardTo: forwardTo,
|
||||||
minDisks: 1,
|
minDisks: 1,
|
||||||
@ -297,8 +303,12 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
|||||||
})
|
})
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// Set this such that when we return this function
|
||||||
|
// we let the caller retry this disk again for the
|
||||||
|
// buckets it failed to list.
|
||||||
|
retErr = err
|
||||||
logger.LogIf(ctx, err)
|
logger.LogIf(ctx, err)
|
||||||
return err
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
select {
|
select {
|
||||||
@ -306,15 +316,14 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
|||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return ctx.Err()
|
return ctx.Err()
|
||||||
default:
|
default:
|
||||||
logger.LogIf(ctx, err)
|
tracker.bucketDone(bucket)
|
||||||
tracker.bucketDone(bucket.Name)
|
|
||||||
logger.LogIf(ctx, tracker.update(ctx))
|
logger.LogIf(ctx, tracker.update(ctx))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tracker.Object = ""
|
tracker.Object = ""
|
||||||
tracker.Bucket = ""
|
tracker.Bucket = ""
|
||||||
|
|
||||||
return nil
|
return retErr
|
||||||
}
|
}
|
||||||
|
|
||||||
// healObject heals given object path in deep to fix bitrot.
|
// healObject heals given object path in deep to fix bitrot.
|
||||||
|
Loading…
Reference in New Issue
Block a user