mirror of
https://github.com/minio/minio.git
synced 2025-11-07 12:52:58 -05:00
fix: heal only once per disk per set among multiple disks (#16358)
This commit is contained in:
@@ -79,6 +79,10 @@ type healingTracker struct {
|
||||
|
||||
// Filled during heal.
|
||||
HealedBuckets []string
|
||||
|
||||
// ID of the current healing operation
|
||||
HealID string
|
||||
|
||||
// Add future tracking capabilities
|
||||
// Be sure that they are included in toHealingDisk
|
||||
}
|
||||
@@ -112,11 +116,12 @@ func loadHealingTracker(ctx context.Context, disk StorageAPI) (*healingTracker,
|
||||
}
|
||||
|
||||
// newHealingTracker will create a new healing tracker for the disk.
|
||||
func newHealingTracker(disk StorageAPI) *healingTracker {
|
||||
func newHealingTracker(disk StorageAPI, healID string) *healingTracker {
|
||||
diskID, _ := disk.GetDiskID()
|
||||
h := healingTracker{
|
||||
disk: disk,
|
||||
ID: diskID,
|
||||
HealID: healID,
|
||||
Path: disk.String(),
|
||||
Endpoint: disk.Endpoint().String(),
|
||||
Started: time.Now().UTC(),
|
||||
@@ -227,6 +232,7 @@ func (h *healingTracker) printTo(writer io.Writer) {
|
||||
func (h *healingTracker) toHealingDisk() madmin.HealingDisk {
|
||||
return madmin.HealingDisk{
|
||||
ID: h.ID,
|
||||
HealID: h.HealID,
|
||||
Endpoint: h.Endpoint,
|
||||
PoolIndex: h.PoolIndex,
|
||||
SetIndex: h.SetIndex,
|
||||
@@ -286,8 +292,6 @@ func getLocalDisksToHeal() (disksToHeal Endpoints) {
|
||||
var newDiskHealingTimeout = newDynamicTimeout(30*time.Second, 10*time.Second)
|
||||
|
||||
func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint) error {
|
||||
logger.Info(fmt.Sprintf("Proceeding to heal '%s' - 'mc admin heal alias/ --verbose' to check the status.", endpoint))
|
||||
|
||||
disk, format, err := connectEndpoint(endpoint)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error: %w, %s", err, endpoint)
|
||||
@@ -318,6 +322,20 @@ func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint
|
||||
ctx = lkctx.Context()
|
||||
defer locker.Unlock(lkctx)
|
||||
|
||||
// Load healing tracker in this disk
|
||||
tracker, err := loadHealingTracker(ctx, disk)
|
||||
if err != nil {
|
||||
// A healing track can be not found when another disk in the same
|
||||
// erasure set and same healing-id successfully finished healing.
|
||||
if err == errFileNotFound {
|
||||
return nil
|
||||
}
|
||||
logger.LogIf(ctx, fmt.Errorf("Unable to load a healing tracker on '%s': %w", disk, err))
|
||||
tracker = newHealingTracker(disk, mustGetUUID())
|
||||
}
|
||||
|
||||
logger.Info(fmt.Sprintf("Proceeding to heal '%s' - 'mc admin heal alias/ --verbose' to check the status.", endpoint))
|
||||
|
||||
buckets, _ := z.ListBuckets(ctx, BucketOptions{})
|
||||
// Buckets data are dispersed in multiple zones/sets, make
|
||||
// sure to heal all bucket metadata configuration.
|
||||
@@ -340,15 +358,6 @@ func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint
|
||||
logger.Info("Healing drive '%v' on %s pool", disk, humanize.Ordinal(poolIdx+1))
|
||||
}
|
||||
|
||||
// Load healing tracker in this disk
|
||||
tracker, err := loadHealingTracker(ctx, disk)
|
||||
if err != nil {
|
||||
// So someone changed the drives underneath, healing tracker missing.
|
||||
logger.LogIf(ctx, fmt.Errorf("Healing tracker missing on '%s', drive was swapped again on %s pool: %w",
|
||||
disk, humanize.Ordinal(poolIdx+1), err))
|
||||
tracker = newHealingTracker(disk)
|
||||
}
|
||||
|
||||
// Load bucket totals
|
||||
cache := dataUsageCache{}
|
||||
if err := cache.load(ctx, z.serverPools[poolIdx].sets[setIdx], dataUsageCacheName); err == nil {
|
||||
@@ -379,7 +388,24 @@ func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint
|
||||
logger.Info("\n")
|
||||
}
|
||||
|
||||
logger.LogIf(ctx, tracker.delete(ctx))
|
||||
if tracker.HealID == "" { // HealID is empty only before Feb 2023
|
||||
logger.LogIf(ctx, tracker.delete(ctx))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Remove .healing.bin from all disks with similar heal-id
|
||||
for _, disk := range z.serverPools[poolIdx].sets[setIdx].getDisks() {
|
||||
t, err := loadHealingTracker(ctx, disk)
|
||||
if err != nil {
|
||||
if err != errFileNotFound {
|
||||
logger.LogIf(ctx, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if t.HealID == tracker.HealID {
|
||||
t.delete(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user