initialize the disk healer early on (#19143)

This PR fixes a bug that perhaps has been long introduced,
with no visible workarounds. In any deployment, if an entire
erasure set is deleted, there is no way the cluster recovers.
This commit is contained in:
Harshavardhana
2024-02-27 23:02:14 -08:00
committed by GitHub
parent 0aae0180fb
commit 9a012a53ef
15 changed files with 59 additions and 122 deletions

View File

@@ -224,7 +224,9 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
disks, _ := er.getOnlineDisksWithHealing(false)
if len(disks) == 0 {
logger.LogIf(ctx, fmt.Errorf("no online disks found to heal the bucket `%s`", bucket))
// No object healing necessary
tracker.bucketDone(bucket)
logger.LogIf(ctx, tracker.update(ctx))
continue
}
@@ -472,9 +474,7 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
func healBucket(bucket string, scan madmin.HealScanMode) error {
// Get background heal sequence to send elements to heal
globalHealStateLK.Lock()
bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
globalHealStateLK.Unlock()
if ok {
return bgSeq.queueHealTask(healSource{bucket: bucket}, madmin.HealItemBucket)
}
@@ -484,9 +484,7 @@ func healBucket(bucket string, scan madmin.HealScanMode) error {
// healObject sends the given object/version to the background healing workers
func healObject(bucket, object, versionID string, scan madmin.HealScanMode) error {
// Get background heal sequence to send elements to heal
globalHealStateLK.Lock()
bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
globalHealStateLK.Unlock()
if ok {
return bgSeq.queueHealTask(healSource{
bucket: bucket,