heal: Add MRF metrics to background heal API response (#12398)

This commit gathers MRF metrics from all nodes in a cluster and return it to the caller. This will show information about the number of objects in the MRF queues waiting to be healed.
2025-11-07 21:02:58 -05:00 · 2021-07-16 06:32:06 +01:00
parent ead8778305
commit b0b4696a64
12 changed files with 458 additions and 209 deletions
--- a/cmd/erasure-sets.go
+++ b/cmd/erasure-sets.go
@@ -96,8 +96,6 @@ type erasureSets struct {

 	disksStorageInfoCache timedValue

-	mrfMU                  sync.Mutex
-	mrfOperations          map[healSource]int
 	lastConnectDisksOpTime time.Time
 }

@@ -268,20 +266,11 @@ func (s *erasureSets) connectDisks() {
 	wg.Wait()

 	go func() {
-		idler := time.NewTimer(100 * time.Millisecond)
-		defer idler.Stop()
-
 		for setIndex, justConnected := range setsJustConnected {
 			if !justConnected {
 				continue
 			}
-
-			// Send a new set connect event with a timeout
-			idler.Reset(100 * time.Millisecond)
-			select {
-			case s.setReconnectEvent <- setIndex:
-			case <-idler.C:
-			}
+			globalMRFState.newSetReconnected(setIndex, s.poolIndex)
 		}
 	}()
 }
@@ -375,7 +364,6 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto
 		setReconnectEvent:  make(chan int),
 		distributionAlgo:   format.Erasure.DistributionAlgo,
 		deploymentID:       uuid.MustParse(format.ID),
-		mrfOperations:      make(map[healSource]int),
 		poolIndex:          poolIdx,
 	}

@@ -446,7 +434,6 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto
 			nsMutex:               mutex,
 			bp:                    bp,
 			bpOld:                 bpOld,
-			mrfOpCh:               make(chan partialOperation, 10000),
 		}
 	}

@@ -466,8 +453,6 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto

 	// Start the disk monitoring and connect routine.
 	go s.monitorAndConnectEndpoints(ctx, defaultMonitorConnectEndpointInterval)
-	go s.maintainMRFList()
-	go s.healMRFRoutine()

 	return s, nil
 }
@@ -1388,71 +1373,6 @@ func (s *erasureSets) GetObjectTags(ctx context.Context, bucket, object string,
 	return er.GetObjectTags(ctx, bucket, object, opts)
 }

-// maintainMRFList gathers the list of successful partial uploads
-// from all underlying er.sets and puts them in a global map which
-// should not have more than 10000 entries.
-func (s *erasureSets) maintainMRFList() {
-	var agg = make(chan partialOperation, 10000)
-	for i, er := range s.sets {
-		go func(c <-chan partialOperation, setIndex int) {
-			for msg := range c {
-				msg.failedSet = setIndex
-				select {
-				case agg <- msg:
-				default:
-				}
-			}
-		}(er.mrfOpCh, i)
-	}
-
-	for fOp := range agg {
-		s.mrfMU.Lock()
-		if len(s.mrfOperations) > 10000 {
-			s.mrfMU.Unlock()
-			continue
-		}
-		s.mrfOperations[healSource{
-			bucket:    fOp.bucket,
-			object:    fOp.object,
-			versionID: fOp.versionID,
-			opts:      &madmin.HealOpts{Remove: true},
-		}] = fOp.failedSet
-		s.mrfMU.Unlock()
-	}
-}
-
-// healMRFRoutine monitors new disks connection, sweep the MRF list
-// to find objects related to the new disk that needs to be healed.
-func (s *erasureSets) healMRFRoutine() {
-	// Wait until background heal state is initialized
-	bgSeq := mustGetHealSequence(GlobalContext)
-
-	for setIndex := range s.setReconnectEvent {
-		// Get the list of objects related the er.set
-		// to which the connected disk belongs.
-		var mrfOperations []healSource
-		s.mrfMU.Lock()
-		for k, v := range s.mrfOperations {
-			if v == setIndex {
-				mrfOperations = append(mrfOperations, k)
-			}
-		}
-		s.mrfMU.Unlock()
-
-		// Heal objects
-		for _, u := range mrfOperations {
-			waitForLowHTTPReq(globalHealConfig.IOCount, globalHealConfig.Sleep)
-
-			// Send an object to background heal
-			bgSeq.sourceCh <- u
-
-			s.mrfMU.Lock()
-			delete(s.mrfOperations, u)
-			s.mrfMU.Unlock()
-		}
-	}
-}
-
 // TransitionObject - transition object content to target tier.
 func (s *erasureSets) TransitionObject(ctx context.Context, bucket, object string, opts ObjectOptions) error {
 	return s.getHashedSet(object).TransitionObject(ctx, bucket, object, opts)