heal: Add MRF metrics to background heal API response (#12398)

This commit gathers MRF metrics from 
all nodes in a cluster and return it to the caller. This will show information about the 
number of objects in the MRF queues 
waiting to be healed.
This commit is contained in:
Anis Elleuch
2021-07-16 06:32:06 +01:00
committed by GitHub
parent ead8778305
commit b0b4696a64
12 changed files with 458 additions and 209 deletions

View File

@@ -48,18 +48,23 @@ const (
type healingTracker struct {
disk StorageAPI `msg:"-"`
ID string
PoolIndex int
SetIndex int
DiskIndex int
Path string
Endpoint string
Started time.Time
LastUpdate time.Time
ObjectsHealed uint64
ObjectsFailed uint64
BytesDone uint64
BytesFailed uint64
ID string
PoolIndex int
SetIndex int
DiskIndex int
Path string
Endpoint string
Started time.Time
LastUpdate time.Time
ObjectsTotalCount uint64
ObjectsTotalSize uint64
ItemsHealed uint64
ItemsFailed uint64
BytesDone uint64
BytesFailed uint64
// Last object scanned.
Bucket string `json:"-"`
@@ -67,10 +72,10 @@ type healingTracker struct {
// Numbers when current bucket started healing,
// for resuming with correct numbers.
ResumeObjectsHealed uint64 `json:"-"`
ResumeObjectsFailed uint64 `json:"-"`
ResumeBytesDone uint64 `json:"-"`
ResumeBytesFailed uint64 `json:"-"`
ResumeItemsHealed uint64 `json:"-"`
ResumeItemsFailed uint64 `json:"-"`
ResumeBytesDone uint64 `json:"-"`
ResumeBytesFailed uint64 `json:"-"`
// Filled on startup/restarts.
QueuedBuckets []string
@@ -175,8 +180,8 @@ func (h *healingTracker) isHealed(bucket string) bool {
// resume will reset progress to the numbers at the start of the bucket.
func (h *healingTracker) resume() {
h.ObjectsHealed = h.ResumeObjectsHealed
h.ObjectsFailed = h.ResumeObjectsFailed
h.ItemsHealed = h.ResumeItemsHealed
h.ItemsFailed = h.ResumeItemsFailed
h.BytesDone = h.ResumeBytesDone
h.BytesFailed = h.ResumeBytesFailed
}
@@ -184,8 +189,8 @@ func (h *healingTracker) resume() {
// bucketDone should be called when a bucket is done healing.
// Adds the bucket to the list of healed buckets and updates resume numbers.
func (h *healingTracker) bucketDone(bucket string) {
h.ResumeObjectsHealed = h.ObjectsHealed
h.ResumeObjectsFailed = h.ObjectsFailed
h.ResumeItemsHealed = h.ItemsHealed
h.ResumeItemsFailed = h.ItemsFailed
h.ResumeBytesDone = h.BytesDone
h.ResumeBytesFailed = h.BytesFailed
h.HealedBuckets = append(h.HealedBuckets, bucket)
@@ -220,22 +225,28 @@ func (h *healingTracker) printTo(writer io.Writer) {
// toHealingDisk converts the information to madmin.HealingDisk
func (h *healingTracker) toHealingDisk() madmin.HealingDisk {
return madmin.HealingDisk{
ID: h.ID,
Endpoint: h.Endpoint,
PoolIndex: h.PoolIndex,
SetIndex: h.SetIndex,
DiskIndex: h.DiskIndex,
Path: h.Path,
Started: h.Started.UTC(),
LastUpdate: h.LastUpdate.UTC(),
ObjectsHealed: h.ObjectsHealed,
ObjectsFailed: h.ObjectsFailed,
BytesDone: h.BytesDone,
BytesFailed: h.BytesFailed,
Bucket: h.Bucket,
Object: h.Object,
QueuedBuckets: h.QueuedBuckets,
HealedBuckets: h.HealedBuckets,
ID: h.ID,
Endpoint: h.Endpoint,
PoolIndex: h.PoolIndex,
SetIndex: h.SetIndex,
DiskIndex: h.DiskIndex,
Path: h.Path,
Started: h.Started.UTC(),
LastUpdate: h.LastUpdate.UTC(),
ObjectsTotalCount: h.ObjectsTotalCount,
ObjectsTotalSize: h.ObjectsTotalSize,
ItemsHealed: h.ItemsHealed,
ItemsFailed: h.ItemsFailed,
BytesDone: h.BytesDone,
BytesFailed: h.BytesFailed,
Bucket: h.Bucket,
Object: h.Object,
QueuedBuckets: h.QueuedBuckets,
HealedBuckets: h.HealedBuckets,
ObjectsHealed: h.ItemsHealed, // Deprecated July 2021
ObjectsFailed: h.ItemsFailed, // Deprecated July 2021
}
}
@@ -413,6 +424,14 @@ func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerPools, bgSeq
tracker = newHealingTracker(disk)
}
// Load bucket totals
cache := dataUsageCache{}
if err := cache.load(ctx, z.serverPools[i].sets[setIndex], dataUsageCacheName); err == nil {
dataUsageInfo := cache.dui(dataUsageRoot, nil)
tracker.ObjectsTotalCount = dataUsageInfo.ObjectsTotalCount
tracker.ObjectsTotalSize = dataUsageInfo.ObjectsTotalSize
}
tracker.PoolIndex, tracker.SetIndex, tracker.DiskIndex = disk.GetDiskLoc()
tracker.setQueuedBuckets(buckets)
if err := tracker.save(ctx); err != nil {