Imporve healing and reporting (#11312)

* Provide information on *actively* healing, buckets healed/queued, objects healed/failed.
* Add concurrent healing of multiple sets (typically on startup).
* Add bucket level resume, so restarts will only heal non-healed buckets.
* Print summary after healing a disk is done.
This commit is contained in:
Klaus Post
2021-03-04 14:36:23 -08:00
committed by GitHub
parent 97e7a902d0
commit fa9cf1251b
36 changed files with 1357 additions and 307 deletions

View File

@@ -90,8 +90,9 @@ type allHealState struct {
sync.RWMutex
// map of heal path to heal sequence
healSeqMap map[string]*healSequence
healSeqMap map[string]*healSequence // Indexed by endpoint
healLocalDisks map[Endpoint]struct{}
healStatus map[string]healingTracker // Indexed by disk ID
}
// newHealState - initialize global heal state management
@@ -99,6 +100,7 @@ func newHealState(cleanup bool) *allHealState {
hstate := &allHealState{
healSeqMap: make(map[string]*healSequence),
healLocalDisks: map[Endpoint]struct{}{},
healStatus: make(map[string]healingTracker),
}
if cleanup {
go hstate.periodicHealSeqsClean(GlobalContext)
@@ -113,7 +115,56 @@ func (ahs *allHealState) healDriveCount() int {
return len(ahs.healLocalDisks)
}
func (ahs *allHealState) getHealLocalDisks() Endpoints {
func (ahs *allHealState) popHealLocalDisks(healLocalDisks ...Endpoint) {
ahs.Lock()
defer ahs.Unlock()
for _, ep := range healLocalDisks {
delete(ahs.healLocalDisks, ep)
}
for id, disk := range ahs.healStatus {
for _, ep := range healLocalDisks {
if disk.Endpoint == ep.String() {
delete(ahs.healStatus, id)
}
}
}
}
// updateHealStatus will update the heal status.
func (ahs *allHealState) updateHealStatus(tracker *healingTracker) {
ahs.Lock()
defer ahs.Unlock()
ahs.healStatus[tracker.ID] = *tracker
}
// Sort by zone, set and disk index
func sortDisks(disks []madmin.Disk) {
sort.Slice(disks, func(i, j int) bool {
a, b := &disks[i], &disks[j]
if a.PoolIndex != b.PoolIndex {
return a.PoolIndex < b.PoolIndex
}
if a.SetIndex != b.SetIndex {
return a.SetIndex < b.SetIndex
}
return a.DiskIndex < b.DiskIndex
})
}
// getLocalHealingDisks returns local healing disks indexed by endpoint.
func (ahs *allHealState) getLocalHealingDisks() map[string]madmin.HealingDisk {
ahs.RLock()
defer ahs.RUnlock()
dst := make(map[string]madmin.HealingDisk, len(ahs.healStatus))
for _, v := range ahs.healStatus {
dst[v.Endpoint] = v.toHealingDisk()
}
return dst
}
func (ahs *allHealState) getHealLocalDiskEndpoints() Endpoints {
ahs.RLock()
defer ahs.RUnlock()
@@ -124,15 +175,6 @@ func (ahs *allHealState) getHealLocalDisks() Endpoints {
return endpoints
}
func (ahs *allHealState) popHealLocalDisks(healLocalDisks ...Endpoint) {
ahs.Lock()
defer ahs.Unlock()
for _, ep := range healLocalDisks {
delete(ahs.healLocalDisks, ep)
}
}
func (ahs *allHealState) pushHealLocalDisks(healLocalDisks ...Endpoint) {
ahs.Lock()
defer ahs.Unlock()