xl: Rewrite auto-healing and implement auto new-disk healer (#8114)

The new auto-healing model always selects a single node that is
responsible for auto-healing the whole cluster, erasure set by
erasure set. If that node dies, another node is elected as the
leading operator to perform healing.

This code also adds a goroutine which checks every 10 minutes
whether there are any new unformatted disks and, if so, heals
them. In that case, only the erasure set which has the new disk
will be healed.
This commit is contained in:
Anis Elleuch
2019-10-28 18:27:49 +01:00
committed by kannappanr
parent 10b526ed86
commit a49d4a9cb2
7 changed files with 339 additions and 113 deletions

View File

@@ -83,7 +83,9 @@ func (h *healRoutine) run() {
case bucket != "" && object != "":
res, err = bgHealObject(ctx, bucket, object, task.opts)
}
task.responseCh <- healResult{result: res, err: err}
if task.responseCh != nil {
task.responseCh <- healResult{result: res, err: err}
}
case <-h.doneCh:
return
case <-GlobalServiceDoneCh:
@@ -100,11 +102,33 @@ func initHealRoutine() *healRoutine {
}
func initBackgroundHealing() {
healBg := initHealRoutine()
go healBg.run()
func startBackgroundHealing() {
ctx := context.Background()
globalBackgroundHealing = healBg
var objAPI ObjectLayer
for {
objAPI = newObjectLayerFn()
if objAPI == nil {
time.Sleep(time.Second)
continue
}
break
}
// Run the background healer
globalBackgroundHealRoutine = initHealRoutine()
go globalBackgroundHealRoutine.run()
// Launch the background healer sequence to track
// background healing operations
info := objAPI.StorageInfo(ctx)
numDisks := info.Backend.OnlineDisks.Sum() + info.Backend.OfflineDisks.Sum()
nh := newBgHealSequence(numDisks)
globalBackgroundHealState.LaunchNewHealSequence(nh)
}
func initBackgroundHealing() {
go startBackgroundHealing()
}
// bgHealDiskFormat - heals format.json, return value indicates if a