add support for tuning healing to make healing more aggressive (#11003)

supports `mc admin config set <alias> heal sleep=100ms` to
enable more aggressive healing under certain times.

also optimize some areas that were doing extra checks than
necessary when bitrotscan was enabled, avoid double sleeps
make healing more predictable.

fixes #10497
This commit is contained in:
Harshavardhana
2020-12-02 11:12:00 -08:00
committed by GitHub
parent fe11e9047d
commit 96c0ce1f0c
9 changed files with 92 additions and 84 deletions

View File

@@ -20,6 +20,7 @@ import (
"context"
"time"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/madmin"
)
@@ -53,20 +54,28 @@ func (h *healRoutine) queueHealTask(task healTask) {
h.tasks <- task
}
func waitForLowHTTPReq(tolerance int32, maxWait time.Duration) {
const wait = 10 * time.Millisecond
waitCount := maxWait / wait
func waitForLowHTTPReq(tolerance int, maxWait time.Duration) {
// At max 10 attempts to wait with 100 millisecond interval before proceeding
waitCount := 10
waitTick := 100 * time.Millisecond
// Bucket notification and http trace are not costly, it is okay to ignore them
// while counting the number of concurrent connections
tolerance += int32(globalHTTPListen.NumSubscribers() + globalHTTPTrace.NumSubscribers())
toleranceFn := func() int {
return tolerance + globalHTTPListen.NumSubscribers() + globalHTTPTrace.NumSubscribers()
}
if httpServer := newHTTPServerFn(); httpServer != nil {
// Any requests in progress, delay the heal.
for (httpServer.GetRequestCount() >= tolerance) &&
waitCount > 0 {
for httpServer.GetRequestCount() >= toleranceFn() {
time.Sleep(waitTick)
waitCount--
time.Sleep(wait)
if waitCount == 0 {
if intDataUpdateTracker.debug {
logger.Info("waitForLowHTTPReq: waited %d times, resuming", waitCount)
}
break
}
}
}
}
@@ -80,9 +89,6 @@ func (h *healRoutine) run(ctx context.Context, objAPI ObjectLayer) {
break
}
// Wait and proceed if there are active requests
waitForLowHTTPReq(int32(globalEndpoints.NEndpoints()), time.Second)
var res madmin.HealResultItem
var err error
switch {