mirror of
https://github.com/minio/minio.git
synced 2025-11-07 12:52:58 -05:00
add support for tuning healing to make healing more aggressive (#11003)
Supports `mc admin config set <alias> heal sleep=100ms` to enable more aggressive healing at certain times. Also optimizes some areas that were doing more checks than necessary when bitrotscan was enabled, and avoids double sleeps to make healing more predictable. Fixes #10497.
This commit is contained in:
@@ -20,6 +20,7 @@ import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/minio/minio/cmd/logger"
|
||||
"github.com/minio/minio/pkg/madmin"
|
||||
)
|
||||
|
||||
@@ -53,20 +54,28 @@ func (h *healRoutine) queueHealTask(task healTask) {
|
||||
h.tasks <- task
|
||||
}
|
||||
|
||||
func waitForLowHTTPReq(tolerance int32, maxWait time.Duration) {
|
||||
const wait = 10 * time.Millisecond
|
||||
waitCount := maxWait / wait
|
||||
func waitForLowHTTPReq(tolerance int, maxWait time.Duration) {
|
||||
// At max 10 attempts to wait with 100 millisecond interval before proceeding
|
||||
waitCount := 10
|
||||
waitTick := 100 * time.Millisecond
|
||||
|
||||
// Bucket notification and http trace are not costly, it is okay to ignore them
|
||||
// while counting the number of concurrent connections
|
||||
tolerance += int32(globalHTTPListen.NumSubscribers() + globalHTTPTrace.NumSubscribers())
|
||||
toleranceFn := func() int {
|
||||
return tolerance + globalHTTPListen.NumSubscribers() + globalHTTPTrace.NumSubscribers()
|
||||
}
|
||||
|
||||
if httpServer := newHTTPServerFn(); httpServer != nil {
|
||||
// Any requests in progress, delay the heal.
|
||||
for (httpServer.GetRequestCount() >= tolerance) &&
|
||||
waitCount > 0 {
|
||||
for httpServer.GetRequestCount() >= toleranceFn() {
|
||||
time.Sleep(waitTick)
|
||||
waitCount--
|
||||
time.Sleep(wait)
|
||||
if waitCount == 0 {
|
||||
if intDataUpdateTracker.debug {
|
||||
logger.Info("waitForLowHTTPReq: waited %d times, resuming", waitCount)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -80,9 +89,6 @@ func (h *healRoutine) run(ctx context.Context, objAPI ObjectLayer) {
|
||||
break
|
||||
}
|
||||
|
||||
// Wait and proceed if there are active requests
|
||||
waitForLowHTTPReq(int32(globalEndpoints.NEndpoints()), time.Second)
|
||||
|
||||
var res madmin.HealResultItem
|
||||
var err error
|
||||
switch {
|
||||
|
||||
Reference in New Issue
Block a user