mirror of https://github.com/minio/minio.git
Avoid excessive listing attempts in the daily sweep (#8081)
Add better dynamic timeouts for locks, and add jitter before launching the daily sweep so that not all servers in a distributed setup try to hold locks at the same time to begin the sweep round. Also delay background listing and healing while incoming requests are in flight, using a threshold of totalSetCount*totalDriveCount. A possible fix for #8071.
parent 60f52f461f
commit b3ca304c01
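For a sense of scale, the new in-flight request threshold is the product of the erasure-set count and the drives per set. A tiny illustration with made-up numbers (hypothetical values, not taken from any particular deployment):

// Illustration only: how the delay threshold scales with deployment size.
// The set count and drives-per-set below are hypothetical example values.
package main

import "fmt"

func main() {
	setCount := 4      // hypothetical number of erasure sets
	drivesPerSet := 16 // hypothetical drives per set
	threshold := setCount * drivesPerSet
	fmt.Printf("sweep and heal pause while in-flight requests >= %d\n", threshold) // 64
}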
@@ -62,11 +62,11 @@ func (h *healRoutine) run() {
 			break
 		}
 		if globalHTTPServer != nil {
-			// Wait at max 1 minute for an inprogress request
-			// before proceeding to heal
-			waitCount := 60
+			// Wait at max 10 minute for an inprogress request before proceeding to heal
+			waitCount := 600
 			// Any requests in progress, delay the heal.
-			for globalHTTPServer.GetRequestCount() > 2 && waitCount > 0 {
+			for (globalHTTPServer.GetRequestCount() >= int32(globalXLSetCount*globalXLSetDriveCount)) &&
+				waitCount > 0 {
 				waitCount--
 				time.Sleep(1 * time.Second)
 			}
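Read on its own, the changed loop is a bounded back-off: poll the server's in-flight request count once a second and give up after at most ten minutes. A minimal sketch of that pattern follows; the helper name and the requestCounter interface are assumptions made for illustration, not code from this commit.

// Hypothetical helper mirroring the loop above: wait until the number of
// in-flight requests drops below a threshold, but never longer than maxWait.
package heal

import "time"

type requestCounter interface {
	GetRequestCount() int32
}

func waitForLowLoad(srv requestCounter, threshold int32, maxWait time.Duration) {
	deadline := time.Now().Add(maxWait)
	for srv.GetRequestCount() >= threshold && time.Now().Before(deadline) {
		time.Sleep(1 * time.Second)
	}
}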
@@ -103,14 +103,12 @@ func startDailyLifecycle() {
 	}
 }
 
+var lifecycleTimeout = newDynamicTimeout(60*time.Second, time.Second)
+
 func lifecycleRound(ctx context.Context, objAPI ObjectLayer) error {
-	zeroDuration := time.Millisecond
-	zeroDynamicTimeout := newDynamicTimeout(zeroDuration, zeroDuration)
-
 	// Lock to avoid concurrent lifecycle ops from other nodes
 	sweepLock := globalNSMutex.NewNSLock(ctx, "system", "daily-lifecycle-ops")
-	if err := sweepLock.GetLock(zeroDynamicTimeout); err != nil {
+	if err := sweepLock.GetLock(lifecycleTimeout); err != nil {
 		return err
 	}
 	defer sweepLock.Unlock()
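The old code built a throwaway one-millisecond timeout, so GetLock effectively gave up immediately whenever another node held the lock; the new package-level lifecycleTimeout starts at 60 seconds. The internals of newDynamicTimeout are not shown in this diff; purely as a mental model, a simplified sketch of a self-adjusting timeout (an illustration, not MinIO's actual implementation) might look like this:

// Simplified sketch of a self-adjusting lock timeout (illustration only).
// It widens after failed acquisitions and narrows after successes, never
// dropping below a configured floor.
package locks

import (
	"sync/atomic"
	"time"
)

type dynTimeout struct {
	current int64 // current timeout in nanoseconds, read/written atomically
	floor   int64 // lower bound in nanoseconds, fixed after construction
}

func newDynTimeout(initial, floor time.Duration) *dynTimeout {
	return &dynTimeout{current: int64(initial), floor: int64(floor)}
}

func (d *dynTimeout) Timeout() time.Duration {
	return time.Duration(atomic.LoadInt64(&d.current))
}

// LogFailure widens the timeout after a lock acquisition that timed out.
func (d *dynTimeout) LogFailure() {
	atomic.StoreInt64(&d.current, 2*atomic.LoadInt64(&d.current))
}

// LogSuccess narrows the timeout again after a quick acquisition.
func (d *dynTimeout) LogSuccess() {
	next := atomic.LoadInt64(&d.current) / 2
	if next < d.floor {
		next = d.floor
	}
	atomic.StoreInt64(&d.current, next)
}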
@@ -18,6 +18,7 @@ package cmd
 
 import (
 	"context"
+	"math/rand"
 	"sync"
 	"time"
 
@@ -48,15 +49,14 @@ func copyDailySweepListeners() []chan string {
 	return listenersCopy
 }
 
+var sweepTimeout = newDynamicTimeout(60*time.Second, time.Second)
+
 // sweepRound will list all objects, having read quorum or not and
 // feeds to all listeners, such as the background healing
 func sweepRound(ctx context.Context, objAPI ObjectLayer) error {
-	zeroDuration := time.Millisecond
-	zeroDynamicTimeout := newDynamicTimeout(zeroDuration, zeroDuration)
-
 	// General lock so we avoid parallel daily sweep by different instances.
 	sweepLock := globalNSMutex.NewNSLock(ctx, "system", "daily-sweep")
-	if err := sweepLock.GetLock(zeroDynamicTimeout); err != nil {
+	if err := sweepLock.GetLock(sweepTimeout); err != nil {
 		return err
 	}
 	defer sweepLock.Unlock()
@@ -76,6 +76,17 @@ func sweepRound(ctx context.Context, objAPI ObjectLayer) error {
 
 	marker := ""
 	for {
+		if globalHTTPServer != nil {
+			// Wait at max 10 minute for an inprogress request before proceeding to heal
+			waitCount := 600
+			// Any requests in progress, delay the heal.
+			for (globalHTTPServer.GetRequestCount() >= int32(globalXLSetCount*globalXLSetDriveCount)) &&
+				waitCount > 0 {
+				waitCount--
+				time.Sleep(1 * time.Second)
+			}
+		}
+
 		res, err := objAPI.ListObjectsHeal(ctx, bucket.Name, "", marker, "", 1000)
 		if err != nil {
 			continue
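The hunk above sits inside a marker-driven listing loop: each iteration now waits for the server to quiesce, lists the next page of objects, and feeds them to the sweep listeners. Below is a self-contained sketch of that pagination-and-fan-out pattern; the lister interface and listResult type are stand-ins invented for this example, not the real ObjectLayer API.

// Sketch of marker-based pagination feeding object names to listeners.
// The lister interface and listResult type are assumptions for this example;
// they stand in for objAPI.ListObjectsHeal and its result in the real code.
package sweep

type listResult struct {
	Objects     []string
	NextMarker  string
	IsTruncated bool
}

type lister interface {
	List(bucket, marker string, max int) (listResult, error)
}

func sweepBucket(l lister, bucket string, listeners []chan string) error {
	marker := ""
	for {
		res, err := l.List(bucket, marker, 1000)
		if err != nil {
			return err // the real code retries on error; returning keeps the sketch simple
		}
		for _, obj := range res.Objects {
			for _, ch := range listeners {
				ch <- obj // fan out to background healers and other listeners
			}
		}
		if !res.IsTruncated {
			return nil
		}
		marker = res.NextMarker
	}
}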
@@ -119,6 +130,9 @@ func dailySweeper() {
 		break
 	}
 
+	// Start with random sleep time, so as to avoid "synchronous checks" between servers
+	time.Sleep(time.Duration(rand.Float64() * float64(time.Hour)))
+
 	// Perform a sweep round each month
 	for {
 		if time.Since(lastSweepTime) < 30*24*time.Hour {
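The added jitter spreads the first sweep attempt across up to an hour, so servers started together do not all race for the "daily-sweep" lock at the same moment. A minimal runnable illustration of the same calculation (the Sleep is commented out so the example exits immediately):

// Minimal illustration of start-up jitter: each process sleeps a random
// fraction of an hour before entering its periodic loop, so servers started
// together do not all attempt the sweep lock at the same instant.
package main

import (
	"fmt"
	"math/rand"
	"time"
)

func main() {
	// Note: before Go 1.20 the global generator needs explicit seeding
	// (rand.Seed) to vary between runs.
	jitter := time.Duration(rand.Float64() * float64(time.Hour))
	fmt.Println("sleeping for", jitter, "before the first sweep round")
	// time.Sleep(jitter)
}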