diff --git a/cmd/server-main.go b/cmd/server-main.go index 7022f59be..46a6df143 100644 --- a/cmd/server-main.go +++ b/cmd/server-main.go @@ -358,6 +358,12 @@ func initServer(ctx context.Context, newObject ObjectLayer) error { r := rand.New(rand.NewSource(time.Now().UnixNano())) lockTimeout := newDynamicTimeout(5*time.Second, 3*time.Second) + // Do not retry, to avoid high contention on startup. + lockTimeout.retryInterval = -1 + + // Do an initial random sleep to avoid a stampeding herd of initial + // lock requests. This will spread lock requests over 1 second. + time.Sleep(time.Duration(r.Float64() * float64(time.Second))) for { select { @@ -380,7 +386,8 @@ func initServer(ctx context.Context, newObject ObjectLayer) error { if err != nil { logger.Info("Waiting for all MinIO sub-systems to be initialized.. trying to acquire lock") - time.Sleep(time.Duration(r.Float64() * float64(5*time.Second))) + // Sleep 0 -> 2 seconds to average 1 second retry interval. + time.Sleep(time.Duration(r.Float64() * 2 * float64(time.Second))) continue } diff --git a/internal/dsync/drwmutex.go b/internal/dsync/drwmutex.go index 2c6b4c84a..d82023e64 100644 --- a/internal/dsync/drwmutex.go +++ b/internal/dsync/drwmutex.go @@ -23,6 +23,7 @@ import ( "math/rand" "os" "sort" + "strconv" "sync" "time" @@ -32,9 +33,23 @@ import ( // Indicator if logging is enabled. var dsyncLog bool +// maximum time to sleep before retrying a failed blocking lock() +var lockRetryInterval time.Duration + func init() { // Check for MINIO_DSYNC_TRACE env variable, if set logging will be enabled for failed REST operations. dsyncLog = os.Getenv("MINIO_DSYNC_TRACE") == "1" + + // lockRetryInterval specifies the maximum time between retries for failed locks. + // Average retry time will be value / 2. 
+ lockRetryInterval = 100 * time.Millisecond + if lri := os.Getenv("_MINIO_LOCK_RETRY_INTERVAL"); lri != "" { + v, err := strconv.Atoi(lri) + if err != nil { + panic(err) + } + lockRetryInterval = time.Duration(v) * time.Millisecond + } } func log(format string, data ...interface{}) { @@ -59,9 +74,6 @@ const ( // dRWMutexRefreshInterval - default the interval between two refresh calls drwMutexRefreshInterval = 10 * time.Second - // maximum time to sleep before retrying a failed blocking lock() - lockRetryInterval = 50 * time.Millisecond - drwMutexInfinite = 1<<63 - 1 ) @@ -239,9 +251,12 @@ func (dm *DRWMutex) lockBlocking(ctx context.Context, lockLossCallback func(), i } lockRetryInterval := dm.lockRetryInterval - if opts.RetryInterval > 0 { + if opts.RetryInterval != 0 { lockRetryInterval = opts.RetryInterval } + if lockRetryInterval < 0 { + return false + } time.Sleep(time.Duration(dm.rng.Float64() * float64(lockRetryInterval))) } }