fix: avoid timer leaks in dsync/lsync (#9781)

At a customer setup with lots of concurrent calls it can be observed that in newRetryTimer there were lots of tiny allocations which are not relinquished upon retries. In this codepath we are only interested in re-using the timer and using it wisely for each locker.

```
(pprof) top
Showing nodes accounting for 8.68TB, 97.02% of 8.95TB total
Dropped 1198 nodes (cum <= 0.04TB)
Showing top 10 nodes out of 79
      flat  flat%   sum%        cum   cum%
    5.95TB 66.50% 66.50%     5.95TB 66.50%  time.NewTimer
    1.16TB 13.02% 79.51%     1.16TB 13.02%  github.com/ncw/directio.AlignedBlock
    0.67TB  7.53% 87.04%     0.70TB  7.78%  github.com/minio/minio/cmd.xlObjects.putObject
    0.21TB  2.36% 89.40%     0.21TB  2.36%  github.com/minio/minio/cmd.(*posix).Walk
    0.19TB  2.08% 91.49%     0.27TB  2.99%  os.statNolog
    0.14TB  1.59% 93.08%     0.14TB  1.60%  os.(*File).readdirnames
    0.10TB  1.09% 94.17%     0.11TB  1.25%  github.com/minio/minio/cmd.readDirN
    0.10TB  1.07% 95.23%     0.10TB  1.07%  syscall.ByteSliceFromString
    0.09TB  1.03% 96.27%     0.09TB  1.03%  strings.(*Builder).grow
    0.07TB  0.75% 97.02%     0.07TB  0.75%  path.(*lazybuf).append
```
2025-05-20 17:14:06 -04:00 · 2020-06-08 11:28:40 -07:00 · 2020-06-08 11:28:40 -07:00 · febe9cc26a
commit febe9cc26a
parent 2ce2e88adf
16 changed files with 783 additions and 675 deletions
--- a/cmd/api-errors.go
+++ b/cmd/api-errors.go
@ -1138,14 +1138,14 @@ var errorCodes = errorCodeMap{
 		HTTPStatusCode: http.StatusBadRequest,
 	},
 	ErrOperationTimedOut: {
-		Code:           "XMinioServerTimedOut",
-		Description:    "A timeout occurred while trying to lock a resource",
-		HTTPStatusCode: http.StatusRequestTimeout,
+		Code:           "RequestTimeout",
+		Description:    "A timeout occurred while trying to lock a resource, please reduce your request rate",
+		HTTPStatusCode: http.StatusServiceUnavailable,
 	},
 	ErrOperationMaxedOut: {
-		Code:           "XMinioServerTimedOut",
-		Description:    "A timeout exceeded while waiting to proceed with the request",
-		HTTPStatusCode: http.StatusRequestTimeout,
+		Code:           "SlowDown",
+		Description:    "A timeout exceeded while waiting to proceed with the request, please reduce your request rate",
+		HTTPStatusCode: http.StatusServiceUnavailable,
 	},
 	ErrUnsupportedMetadata: {
 		Code:           "InvalidArgument",
--- a/cmd/format-fs.go
+++ b/cmd/format-fs.go
@ -27,6 +27,7 @@ import (
 	"github.com/minio/minio/cmd/config"
 	"github.com/minio/minio/cmd/logger"
 	"github.com/minio/minio/pkg/lock"
+	"github.com/minio/minio/pkg/retry"
 )

 // FS format version strings.
@ -344,7 +345,7 @@ func formatFSFixDeploymentID(ctx context.Context, fsFormatPath string) error {
 	defer cancel()

 	var wlk *lock.LockedFile
-	retryCh := newRetryTimerSimple(retryCtx)
+	retryCh := retry.NewTimerWithJitter(retryCtx, time.Second, 30*time.Second, retry.MaxJitter)
 	var stop bool
 	for !stop {
 		select {
--- a/cmd/fs-v1.go
+++ b/cmd/fs-v1.go
@ -613,13 +613,11 @@ func (fs *FSObjects) GetObjectNInfo(ctx context.Context, bucket, object string,
 		switch lockType {
 		case writeLock:
 			if err = lock.GetLock(globalObjectTimeout); err != nil {
-				logger.LogIf(ctx, err)
 				return nil, err
 			}
 			nsUnlocker = lock.Unlock
 		case readLock:
 			if err = lock.GetRLock(globalObjectTimeout); err != nil {
-				logger.LogIf(ctx, err)
 				return nil, err
 			}
 			nsUnlocker = lock.RUnlock
--- a/cmd/lock-rest-client.go
+++ b/cmd/lock-rest-client.go
@ -76,7 +76,7 @@ func (client *lockRESTClient) call(method string, values url.Values, body io.Rea
 	}

 	if isNetworkError(err) {
-		time.AfterFunc(defaultRetryUnit, func() {
+		time.AfterFunc(time.Second, func() {
 			// After 1 seconds, take this lock client online for a retry.
 			atomic.StoreInt32(&client.connected, 1)
 		})
--- a/cmd/namespace-lock.go
+++ b/cmd/namespace-lock.go
@ -28,9 +28,9 @@ import (
 	"fmt"
 	"time"

-	"github.com/minio/lsync"
 	"github.com/minio/minio/cmd/logger"
 	"github.com/minio/minio/pkg/dsync"
+	"github.com/minio/minio/pkg/lsync"
 )

 // local lock servers
--- a/cmd/server-main.go
+++ b/cmd/server-main.go
@ -38,6 +38,7 @@ import (
 	"github.com/minio/minio/pkg/certs"
 	"github.com/minio/minio/pkg/color"
 	"github.com/minio/minio/pkg/env"
+	"github.com/minio/minio/pkg/retry"
 )

 // ServerFlags - server command specific flags
@ -216,10 +217,10 @@ func initSafeMode() (err error) {
 	//    version is needed, migration is needed etc.
 	rquorum := InsufficientReadQuorum{}
 	wquorum := InsufficientWriteQuorum{}
-	for range newRetryTimerSimple(retryCtx) {
+	for range retry.NewTimer(retryCtx) {
 		// let one of the server acquire the lock, if not let them timeout.
 		// which shall be retried again by this loop.
-		if err = txnLk.GetLock(leaderLockTimeout); err != nil {
+		if err = txnLk.GetLock(newDynamicTimeout(5*time.Second, 30*time.Second)); err != nil {
 			logger.Info("Waiting for all MinIO sub-systems to be initialized.. trying to acquire lock")
 			continue
 		}
--- a/cmd/server_test.go
+++ b/cmd/server_test.go
--- a/go.mod
+++ b/go.mod
@ -67,7 +67,6 @@ require (
 	github.com/miekg/dns v1.1.8
 	github.com/minio/cli v1.22.0
 	github.com/minio/highwayhash v1.0.0
-	github.com/minio/lsync v1.0.1
 	github.com/minio/minio-go/v6 v6.0.56
 	github.com/minio/parquet-go v0.0.0-20200414234858-838cfa8aae61
 	github.com/minio/sha256-simd v0.1.1
--- a/go.sum
+++ b/go.sum
@ -273,8 +273,6 @@ github.com/minio/cli v1.22.0 h1:VTQm7lmXm3quxO917X3p+el1l0Ca5X3S4PM2ruUYO68=
 github.com/minio/cli v1.22.0/go.mod h1:bYxnK0uS629N3Bq+AOZZ+6lwF77Sodk4+UL9vNuXhOY=
 github.com/minio/highwayhash v1.0.0 h1:iMSDhgUILCr0TNm8LWlSjF8N0ZIj2qbO8WHp6Q/J2BA=
 github.com/minio/highwayhash v1.0.0/go.mod h1:xQboMTeM9nY9v/LlAOxFctujiv5+Aq2hR5dxBpaMbdc=
-github.com/minio/lsync v1.0.1 h1:AVvILxA976xc27hstd1oR+X9PQG0sPSom1MNb1ImfUs=
-github.com/minio/lsync v1.0.1/go.mod h1:tCFzfo0dlvdGl70IT4IAK/5Wtgb0/BrTmo/jE8pArKA=
 github.com/minio/minio-go/v6 v6.0.53/go.mod h1:DIvC/IApeHX8q1BAMVCXSXwpmrmM+I+iBvhvztQorfI=
 github.com/minio/minio-go/v6 v6.0.56 h1:H4+v6UFV1V7VkEf1HjL15W9OvTL1Gy8EbMmjQZHqEbg=
 github.com/minio/minio-go/v6 v6.0.56/go.mod h1:KQMM+/44DSlSGSQWSfRrAZ12FVMmpWNuX37i2AX0jfI=
--- a/pkg/dsync/drwmutex.go
+++ b/pkg/dsync/drwmutex.go
@ -24,6 +24,8 @@ import (
 	"os"
 	"sync"
 	"time"
+
+	"github.com/minio/minio/pkg/retry"
 )

 // Indicator if logging is enabled.
@ -128,49 +130,41 @@ func (dm *DRWMutex) GetRLock(id, source string, timeout time.Duration) (locked b
 // algorithm until either the lock is acquired successfully or more
 // time has elapsed than the timeout value.
 func (dm *DRWMutex) lockBlocking(timeout time.Duration, id, source string, isReadLock bool) (locked bool) {
-	start := time.Now().UTC()
-
 	restClnts := dm.clnt.GetLockersFn()

-	retryCtx, cancel := context.WithCancel(dm.ctx)
+	retryCtx, cancel := context.WithTimeout(dm.ctx, timeout)

 	defer cancel()

 	// Use incremental back-off algorithm for repeated attempts to acquire the lock
-	for range newRetryTimerSimple(retryCtx) {
-		select {
-		case <-dm.ctx.Done():
-			return
-		default:
-		}
-
+	for range retry.NewTimer(retryCtx) {
 		// Create temp array on stack.
 		locks := make([]string, len(restClnts))

 		// Try to acquire the lock.
 		success := lock(dm.clnt, &locks, id, source, isReadLock, dm.Names...)
-		if success {
-			dm.m.Lock()
-
-			// If success, copy array to object
-			if isReadLock {
-				// Append new array of strings at the end
-				dm.readersLocks = append(dm.readersLocks, make([]string, len(restClnts)))
-				// and copy stack array into last spot
-				copy(dm.readersLocks[len(dm.readersLocks)-1], locks[:])
-			} else {
-				copy(dm.writeLocks, locks[:])
-			}
-
-			dm.m.Unlock()
-			return true
+		if !success {
+			continue
 		}
-		if time.Now().UTC().Sub(start) >= timeout { // Are we past the timeout?
-			break
+
+		dm.m.Lock()
+
+		// If success, copy array to object
+		if isReadLock {
+			// Append new array of strings at the end
+			dm.readersLocks = append(dm.readersLocks, make([]string, len(restClnts)))
+			// and copy stack array into last spot
+			copy(dm.readersLocks[len(dm.readersLocks)-1], locks[:])
+		} else {
+			copy(dm.writeLocks, locks[:])
 		}
-		// Failed to acquire the lock on this attempt, incrementally wait
-		// for a longer back-off time and try again afterwards.
+
+		dm.m.Unlock()
+		return true
 	}
+
+	// The retry channel was closed: either the timeout elapsed or dm.ctx
+	// was canceled before the lock could be acquired.
 	return false
 }

@ -233,7 +227,7 @@ func lock(ds *Dsync, locks *[]string, id, source string, isReadLock bool, lockNa
 		//
 		// a) received all lock responses
 		// b) received too many 'non-'locks for quorum to be still possible
-		// c) time out
+		// c) timed out
 		//
 		i, locksFailed := 0, 0
 		done := false
--- a/pkg/dsync/retry.go
+++ b/pkg/dsync/retry.go
@ -1,136 +0,0 @@
-/*
- * Minio Cloud Storage, (C) 2017 Minio, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dsync
-
-import (
-	"context"
-	"math/rand"
-	"sync"
-	"time"
-)
-
-// lockedRandSource provides protected rand source, implements rand.Source interface.
-type lockedRandSource struct {
-	lk  sync.Mutex
-	src rand.Source
-}
-
-// Int63 returns a non-negative pseudo-random 63-bit integer as an
-// int64.
-func (r *lockedRandSource) Int63() (n int64) {
-	r.lk.Lock()
-	n = r.src.Int63()
-	r.lk.Unlock()
-	return
-}
-
-// Seed uses the provided seed value to initialize the generator to a
-// deterministic state.
-func (r *lockedRandSource) Seed(seed int64) {
-	r.lk.Lock()
-	r.src.Seed(seed)
-	r.lk.Unlock()
-}
-
-// MaxJitter will randomize over the full exponential backoff time
-const MaxJitter = 1.0
-
-// NoJitter disables the use of jitter for randomizing the
-// exponential backoff time
-const NoJitter = 0.0
-
-// Global random source for fetching random values.
-var globalRandomSource = rand.New(&lockedRandSource{
-	src: rand.NewSource(time.Now().UTC().UnixNano()),
-})
-
-// newRetryTimerJitter creates a timer with exponentially increasing delays
-// until the maximum retry attempts are reached. - this function is a fully
-// configurable version, meant for only advanced use cases. For the most part
-// one should use newRetryTimerSimple and newRetryTimer.
-func newRetryTimerWithJitter(ctx context.Context, unit time.Duration, cap time.Duration, jitter float64) <-chan int {
-	attemptCh := make(chan int)
-
-	// normalize jitter to the range [0, 1.0]
-	if jitter < NoJitter {
-		jitter = NoJitter
-	}
-	if jitter > MaxJitter {
-		jitter = MaxJitter
-	}
-
-	// computes the exponential backoff duration according to
-	// https://www.awsarchitectureblog.com/2015/03/backoff.html
-	exponentialBackoffWait := func(attempt int) time.Duration {
-		// 1<<uint(attempt) below could overflow, so limit the value of attempt
-		maxAttempt := 30
-		if attempt > maxAttempt {
-			attempt = maxAttempt
-		}
-		//sleep = random_between(0, min(cap, base * 2 ** attempt))
-		sleep := unit * time.Duration(1<<uint(attempt))
-		if sleep > cap {
-			sleep = cap
-		}
-		if jitter != NoJitter {
-			sleep -= time.Duration(globalRandomSource.Float64() * float64(sleep) * jitter)
-		}
-		return sleep
-	}
-
-	go func() {
-		defer close(attemptCh)
-		nextBackoff := 0
-		// Channel used to signal after the expiry of backoff wait seconds.
-		var timer *time.Timer
-		for {
-			select { // Attempts starts.
-			case attemptCh <- nextBackoff:
-				nextBackoff++
-			case <-ctx.Done():
-				// Stop the routine.
-				return
-			}
-			timer = time.NewTimer(exponentialBackoffWait(nextBackoff))
-			// wait till next backoff time or till doneCh gets a message.
-			select {
-			case <-timer.C:
-			case <-ctx.Done():
-				// stop the timer and return.
-				timer.Stop()
-				return
-			}
-
-		}
-	}()
-
-	// Start reading..
-	return attemptCh
-}
-
-// Default retry constants.
-const (
-	defaultRetryUnit = time.Second     // 1 second.
-	defaultRetryCap  = 1 * time.Second // 1 second.
-)
-
-// newRetryTimerSimple creates a timer with exponentially increasing delays
-// until the maximum retry attempts are reached. - this function is a
-// simpler version with all default values.
-func newRetryTimerSimple(ctx context.Context) <-chan int {
-	return newRetryTimerWithJitter(ctx, defaultRetryUnit, defaultRetryCap, MaxJitter)
-}
--- a/pkg/dsync/retry_test.go
+++ b/pkg/dsync/retry_test.go
@ -1,87 +0,0 @@
-/*
- * Minio Cloud Storage, (C) 2017 Minio, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package dsync
-
-import (
-	"context"
-	"testing"
-	"time"
-)
-
-// Tests for retry timer.
-func TestRetryTimerSimple(t *testing.T) {
-	rctx, cancel := context.WithCancel(context.Background())
-	attemptCh := newRetryTimerSimple(rctx)
-	i := <-attemptCh
-	if i != 0 {
-		cancel()
-		t.Fatalf("Invalid attempt counter returned should be 0, found %d instead", i)
-	}
-	i = <-attemptCh
-	if i <= 0 {
-		cancel()
-		t.Fatalf("Invalid attempt counter returned should be greater than 0, found %d instead", i)
-	}
-	cancel()
-	_, ok := <-attemptCh
-	if ok {
-		t.Fatal("Attempt counter should be closed")
-	}
-}
-
-// Test retry time with no jitter.
-func TestRetryTimerWithNoJitter(t *testing.T) {
-	rctx, cancel := context.WithCancel(context.Background())
-
-	// No jitter
-	attemptCh := newRetryTimerWithJitter(rctx, time.Millisecond, 5*time.Millisecond, NoJitter)
-	i := <-attemptCh
-	if i != 0 {
-		cancel()
-		t.Fatalf("Invalid attempt counter returned should be 0, found %d instead", i)
-	}
-	// Loop through the maximum possible attempt.
-	for i = range attemptCh {
-		if i == 30 {
-			break
-		}
-	}
-	cancel()
-
-	_, ok := <-attemptCh
-	if ok {
-		t.Fatal("Attempt counter should be closed")
-	}
-}
-
-// Test retry time with Jitter greater than MaxJitter.
-func TestRetryTimerWithJitter(t *testing.T) {
-	rctx, cancel := context.WithCancel(context.Background())
-
-	// Jitter will be set back to 1.0
-	attemptCh := newRetryTimerWithJitter(rctx, time.Second, 30*time.Second, 2.0)
-	i := <-attemptCh
-	if i != 0 {
-		cancel()
-		t.Fatalf("Invalid attempt counter returned should be 0, found %d instead", i)
-	}
-	cancel()
-	_, ok := <-attemptCh
-	if ok {
-		t.Fatal("Attempt counter should be closed")
-	}
-}
--- a/pkg/lsync/lrwmutex.go
+++ b/pkg/lsync/lrwmutex.go
@ -0,0 +1,188 @@
+/*
+ * Minio Cloud Storage, (C) 2017 Minio, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package lsync
+
+import (
+	"context"
+	"math"
+	"sync"
+	"time"
+
+	"github.com/minio/minio/pkg/retry"
+)
+
+// A LRWMutex is a mutual exclusion lock with timeouts.
+type LRWMutex struct {
+	id          string
+	source      string
+	isWriteLock bool
+	ref         int
+	m           sync.Mutex // Mutex to prevent multiple simultaneous locks
+	ctx         context.Context
+}
+
+// NewLRWMutex - initializes a new lsync RW mutex.
+func NewLRWMutex(ctx context.Context) *LRWMutex {
+	return &LRWMutex{ctx: ctx}
+}
+
+// Lock holds a write lock on lm.
+//
+// If the lock is already in use, the calling go routine
+// blocks until the mutex is available.
+func (lm *LRWMutex) Lock() {
+
+	const isWriteLock = true
+	lm.lockLoop(lm.id, lm.source, time.Duration(math.MaxInt64), isWriteLock)
+}
+
+// GetLock tries to get a write lock on lm before the timeout occurs.
+func (lm *LRWMutex) GetLock(id string, source string, timeout time.Duration) (locked bool) {
+
+	const isWriteLock = true
+	return lm.lockLoop(id, source, timeout, isWriteLock)
+}
+
+// RLock holds a read lock on lm.
+//
+// If one or more read lock are already in use, it will grant another lock.
+// Otherwise the calling go routine blocks until the mutex is available.
+func (lm *LRWMutex) RLock() {
+
+	const isWriteLock = false
+	lm.lockLoop(lm.id, lm.source, time.Duration(1<<63-1), isWriteLock)
+}
+
+// GetRLock tries to get a read lock on lm before the timeout occurs.
+func (lm *LRWMutex) GetRLock(id string, source string, timeout time.Duration) (locked bool) {
+
+	const isWriteLock = false
+	return lm.lockLoop(id, source, timeout, isWriteLock)
+}
+
+// lockLoop will acquire either a read or a write lock
+//
+// The call blocks, retrying with a randomized back-off, until the lock
+// is granted (returns true) or the timeout elapses (returns false).
+func (lm *LRWMutex) lockLoop(id, source string, timeout time.Duration, isWriteLock bool) bool {
+	retryCtx, cancel := context.WithTimeout(lm.ctx, timeout)
+
+	defer cancel()
+
+	// Keep retrying with a randomized, exponentially increasing back-off
+	// until the lock is acquired or retryCtx is done.
+	for range retry.NewTimer(retryCtx) {
+		// Try to acquire the lock.
+		var success bool
+		{
+			lm.m.Lock()
+
+			lm.id = id
+			lm.source = source
+
+			if isWriteLock {
+				if lm.ref == 0 && !lm.isWriteLock {
+					lm.ref = 1
+					lm.isWriteLock = true
+					success = true
+				}
+			} else {
+				if !lm.isWriteLock {
+					lm.ref++
+					success = true
+				}
+			}
+
+			lm.m.Unlock()
+		}
+
+		if success {
+			return true
+		}
+	}
+
+	// The retry channel was closed: either the timeout elapsed or lm.ctx
+	// was canceled before the lock could be acquired.
+
+	return false
+}
+
+// Unlock unlocks the write lock.
+//
+// It is a run-time error if lm is not locked on entry to Unlock.
+func (lm *LRWMutex) Unlock() {
+
+	isWriteLock := true
+	success := lm.unlock(isWriteLock)
+	if !success {
+		panic("Trying to Unlock() while no Lock() is active")
+	}
+}
+
+// RUnlock releases a read lock held on lm.
+//
+// It is a run-time error if lm is not locked on entry to RUnlock.
+func (lm *LRWMutex) RUnlock() {
+
+	isWriteLock := false
+	success := lm.unlock(isWriteLock)
+	if !success {
+		panic("Trying to RUnlock() while no RLock() is active")
+	}
+}
+
+func (lm *LRWMutex) unlock(isWriteLock bool) (unlocked bool) {
+	lm.m.Lock()
+
+	// Try to release lock.
+	if isWriteLock {
+		if lm.isWriteLock && lm.ref == 1 {
+			lm.ref = 0
+			lm.isWriteLock = false
+			unlocked = true
+		}
+	} else {
+		if !lm.isWriteLock {
+			if lm.ref > 0 {
+				lm.ref--
+				unlocked = true
+			}
+		}
+	}
+
+	lm.m.Unlock()
+	return unlocked
+}
+
+// ForceUnlock will forcefully clear a write or read lock.
+func (lm *LRWMutex) ForceUnlock() {
+	lm.m.Lock()
+	lm.ref = 0
+	lm.isWriteLock = false
+	lm.m.Unlock()
+}
+
+// DRLocker returns a sync.Locker interface that implements
+// the Lock and Unlock methods by calling lm.RLock and lm.RUnlock.
+func (lm *LRWMutex) DRLocker() sync.Locker {
+	return (*drlocker)(lm)
+}
+
+type drlocker LRWMutex
+
+func (dr *drlocker) Lock()   { (*LRWMutex)(dr).RLock() }
+func (dr *drlocker) Unlock() { (*LRWMutex)(dr).RUnlock() }
--- a/pkg/lsync/lrwmutex_test.go
+++ b/pkg/lsync/lrwmutex_test.go
@ -0,0 +1,338 @@
+/*
+ * Minio Cloud Storage, (C) 2017 Minio, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// GOMAXPROCS=10 go test
+
+package lsync_test
+
+import (
+	"context"
+	"fmt"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"runtime"
+
+	. "github.com/minio/minio/pkg/lsync"
+)
+
+func testSimpleWriteLock(t *testing.T, duration time.Duration) (locked bool) {
+
+	lrwm := NewLRWMutex(context.Background())
+
+	if !lrwm.GetRLock("", "object1", time.Second) {
+		panic("Failed to acquire read lock")
+	}
+	// fmt.Println("1st read lock acquired, waiting...")
+
+	if !lrwm.GetRLock("", "object1", time.Second) {
+		panic("Failed to acquire read lock")
+	}
+	// fmt.Println("2nd read lock acquired, waiting...")
+
+	go func() {
+		time.Sleep(2 * time.Second)
+		lrwm.RUnlock()
+		// fmt.Println("1st read lock released, waiting...")
+	}()
+
+	go func() {
+		time.Sleep(3 * time.Second)
+		lrwm.RUnlock()
+		// fmt.Println("2nd read lock released, waiting...")
+	}()
+
+	// fmt.Println("Trying to acquire write lock, waiting...")
+	locked = lrwm.GetLock("", "", duration)
+	if locked {
+		// fmt.Println("Write lock acquired, waiting...")
+		time.Sleep(1 * time.Second)
+
+		lrwm.Unlock()
+	} else {
+		// fmt.Println("Write lock failed due to timeout")
+	}
+	return
+}
+
+func TestSimpleWriteLockAcquired(t *testing.T) {
+	locked := testSimpleWriteLock(t, 5*time.Second)
+
+	expected := true
+	if locked != expected {
+		t.Errorf("TestSimpleWriteLockAcquired(): \nexpected %#v\ngot      %#v", expected, locked)
+	}
+}
+
+func TestSimpleWriteLockTimedOut(t *testing.T) {
+	locked := testSimpleWriteLock(t, time.Second)
+
+	expected := false
+	if locked != expected {
+		t.Errorf("TestSimpleWriteLockTimedOut(): \nexpected %#v\ngot      %#v", expected, locked)
+	}
+}
+
+func testDualWriteLock(t *testing.T, duration time.Duration) (locked bool) {
+
+	lrwm := NewLRWMutex(context.Background())
+
+	// fmt.Println("Getting initial write lock")
+	if !lrwm.GetLock("", "", time.Second) {
+		panic("Failed to acquire initial write lock")
+	}
+
+	go func() {
+		time.Sleep(2 * time.Second)
+		lrwm.Unlock()
+		// fmt.Println("Initial write lock released, waiting...")
+	}()
+
+	// fmt.Println("Trying to acquire 2nd write lock, waiting...")
+	locked = lrwm.GetLock("", "", duration)
+	if locked {
+		// fmt.Println("2nd write lock acquired, waiting...")
+		time.Sleep(time.Second)
+
+		lrwm.Unlock()
+	} else {
+		// fmt.Println("2nd write lock failed due to timeout")
+	}
+	return
+}
+
+func TestDualWriteLockAcquired(t *testing.T) {
+	locked := testDualWriteLock(t, 3*time.Second)
+
+	expected := true
+	if locked != expected {
+		t.Errorf("TestDualWriteLockAcquired(): \nexpected %#v\ngot      %#v", expected, locked)
+	}
+
+}
+
+func TestDualWriteLockTimedOut(t *testing.T) {
+	locked := testDualWriteLock(t, time.Second)
+
+	expected := false
+	if locked != expected {
+		t.Errorf("TestDualWriteLockTimedOut(): \nexpected %#v\ngot      %#v", expected, locked)
+	}
+
+}
+
+// Test cases below are copied 1 to 1 from sync/rwmutex_test.go (adapted to use LRWMutex)
+
+// Borrowed from rwmutex_test.go
+func parallelReader(m *LRWMutex, clocked, cunlock, cdone chan bool) {
+	if m.GetRLock("", "", time.Second) {
+		clocked <- true
+		<-cunlock
+		m.RUnlock()
+		cdone <- true
+	}
+}
+
+// Borrowed from rwmutex_test.go
+func doTestParallelReaders(numReaders, gomaxprocs int) {
+	runtime.GOMAXPROCS(gomaxprocs)
+	m := NewLRWMutex(context.Background())
+
+	clocked := make(chan bool)
+	cunlock := make(chan bool)
+	cdone := make(chan bool)
+	for i := 0; i < numReaders; i++ {
+		go parallelReader(m, clocked, cunlock, cdone)
+	}
+	// Wait for all parallel RLock()s to succeed.
+	for i := 0; i < numReaders; i++ {
+		<-clocked
+	}
+	for i := 0; i < numReaders; i++ {
+		cunlock <- true
+	}
+	// Wait for the goroutines to finish.
+	for i := 0; i < numReaders; i++ {
+		<-cdone
+	}
+}
+
+// Borrowed from rwmutex_test.go
+func TestParallelReaders(t *testing.T) {
+	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1))
+	doTestParallelReaders(1, 4)
+	doTestParallelReaders(3, 4)
+	doTestParallelReaders(4, 2)
+}
+
+// Borrowed from rwmutex_test.go
+func reader(rwm *LRWMutex, numIterations int, activity *int32, cdone chan bool) {
+	for i := 0; i < numIterations; i++ {
+		if rwm.GetRLock("", "", time.Second) {
+			n := atomic.AddInt32(activity, 1)
+			if n < 1 || n >= 10000 {
+				panic(fmt.Sprintf("wlock(%d)\n", n))
+			}
+			for i := 0; i < 100; i++ {
+			}
+			atomic.AddInt32(activity, -1)
+			rwm.RUnlock()
+		}
+	}
+	cdone <- true
+}
+
+// Borrowed from rwmutex_test.go
+func writer(rwm *LRWMutex, numIterations int, activity *int32, cdone chan bool) {
+	for i := 0; i < numIterations; i++ {
+		if rwm.GetLock("", "", time.Second) {
+			n := atomic.AddInt32(activity, 10000)
+			if n != 10000 {
+				panic(fmt.Sprintf("wlock(%d)\n", n))
+			}
+			for i := 0; i < 100; i++ {
+			}
+			atomic.AddInt32(activity, -10000)
+			rwm.Unlock()
+		}
+	}
+	cdone <- true
+}
+
+// Borrowed from rwmutex_test.go
+func HammerRWMutex(gomaxprocs, numReaders, numIterations int) {
+	runtime.GOMAXPROCS(gomaxprocs)
+	// Number of active readers + 10000 * number of active writers.
+	var activity int32
+	rwm := NewLRWMutex(context.Background())
+	cdone := make(chan bool)
+	go writer(rwm, numIterations, &activity, cdone)
+	var i int
+	for i = 0; i < numReaders/2; i++ {
+		go reader(rwm, numIterations, &activity, cdone)
+	}
+	go writer(rwm, numIterations, &activity, cdone)
+	for ; i < numReaders; i++ {
+		go reader(rwm, numIterations, &activity, cdone)
+	}
+	// Wait for the 2 writers and all readers to finish.
+	for i := 0; i < 2+numReaders; i++ {
+		<-cdone
+	}
+}
+
+// Borrowed from rwmutex_test.go
+func TestRWMutex(t *testing.T) {
+	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(-1))
+	n := 1000
+	if testing.Short() {
+		n = 5
+	}
+	HammerRWMutex(1, 1, n)
+	HammerRWMutex(1, 3, n)
+	HammerRWMutex(1, 10, n)
+	HammerRWMutex(4, 1, n)
+	HammerRWMutex(4, 3, n)
+	HammerRWMutex(4, 10, n)
+	HammerRWMutex(10, 1, n)
+	HammerRWMutex(10, 3, n)
+	HammerRWMutex(10, 10, n)
+	HammerRWMutex(10, 5, n)
+}
+
+// Borrowed from rwmutex_test.go
+func TestDRLocker(t *testing.T) {
+	wl := NewLRWMutex(context.Background())
+	var rl sync.Locker
+	wlocked := make(chan bool, 1)
+	rlocked := make(chan bool, 1)
+	rl = wl.DRLocker()
+	n := 10
+	go func() {
+		for i := 0; i < n; i++ {
+			rl.Lock()
+			rl.Lock()
+			rlocked <- true
+			wl.Lock()
+			wlocked <- true
+		}
+	}()
+	for i := 0; i < n; i++ {
+		<-rlocked
+		rl.Unlock()
+		select {
+		case <-wlocked:
+			t.Fatal("RLocker() didn't read-lock it")
+		default:
+		}
+		rl.Unlock()
+		<-wlocked
+		select {
+		case <-rlocked:
+			t.Fatal("RLocker() didn't respect the write lock")
+		default:
+		}
+		wl.Unlock()
+	}
+}
+
+// Borrowed from rwmutex_test.go
+func TestUnlockPanic(t *testing.T) {
+	defer func() {
+		if recover() == nil {
+			t.Fatalf("unlock of unlocked RWMutex did not panic")
+		}
+	}()
+	mu := NewLRWMutex(context.Background())
+	mu.Unlock()
+}
+
+// Borrowed from rwmutex_test.go
+func TestUnlockPanic2(t *testing.T) {
+	defer func() {
+		if recover() == nil {
+			t.Fatalf("unlock of unlocked RWMutex did not panic")
+		}
+	}()
+	mu := NewLRWMutex(context.Background())
+	mu.RLock()
+	mu.Unlock()
+}
+
+// Borrowed from rwmutex_test.go
+func TestRUnlockPanic(t *testing.T) {
+	defer func() {
+		if recover() == nil {
+			t.Fatalf("read unlock of unlocked RWMutex did not panic")
+		}
+	}()
+	mu := NewLRWMutex(context.Background())
+	mu.RUnlock()
+}
+
+// Borrowed from rwmutex_test.go
+func TestRUnlockPanic2(t *testing.T) {
+	defer func() {
+		if recover() == nil {
+			t.Fatalf("read unlock of unlocked RWMutex did not panic")
+		}
+	}()
+	mu := NewLRWMutex(context.Background())
+	mu.Lock()
+	mu.RUnlock()
+}
--- a/pkg/retry/retry.go
+++ b/pkg/retry/retry.go
@ -1,5 +1,5 @@
 /*
- * MinIO Cloud Storage, (C) 2016, 2017 MinIO, Inc.
+ * Minio Cloud Storage, (C) 2020 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -14,38 +14,15 @@
 * limitations under the License.
 */

-package cmd
+package retry

 import (
 	"context"
+	"math"
 	"math/rand"
-	"sync"
 	"time"
 )

-// lockedRandSource provides protected rand source, implements rand.Source interface.
-type lockedRandSource struct {
-	lk  sync.Mutex
-	src rand.Source
-}
-
-// Int63 returns a non-negative pseudo-random 63-bit integer as an
-// int64.
-func (r *lockedRandSource) Int63() (n int64) {
-	r.lk.Lock()
-	n = r.src.Int63()
-	r.lk.Unlock()
-	return
-}
-
-// Seed uses the provided seed value to initialize the generator to a
-// deterministic state.
-func (r *lockedRandSource) Seed(seed int64) {
-	r.lk.Lock()
-	r.src.Seed(seed)
-	r.lk.Unlock()
-}
-
 // MaxJitter will randomize over the full exponential backoff time
 const MaxJitter = 1.0

@ -53,31 +30,48 @@ const MaxJitter = 1.0
 // exponential backoff time
 const NoJitter = 0.0

-// Global random source for fetching random values.
-var globalRandomSource = rand.New(&lockedRandSource{
-	src: rand.NewSource(UTCNow().UnixNano()),
-})
+// defaultTimer wraps a time.Timer that is created on first Start and reused afterwards.
+type defaultTimer struct {
+	timer *time.Timer
+}

-// newRetryTimerJitter creates a timer with exponentially increasing delays
+// C returns the timer's channel, which receives the current time when the timer fires.
+func (t *defaultTimer) C() <-chan time.Time {
+	return t.timer.C
+}
+
+// Start arms the timer to fire after the given duration.
+// Do not call it concurrently.
+func (t *defaultTimer) Start(duration time.Duration) {
+	if t.timer == nil {
+		t.timer = time.NewTimer(duration)
+	} else {
+		t.timer.Reset(duration)
+	}
+}
+
+// Stop is called when the timer is not used anymore and resources may be freed.
+func (t *defaultTimer) Stop() {
+	if t.timer != nil {
+		t.timer.Stop()
+	}
+}
+
+// NewTimerWithJitter creates a timer with exponentially increasing delays
 // until the maximum retry attempts are reached. - this function is a fully
 // configurable version, meant for only advanced use cases. For the most part
 // one should use newRetryTimerSimple and newRetryTimer.
-func newRetryTimerWithJitter(ctx context.Context, unit time.Duration, cap time.Duration, jitter float64) <-chan int {
+func NewTimerWithJitter(ctx context.Context, unit time.Duration, cap time.Duration, jitter float64) <-chan int {
 	attemptCh := make(chan int)

 	// normalize jitter to the range [0, 1.0]
-	if jitter < NoJitter {
-		jitter = NoJitter
-	}
-	if jitter > MaxJitter {
-		jitter = MaxJitter
-	}
+	jitter = math.Max(NoJitter, math.Min(MaxJitter, jitter))

 	// computes the exponential backoff duration according to
 	// https://www.awsarchitectureblog.com/2015/03/backoff.html
 	exponentialBackoffWait := func(attempt int) time.Duration {
 		// 1<<uint(attempt) below could overflow, so limit the value of attempt
-		maxAttempt := 30
+		const maxAttempt = 30
 		if attempt > maxAttempt {
 			attempt = maxAttempt
 		}
@ -87,34 +81,37 @@ func newRetryTimerWithJitter(ctx context.Context, unit time.Duration, cap time.D
 			sleep = cap
 		}
 		if jitter != NoJitter {
-			sleep -= time.Duration(globalRandomSource.Float64() * float64(sleep) * jitter)
+			sleep -= time.Duration(rand.Float64() * float64(sleep) * jitter)
 		}
 		return sleep
 	}

 	go func() {
-		defer close(attemptCh)
 		nextBackoff := 0
+		t := &defaultTimer{}
+
+		defer func() {
+			t.Stop()
+		}()
+
+		defer close(attemptCh)
+
 		// Channel used to signal after the expiry of backoff wait seconds.
-		var timer *time.Timer
 		for {
-			select { // Attempts starts.
+			select {
 			case attemptCh <- nextBackoff:
 				nextBackoff++
 			case <-ctx.Done():
-				// Stop the routine.
-				return
-			}
-			timer = time.NewTimer(exponentialBackoffWait(nextBackoff))
-			// wait till next backoff time or till doneCh gets a message.
-			select {
-			case <-timer.C:
-			case <-ctx.Done():
-				// stop the timer and return.
-				timer.Stop()
 				return
 			}

+			t.Start(exponentialBackoffWait(nextBackoff))
+
+			select {
+			case <-ctx.Done():
+				return
+			case <-t.C():
+			}
 		}
 	}()

@ -124,13 +121,13 @@ func newRetryTimerWithJitter(ctx context.Context, unit time.Duration, cap time.D

 // Default retry constants.
 const (
-	defaultRetryUnit = time.Second      // 1 second.
-	defaultRetryCap  = 30 * time.Second // 30 seconds.
+	defaultRetryUnit = 50 * time.Millisecond  // 50 millisecond.
+	defaultRetryCap  = 500 * time.Millisecond // 500 millisecond.
 )

-// newRetryTimerSimple creates a timer with exponentially increasing delays
+// NewTimer creates a timer with exponentially increasing delays
 // until the maximum retry attempts are reached. - this function is a
 // simpler version with all default values.
-func newRetryTimerSimple(ctx context.Context) <-chan int {
-	return newRetryTimerWithJitter(ctx, defaultRetryUnit, defaultRetryCap, MaxJitter)
+func NewTimer(ctx context.Context) <-chan int {
+	return NewTimerWithJitter(ctx, defaultRetryUnit, defaultRetryCap, MaxJitter)
 }
--- a/pkg/retry/retry_test.go
+++ b/pkg/retry/retry_test.go
@ -1,5 +1,5 @@
 /*
- * Minio Cloud Storage, (C) 2016-2020 Minio, Inc.
+ * Minio Cloud Storage, (C) 2020 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -14,7 +14,7 @@
 * limitations under the License.
 */

-package cmd
+package retry

 import (
 	"context"
@ -24,8 +24,8 @@ import (

 // Tests for retry timer.
 func TestRetryTimerSimple(t *testing.T) {
-	rctx, cancel := context.WithCancel(context.Background())
-	attemptCh := newRetryTimerSimple(rctx)
+	retryCtx, cancel := context.WithCancel(context.Background())
+	attemptCh := NewTimer(retryCtx)
 	i := <-attemptCh
 	if i != 0 {
 		cancel()
@ -45,10 +45,11 @@ func TestRetryTimerSimple(t *testing.T) {

 // Test retry time with no jitter.
 func TestRetryTimerWithNoJitter(t *testing.T) {
-	rctx, cancel := context.WithCancel(context.Background())
+	retryCtx, cancel := context.WithCancel(context.Background())
+	defer cancel()

 	// No jitter
-	attemptCh := newRetryTimerWithJitter(rctx, time.Millisecond, 5*time.Millisecond, NoJitter)
+	attemptCh := NewTimerWithJitter(retryCtx, time.Millisecond, 5*time.Millisecond, NoJitter)
 	i := <-attemptCh
 	if i != 0 {
 		cancel()
@ -57,11 +58,9 @@ func TestRetryTimerWithNoJitter(t *testing.T) {
 	// Loop through the maximum possible attempt.
 	for i = range attemptCh {
 		if i == 30 {
-			break
+			cancel()
 		}
 	}
-
-	cancel()
 	_, ok := <-attemptCh
 	if ok {
 		t.Fatal("Attempt counter should be closed")
@ -70,10 +69,9 @@ func TestRetryTimerWithNoJitter(t *testing.T) {

 // Test retry time with Jitter greater than MaxJitter.
 func TestRetryTimerWithJitter(t *testing.T) {
-	rctx, cancel := context.WithCancel(context.Background())
-
+	retryCtx, cancel := context.WithCancel(context.Background())
 	// Jitter will be set back to 1.0
-	attemptCh := newRetryTimerWithJitter(rctx, time.Second, 30*time.Second, 2.0)
+	attemptCh := NewTimerWithJitter(retryCtx, time.Second, 30*time.Second, 2.0)
 	i := <-attemptCh
 	if i != 0 {
 		cancel()