fix: avoid timer leaks in dsync/lsync (#9781)

At a customer setup with lots of concurrent calls
it can be observed that newRetryTimer makes lots
of tiny allocations which are not relinquished
upon retries. In this codepath we only need to
reuse a single timer per locker and reset it
between retries.

```
(pprof) top
Showing nodes accounting for 8.68TB, 97.02% of 8.95TB total
Dropped 1198 nodes (cum <= 0.04TB)
Showing top 10 nodes out of 79
      flat  flat%   sum%        cum   cum%
    5.95TB 66.50% 66.50%     5.95TB 66.50%  time.NewTimer
    1.16TB 13.02% 79.51%     1.16TB 13.02%  github.com/ncw/directio.AlignedBlock
    0.67TB  7.53% 87.04%     0.70TB  7.78%  github.com/minio/minio/cmd.xlObjects.putObject
    0.21TB  2.36% 89.40%     0.21TB  2.36%  github.com/minio/minio/cmd.(*posix).Walk
    0.19TB  2.08% 91.49%     0.27TB  2.99%  os.statNolog
    0.14TB  1.59% 93.08%     0.14TB  1.60%  os.(*File).readdirnames
    0.10TB  1.09% 94.17%     0.11TB  1.25%  github.com/minio/minio/cmd.readDirN
    0.10TB  1.07% 95.23%     0.10TB  1.07%  syscall.ByteSliceFromString
    0.09TB  1.03% 96.27%     0.09TB  1.03%  strings.(*Builder).grow
    0.07TB  0.75% 97.02%     0.07TB  0.75%  path.(*lazybuf).append
```
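
The profile above is dominated by time.NewTimer because each failed attempt allocated a fresh timer that was never reused. Below is a minimal sketch of the difference; the helper names and the 50ms interval are illustrative, not the dsync code itself:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// retryWithFreshTimers shows the leaky pattern: a brand-new timer is allocated
// on every failed attempt, which is what the time.NewTimer row in the profile
// above corresponds to under heavy concurrency.
func retryWithFreshTimers(ctx context.Context, attempt func() bool) bool {
	for {
		if attempt() {
			return true
		}
		t := time.NewTimer(50 * time.Millisecond) // one allocation per retry
		select {
		case <-ctx.Done():
			t.Stop()
			return false
		case <-t.C:
		}
	}
}

// retryWithReusedTimer allocates one timer per caller and resets it between
// attempts, i.e. the timer is reused for the lifetime of the retry loop.
func retryWithReusedTimer(ctx context.Context, attempt func() bool) bool {
	t := time.NewTimer(50 * time.Millisecond)
	defer t.Stop()
	for {
		if attempt() {
			return true
		}
		select {
		case <-ctx.Done():
			return false
		case <-t.C: // timer fired, so Reset below is safe
		}
		t.Reset(50 * time.Millisecond) // reuse the same timer for the next wait
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	attempts := 0
	ok := retryWithReusedTimer(ctx, func() bool { attempts++; return attempts == 5 })
	fmt.Println("locked:", ok, "after attempts:", attempts)
}
```

Note that Reset is only called after the timer has fired (its channel was drained), which is the safe way to reuse a time.Timer.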
Author: Harshavardhana
Date: 2020-06-08 11:28:40 -07:00 (committed by GitHub)
Commit: febe9cc26a (parent: 2ce2e88adf)
16 changed files with 783 additions and 675 deletions


```
@@ -24,6 +24,8 @@ import (
 	"os"
 	"sync"
 	"time"
+
+	"github.com/minio/minio/pkg/retry"
 )
 
 // Indicator if logging is enabled.
```
```
@@ -128,49 +130,41 @@ func (dm *DRWMutex) GetRLock(id, source string, timeout time.Duration) (locked b
 // algorithm until either the lock is acquired successfully or more
 // time has elapsed than the timeout value.
 func (dm *DRWMutex) lockBlocking(timeout time.Duration, id, source string, isReadLock bool) (locked bool) {
-	start := time.Now().UTC()
 	restClnts := dm.clnt.GetLockersFn()
 
-	retryCtx, cancel := context.WithCancel(dm.ctx)
+	retryCtx, cancel := context.WithTimeout(dm.ctx, timeout)
 	defer cancel()
 
 	// Use incremental back-off algorithm for repeated attempts to acquire the lock
-	for range newRetryTimerSimple(retryCtx) {
-		select {
-		case <-dm.ctx.Done():
-			return
-		default:
-		}
+	for range retry.NewTimer(retryCtx) {
 		// Create temp array on stack.
 		locks := make([]string, len(restClnts))
 
 		// Try to acquire the lock.
 		success := lock(dm.clnt, &locks, id, source, isReadLock, dm.Names...)
-		if success {
-			dm.m.Lock()
-
-			// If success, copy array to object
-			if isReadLock {
-				// Append new array of strings at the end
-				dm.readersLocks = append(dm.readersLocks, make([]string, len(restClnts)))
-				// and copy stack array into last spot
-				copy(dm.readersLocks[len(dm.readersLocks)-1], locks[:])
-			} else {
-				copy(dm.writeLocks, locks[:])
-			}
-
-			dm.m.Unlock()
-			return true
+		if !success {
+			continue
 		}
-		if time.Now().UTC().Sub(start) >= timeout { // Are we past the timeout?
-			break
+
+		dm.m.Lock()
+
+		// If success, copy array to object
+		if isReadLock {
+			// Append new array of strings at the end
+			dm.readersLocks = append(dm.readersLocks, make([]string, len(restClnts)))
+			// and copy stack array into last spot
+			copy(dm.readersLocks[len(dm.readersLocks)-1], locks[:])
+		} else {
+			copy(dm.writeLocks, locks[:])
 		}
 
-		// Failed to acquire the lock on this attempt, incrementally wait
-		// for a longer back-off time and try again afterwards.
+		dm.m.Unlock()
+		return true
 	}
+
+	// Failed to acquire the lock on this attempt, incrementally wait
+	// for a longer back-off time and try again afterwards.
 	return false
 }
```
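
The hunk above also drops the manual start/elapsed bookkeeping and the per-iteration ctx.Done() check: the deadline now lives in the retry context itself. A rough sketch of that control flow, assuming only what the diff shows (that retry.NewTimer yields on a channel and stops once its context is done); retryTicks and lockWithTimeout are hypothetical stand-ins, not the pkg/retry or dsync implementations:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// retryTicks is a hypothetical stand-in for retry.NewTimer: it reuses a single
// timer and closes the returned channel once ctx is done (canceled or timed out).
func retryTicks(ctx context.Context) <-chan struct{} {
	ch := make(chan struct{})
	go func() {
		defer close(ch)
		t := time.NewTimer(0) // fire immediately so the first attempt happens right away
		defer t.Stop()
		for {
			select {
			case <-ctx.Done():
				return
			case <-t.C:
			}
			select {
			case ch <- struct{}{}:
			case <-ctx.Done():
				return
			}
			t.Reset(50 * time.Millisecond) // a real back-off would grow this interval
		}
	}()
	return ch
}

// lockWithTimeout mirrors the shape of the new lockBlocking: the timeout is
// folded into the context, so the loop simply ends when the channel closes.
func lockWithTimeout(ctx context.Context, timeout time.Duration, tryLock func() bool) bool {
	retryCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	for range retryTicks(retryCtx) {
		if tryLock() {
			return true // acquired before the deadline
		}
	}
	// Channel closed: the parent context was canceled or the timeout elapsed.
	return false
}

func main() {
	n := 0
	ok := lockWithTimeout(context.Background(), 500*time.Millisecond, func() bool { n++; return n == 3 })
	fmt.Println("locked:", ok, "attempts:", n)
}
```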
```
@@ -233,7 +227,7 @@ func lock(ds *Dsync, locks *[]string, id, source string, isReadLock bool, lockNa
 	//
 	// a) received all lock responses
 	// b) received too many 'non-'locks for quorum to be still possible
-	// c) time out
+	// c) timedout
 	//
 	i, locksFailed := 0, 0
 	done := false
```