change default lock retry interval to 50ms (#15560)

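Competing mutating calls on the same object in a versioned bucket
may unexpectedly see higher delays, because each failed lock attempt
sleeps for a random fraction of the retry interval (previously up to
1 second) before retrying.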

This can be reproduced with a replicated bucket by repeatedly
overwriting and deleting the same object.

For longer-held locks, such as the scanner leader lock, keep the 1-second retry interval.
This commit is contained in:
Harshavardhana 2022-08-19 16:21:05 -07:00 committed by GitHub
parent a2e037f0ec
commit ae4ee95d25
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 49 additions and 18 deletions

View File

@ -1059,13 +1059,15 @@ func replicateObjectToTarget(ctx context.Context, ri ReplicateObjectInfo, object
VersionSuspended: versionSuspended, VersionSuspended: versionSuspended,
}) })
if err != nil { if err != nil {
sendEvent(eventArgs{ if !isErrObjectNotFound(err) {
EventName: event.ObjectReplicationNotTracked, sendEvent(eventArgs{
BucketName: bucket, EventName: event.ObjectReplicationNotTracked,
Object: objInfo, BucketName: bucket,
Host: "Internal: [Replication]", Object: objInfo,
}) Host: "Internal: [Replication]",
logger.LogIf(ctx, fmt.Errorf("Unable to update replicate for %s/%s(%s): %w", bucket, object, objInfo.VersionID, err)) })
logger.LogIf(ctx, fmt.Errorf("Unable to update replicate metadata for %s/%s(%s): %w", bucket, object, objInfo.VersionID, err))
}
return return
} }
defer func() { defer func() {

View File

@ -64,7 +64,11 @@ const (
var ( var (
globalHealConfig heal.Config globalHealConfig heal.Config
dataScannerLeaderLockTimeout = newDynamicTimeout(30*time.Second, 10*time.Second) dataScannerLeaderLockTimeout = newDynamicTimeoutWithOpts(dynamicTimeoutOpts{
timeout: 30 * time.Second,
minimum: 10 * time.Second,
retryInterval: time.Second,
})
// Sleeper values are updated when config is loaded. // Sleeper values are updated when config is loaded.
scannerSleeper = newDynamicSleeper(10, 10*time.Second, true) scannerSleeper = newDynamicSleeper(10, 10*time.Second, true)
scannerCycle = uatomic.NewDuration(dataScannerStartDelay) scannerCycle = uatomic.NewDuration(dataScannerStartDelay)

View File

@ -34,11 +34,24 @@ const (
// timeouts that are dynamically adapted based on actual usage results // timeouts that are dynamically adapted based on actual usage results
type dynamicTimeout struct { type dynamicTimeout struct {
timeout int64 timeout int64
minimum int64 minimum int64
entries int64 entries int64
log [dynamicTimeoutLogSize]time.Duration log [dynamicTimeoutLogSize]time.Duration
mutex sync.Mutex mutex sync.Mutex
retryInterval time.Duration
}
type dynamicTimeoutOpts struct {
timeout time.Duration
minimum time.Duration
retryInterval time.Duration
}
func newDynamicTimeoutWithOpts(opts dynamicTimeoutOpts) *dynamicTimeout {
dt := newDynamicTimeout(opts.timeout, opts.minimum)
dt.retryInterval = opts.retryInterval
return dt
} }
// newDynamicTimeout returns a new dynamic timeout initialized with timeout value // newDynamicTimeout returns a new dynamic timeout initialized with timeout value
@ -57,6 +70,10 @@ func (dt *dynamicTimeout) Timeout() time.Duration {
return time.Duration(atomic.LoadInt64(&dt.timeout)) return time.Duration(atomic.LoadInt64(&dt.timeout))
} }
func (dt *dynamicTimeout) RetryInterval() time.Duration {
return dt.retryInterval
}
// LogSuccess logs the duration of a successful action that // LogSuccess logs the duration of a successful action that
// did not hit the timeout // did not hit the timeout
func (dt *dynamicTimeout) LogSuccess(duration time.Duration) { func (dt *dynamicTimeout) LogSuccess(duration time.Duration) {

View File

@ -166,7 +166,8 @@ func (di *distLockInstance) GetLock(ctx context.Context, timeout *dynamicTimeout
newCtx, cancel := context.WithCancel(ctx) newCtx, cancel := context.WithCancel(ctx)
if !di.rwMutex.GetLock(newCtx, cancel, di.opsID, lockSource, dsync.Options{ if !di.rwMutex.GetLock(newCtx, cancel, di.opsID, lockSource, dsync.Options{
Timeout: timeout.Timeout(), Timeout: timeout.Timeout(),
RetryInterval: timeout.RetryInterval(),
}) { }) {
timeout.LogFailure() timeout.LogFailure()
cancel() cancel()
@ -195,7 +196,8 @@ func (di *distLockInstance) GetRLock(ctx context.Context, timeout *dynamicTimeou
newCtx, cancel := context.WithCancel(ctx) newCtx, cancel := context.WithCancel(ctx)
if !di.rwMutex.GetRLock(ctx, cancel, di.opsID, lockSource, dsync.Options{ if !di.rwMutex.GetRLock(ctx, cancel, di.opsID, lockSource, dsync.Options{
Timeout: timeout.Timeout(), Timeout: timeout.Timeout(),
RetryInterval: timeout.RetryInterval(),
}) { }) {
timeout.LogFailure() timeout.LogFailure()
cancel() cancel()

View File

@ -59,7 +59,8 @@ const (
// dRWMutexRefreshInterval - default the interval between two refresh calls // dRWMutexRefreshInterval - default the interval between two refresh calls
drwMutexRefreshInterval = 10 * time.Second drwMutexRefreshInterval = 10 * time.Second
lockRetryInterval = 1 * time.Second // maximum time to sleep before retrying a failed blocking lock()
lockRetryInterval = 50 * time.Millisecond
drwMutexInfinite = 1<<63 - 1 drwMutexInfinite = 1<<63 - 1
) )
@ -142,7 +143,8 @@ func (dm *DRWMutex) Lock(id, source string) {
// Options lock options. // Options lock options.
type Options struct { type Options struct {
Timeout time.Duration Timeout time.Duration
RetryInterval time.Duration
} }
// GetLock tries to get a write lock on dm before the timeout elapses. // GetLock tries to get a write lock on dm before the timeout elapses.
@ -236,7 +238,11 @@ func (dm *DRWMutex) lockBlocking(ctx context.Context, lockLossCallback func(), i
return locked return locked
} }
time.Sleep(time.Duration(dm.rng.Float64() * float64(dm.lockRetryInterval))) lockRetryInterval := dm.lockRetryInterval
if opts.RetryInterval > 0 {
lockRetryInterval = opts.RetryInterval
}
time.Sleep(time.Duration(dm.rng.Float64() * float64(lockRetryInterval)))
} }
} }
} }