Do not block on distributed unlocks (#19952)

* Prevents blocking when losing quorum (standard on cluster restarts).
* Time out the background unlock retries to prevent an endless buildup of goroutines. Remote locks whose unlock timed out will be canceled anyway because they miss the refresh.
* Reduces latency for all calls, since the wall time of the round trip to remotes is no longer added to each request.
This commit is contained in:
Klaus Post 2024-06-19 07:35:19 -07:00 committed by GitHub
parent 69e41f87ef
commit a6ffdf1dd4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 26 additions and 7 deletions

View File

@ -639,9 +639,17 @@ func (dm *DRWMutex) Unlock(ctx context.Context) {
tolerance := len(restClnts) / 2
isReadLock := false
for !releaseAll(ctx, dm.clnt, tolerance, owner, &locks, isReadLock, restClnts, dm.Names...) {
time.Sleep(time.Duration(dm.rng.Float64() * float64(dm.lockRetryMinInterval)))
}
started := time.Now()
// Do async unlocking.
// This means unlock will no longer block on the network or missing quorum.
go func() {
for !releaseAll(ctx, dm.clnt, tolerance, owner, &locks, isReadLock, restClnts, dm.Names...) {
time.Sleep(time.Duration(dm.rng.Float64() * float64(dm.lockRetryMinInterval)))
if time.Since(started) > dm.clnt.Timeouts.UnlockCall {
return
}
}
}()
}
// RUnlock releases a read lock held on dm.
@ -678,11 +686,20 @@ func (dm *DRWMutex) RUnlock(ctx context.Context) {
// Tolerance is not set, defaults to half of the locker clients.
tolerance := len(restClnts) / 2
isReadLock := true
for !releaseAll(ctx, dm.clnt, tolerance, owner, &locks, isReadLock, restClnts, dm.Names...) {
time.Sleep(time.Duration(dm.rng.Float64() * float64(dm.lockRetryMinInterval)))
}
started := time.Now()
// Do async unlocking.
// This means unlock will no longer block on the network or missing quorum.
go func() {
for !releaseAll(ctx, dm.clnt, tolerance, owner, &locks, isReadLock, restClnts, dm.Names...) {
time.Sleep(time.Duration(dm.rng.Float64() * float64(dm.lockRetryMinInterval)))
// If we have been waiting for more than the unlock call timeout, return.
// Remotes will have canceled due to the missing refreshes anyway.
if time.Since(started) > dm.clnt.Timeouts.UnlockCall {
return
}
}
}()
}
// sendRelease sends a release message to a node that previously granted a lock

View File

@ -293,6 +293,8 @@ func TestUnlockShouldNotTimeout(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
defer cancel()
dm.Unlock(ctx)
// Unlock is not blocking. Try to get a new lock.
dm.GetLock(ctx, nil, id, source, Options{Timeout: 5 * time.Minute})
unlockReturned <- struct{}{}
}()