From a6ffdf1dd450e1cb31c87eadf55f5d44ca303adc Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Wed, 19 Jun 2024 07:35:19 -0700 Subject: [PATCH] Do not block on distributed unlocks (#19952) * Prevents blocking when losing quorum (standard on cluster restarts). * Time out to prevent endless buildup. Timed-out remote locks will be canceled because they miss the refresh anyway. * Reduces latency for all calls since the wall time for the roundtrip to remotes no longer adds to the requests. --- internal/dsync/drwmutex.go | 31 ++++++++++++++++++++++++------- internal/dsync/dsync_test.go | 2 ++ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/internal/dsync/drwmutex.go b/internal/dsync/drwmutex.go index be26ff50b..aa967ade1 100644 --- a/internal/dsync/drwmutex.go +++ b/internal/dsync/drwmutex.go @@ -639,9 +639,17 @@ func (dm *DRWMutex) Unlock(ctx context.Context) { tolerance := len(restClnts) / 2 isReadLock := false - for !releaseAll(ctx, dm.clnt, tolerance, owner, &locks, isReadLock, restClnts, dm.Names...) { - time.Sleep(time.Duration(dm.rng.Float64() * float64(dm.lockRetryMinInterval))) - } + started := time.Now() + // Do async unlocking. + // This means unlock will no longer block on the network or missing quorum. + go func() { + for !releaseAll(ctx, dm.clnt, tolerance, owner, &locks, isReadLock, restClnts, dm.Names...) { + time.Sleep(time.Duration(dm.rng.Float64() * float64(dm.lockRetryMinInterval))) + if time.Since(started) > dm.clnt.Timeouts.UnlockCall { + return + } + } + }() } // RUnlock releases a read lock held on dm. @@ -678,11 +686,20 @@ func (dm *DRWMutex) RUnlock(ctx context.Context) { // Tolerance is not set, defaults to half of the locker clients. tolerance := len(restClnts) / 2 - isReadLock := true - for !releaseAll(ctx, dm.clnt, tolerance, owner, &locks, isReadLock, restClnts, dm.Names...) { - time.Sleep(time.Duration(dm.rng.Float64() * float64(dm.lockRetryMinInterval))) - } + started := time.Now() + // Do async unlocking. + // This means unlock will no longer block on the network or missing quorum. + go func() { + for !releaseAll(ctx, dm.clnt, tolerance, owner, &locks, isReadLock, restClnts, dm.Names...) { + time.Sleep(time.Duration(dm.rng.Float64() * float64(dm.lockRetryMinInterval))) + // If we have been waiting for more than the force unlock timeout, return + // Remotes will have canceled due to the missing refreshes anyway. + if time.Since(started) > dm.clnt.Timeouts.UnlockCall { + return + } + } + }() } // sendRelease sends a release message to a node that previously granted a lock diff --git a/internal/dsync/dsync_test.go b/internal/dsync/dsync_test.go index b4556bbe7..f9517de6e 100644 --- a/internal/dsync/dsync_test.go +++ b/internal/dsync/dsync_test.go @@ -293,6 +293,8 @@ func TestUnlockShouldNotTimeout(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) defer cancel() dm.Unlock(ctx) + // Unlock is not blocking. Try to get a new lock. + dm.GetLock(ctx, nil, id, source, Options{Timeout: 5 * time.Minute}) unlockReturned <- struct{}{} }()