From 06b71c99eed34fd360eedef46585414c39fdb113 Mon Sep 17 00:00:00 2001 From: Anis Elleuch Date: Fri, 27 Aug 2021 16:59:36 +0100 Subject: [PATCH] locks: Ensure local lock removal after a failed refresh (#12979) In the event when a lock is not refreshed in the cluster, this latter will be automatically removed in the subsequent cleanup of non refreshed locks routine, but it forgot to clean the local server, hence having the same weird stale locks present. This commit will remove the lock locally also in remote nodes, if removing a lock from a remote node will fail, it will be anyway removed later in the locks cleanup routine. --- cmd/local-locker.go | 19 ++++++++++++++----- internal/dsync/drwmutex.go | 27 +++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/cmd/local-locker.go b/cmd/local-locker.go index 8f081ab46..5dd951fbc 100644 --- a/cmd/local-locker.go +++ b/cmd/local-locker.go @@ -227,13 +227,22 @@ func (l *localLocker) ForceUnlock(ctx context.Context, args dsync.LockArgs) (rep default: l.mutex.Lock() defer l.mutex.Unlock() - if len(args.UID) != 0 { - return false, fmt.Errorf("ForceUnlock called with non-empty UID: %s", args.UID) + if len(args.UID) == 0 { + for _, resource := range args.Resources { + delete(l.lockMap, resource) // Remove the lock (irrespective of write or read lock) + } + return true, nil } - for _, resource := range args.Resources { - delete(l.lockMap, resource) // Remove the lock (irrespective of write or read lock) + + for _, lris := range l.lockMap { + for _, lri := range lris { + if lri.UID == args.UID { + l.removeEntry(lri.Name, dsync.LockArgs{Owner: lri.Owner, UID: lri.UID}, &lris) + return true, nil + } + } } - return true, nil + return false, nil } } diff --git a/internal/dsync/drwmutex.go b/internal/dsync/drwmutex.go index 730944cc5..fceffc3b5 100644 --- a/internal/dsync/drwmutex.go +++ b/internal/dsync/drwmutex.go @@ -51,6 +51,9 @@ const drwMutexRefreshCallTimeout = 5 * time.Second // dRWMutexUnlockTimeout - timeout for the unlock call const drwMutexUnlockCallTimeout = 30 * time.Second +// dRWMutexForceUnlockTimeout - timeout for the unlock call +const drwMutexForceUnlockCallTimeout = 30 * time.Second + // dRWMutexRefreshInterval - the interval between two refresh calls const drwMutexRefreshInterval = 10 * time.Second @@ -237,6 +240,9 @@ func (dm *DRWMutex) startContinousLockRefresh(lockLossCallback func(), id, sourc refreshed, err := refresh(ctx, dm.clnt, id, source, quorum, dm.Names...) if err == nil && !refreshed { + // Clean the lock locally and in remote nodes + forceUnlock(ctx, dm.clnt, id) + // Execute the caller lock loss callback if lockLossCallback != nil { lockLossCallback() } @@ -247,6 +253,27 @@ func (dm *DRWMutex) startContinousLockRefresh(lockLossCallback func(), id, sourc }() } +func forceUnlock(ctx context.Context, ds *Dsync, id string) { + ctx, cancel := context.WithTimeout(ctx, drwMutexForceUnlockCallTimeout) + defer cancel() + + restClnts, _ := ds.GetLockers() + + var wg sync.WaitGroup + for index, c := range restClnts { + wg.Add(1) + // Send refresh request to all nodes + go func(index int, c NetLocker) { + defer wg.Done() + args := LockArgs{ + UID: id, + } + c.ForceUnlock(ctx, args) + }(index, c) + } + wg.Wait() +} + type refreshResult struct { offline bool succeeded bool