locks: Ensure local lock removal after a failed refresh (#12979) (#13183)

When a lock is not refreshed in the cluster, it is automatically removed by the subsequent cleanup routine for non-refreshed locks, but that routine did not clean up the local server, so the same stale locks remained there.

This commit removes the lock locally as well as on the remote nodes; if removing the lock from a remote node fails, it will still be removed later by the locks cleanup routine.
Anis Elleuch 2021-09-10 16:53:46 +01:00 committed by Minio Trusted
parent a700415c9c
commit 287829c4a8
2 changed files with 43 additions and 5 deletions
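To illustrate the local side of the fix described above, here is a minimal, self-contained sketch (not MinIO code; lockRequesterInfo, localLocker, and forceUnlockUID are simplified stand-ins) of removing every local lock entry that carries a given lock UID, which is the idea behind the new ForceUnlock path in the first diff below:

// Sketch only: drop every lock entry whose UID matches, across all resources.
package main

import "fmt"

// lockRequesterInfo is a simplified stand-in for the real lock entry type.
type lockRequesterInfo struct {
	Owner string
	UID   string
}

type localLocker struct {
	lockMap map[string][]lockRequesterInfo
}

// forceUnlockUID removes all entries carrying the given UID and reports
// whether any entry was found (compare lockUIDFound in the diff below).
func (l *localLocker) forceUnlockUID(uid string) bool {
	found := false
	for resource, lris := range l.lockMap {
		kept := lris[:0]
		for _, lri := range lris {
			if lri.UID == uid {
				found = true
				continue // drop this entry
			}
			kept = append(kept, lri)
		}
		if len(kept) == 0 {
			delete(l.lockMap, resource) // no holders left for this resource
		} else {
			l.lockMap[resource] = kept
		}
	}
	return found
}

func main() {
	l := &localLocker{lockMap: map[string][]lockRequesterInfo{
		"bucket/object": {{Owner: "node1", UID: "abc"}, {Owner: "node2", UID: "def"}},
	}}
	fmt.Println(l.forceUnlockUID("abc")) // true
	fmt.Println(l.lockMap)               // map[bucket/object:[{node2 def}]]
}

The actual change does the same scan under the locker's mutex and delegates removal to removeEntry; the remote nodes are handled best-effort, since a ForceUnlock that fails remotely is picked up later by the cleanup routine.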


@@ -222,13 +222,24 @@ func (l *localLocker) ForceUnlock(ctx context.Context, args dsync.LockArgs) (rep
 	default:
 		l.mutex.Lock()
 		defer l.mutex.Unlock()
-		if len(args.UID) != 0 {
-			return false, fmt.Errorf("ForceUnlock called with non-empty UID: %s", args.UID)
+		if len(args.UID) == 0 {
+			for _, resource := range args.Resources {
+				delete(l.lockMap, resource) // Remove the lock (irrespective of write or read lock)
+			}
+			return true, nil
 		}
-		for _, resource := range args.Resources {
-			delete(l.lockMap, resource) // Remove the lock (irrespective of write or read lock)
+
+		lockUIDFound := false
+		for resource, lris := range l.lockMap {
+			for _, lri := range lris {
+				if lri.UID == args.UID {
+					l.removeEntry(resource, dsync.LockArgs{Owner: lri.Owner, UID: lri.UID}, &lris)
+					lockUIDFound = true
+				}
+			}
 		}
-		return true, nil
+		return lockUIDFound, nil
 	}
 }


@@ -50,6 +50,9 @@ const drwMutexUnlockCallTimeout = 30 * time.Second
 // dRWMutexRefreshTimeout - timeout for the refresh call
 const drwMutexRefreshTimeout = 5 * time.Second
 
+// dRWMutexForceUnlockTimeout - timeout for the unlock call
+const drwMutexForceUnlockCallTimeout = 30 * time.Second
+
 // dRWMutexRefreshInterval - the interval between two refresh calls
 const drwMutexRefreshInterval = 10 * time.Second
@@ -239,6 +242,9 @@ func (dm *DRWMutex) startContinousLockRefresh(lockLossCallback func(), id, sourc
 			refreshTimer.Reset(drwMutexRefreshInterval)
 			refreshed, err := refresh(ctx, dm.clnt, id, source, quorum, dm.Names...)
 			if err == nil && !refreshed {
+				// Clean the lock locally and in remote nodes
+				forceUnlock(ctx, dm.clnt, id)
+				// Execute the caller lock loss callback
 				if lockLossCallback != nil {
 					lockLossCallback()
 				}
@@ -249,6 +255,27 @@ func (dm *DRWMutex) startContinousLockRefresh(lockLossCallback func(), id, sourc
 	}()
 }
 
+func forceUnlock(ctx context.Context, ds *Dsync, id string) {
+	ctx, cancel := context.WithTimeout(ctx, drwMutexForceUnlockCallTimeout)
+	defer cancel()
+
+	restClnts, _ := ds.GetLockers()
+
+	var wg sync.WaitGroup
+	for index, c := range restClnts {
+		wg.Add(1)
+		// Send a force-unlock request to all nodes
+		go func(index int, c NetLocker) {
+			defer wg.Done()
+			args := LockArgs{
+				UID: id,
+			}
+			c.ForceUnlock(ctx, args)
+		}(index, c)
+	}
+	wg.Wait()
+}
+
 type refreshResult struct {
 	offline   bool
 	succeeded bool