locking: Add Refresh for better locking cleanup (#11535)

Co-authored-by: Anis Elleuch <anis@min.io>
Co-authored-by: Harshavardhana <harsha@minio.io>
commit 7be7109471 (parent 464fa08f2e)
Author: Anis Elleuch
Date:   2021-03-04 03:36:43 +01:00 (committed by GitHub)

23 changed files with 597 additions and 313 deletions
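The diff below (from the dsync DRWMutex code) adds a background refresh loop to the distributed lock: once a lock is granted, the holder periodically asks every locker to Refresh it, and if an active quorum of lockers no longer knows the lock, a loss callback fires. GetLock and GetRLock now take a context.CancelFunc that is wired in as that callback, so a caller can observe silent lock loss through its own context. A minimal caller sketch (assuming a configured *Dsync client ds; the resource, id, and source strings are illustrative, not from the commit):

    // The cancel function doubles as the lock-loss callback: if the refresh
    // loop loses quorum, it calls cancel(), and ctx.Done() is closed.
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    dm := dsync.NewDRWMutex(ds, "my-resource")
    if !dm.GetLock(ctx, cancel, "unique-uid", "caller-source", dsync.Options{Timeout: 5 * time.Second}) {
        return // could not acquire the lock within the timeout
    }
    defer dm.Unlock() // also stops the refresh goroutine

    select {
    case <-ctx.Done():
        // The lock was lost (or the caller was canceled): abort the work.
    default:
        // ... perform the protected work, checking ctx periodically ...
    }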


@@ -41,17 +41,25 @@ func log(format string, data ...interface{}) {
     }
 }
 
-// DRWMutexAcquireTimeout - tolerance limit to wait for lock acquisition before.
-const DRWMutexAcquireTimeout = 1 * time.Second // 1 second.
+// dRWMutexAcquireTimeout - tolerance limit to wait for lock acquisition before.
+const drwMutexAcquireTimeout = 1 * time.Second // 1 second.
+
+// dRWMutexRefreshTimeout - timeout for the refresh call
+const drwMutexRefreshTimeout = 5 * time.Second
+
+// dRWMutexRefreshInterval - the interval between two refresh calls
+const drwMutexRefreshInterval = 10 * time.Second
 
 const drwMutexInfinite = 1<<63 - 1
 
 // A DRWMutex is a distributed mutual exclusion lock.
 type DRWMutex struct {
-    Names        []string
-    writeLocks   []string   // Array of nodes that granted a write lock
-    readersLocks [][]string // Array of array of nodes that granted reader locks
-    m            sync.Mutex // Mutex to prevent multiple simultaneous locks from this node
-    clnt         *Dsync
+    Names         []string
+    writeLocks    []string   // Array of nodes that granted a write lock
+    readersLocks  [][]string // Array of array of nodes that granted reader locks
+    m             sync.Mutex // Mutex to prevent multiple simultaneous locks from this node
+    clnt          *Dsync
+    cancelRefresh context.CancelFunc
 }
 
 // Granted - represents a structure of a granted lock.
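One derived observation about the two new constants: the refresh timer in the loop added further below is re-armed only after the previous refresh round has returned or timed out, so the worst-case gap between two consecutive refresh attempts is the sum of the interval and the per-round timeout. Any server-side expiry for unrefreshed locks (not part of this diff) therefore has to exceed that sum. As a sketch:

    // Worst-case spacing between consecutive refresh attempts, derived from
    // the constants above: a 10s wait plus up to 5s for the round itself.
    const worstCaseRefreshGap = drwMutexRefreshInterval + drwMutexRefreshTimeout // 15s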
@@ -85,7 +93,7 @@ func NewDRWMutex(clnt *Dsync, names ...string) *DRWMutex {
 func (dm *DRWMutex) Lock(id, source string) {
     isReadLock := false
-    dm.lockBlocking(context.Background(), id, source, isReadLock, Options{
+    dm.lockBlocking(context.Background(), nil, id, source, isReadLock, Options{
         Timeout: drwMutexInfinite,
     })
 }
@@ -100,10 +108,10 @@ type Options struct {
 // If the lock is already in use, the calling go routine
 // blocks until either the mutex becomes available and return success or
 // more time has passed than the timeout value and return false.
-func (dm *DRWMutex) GetLock(ctx context.Context, id, source string, opts Options) (locked bool) {
+func (dm *DRWMutex) GetLock(ctx context.Context, cancel context.CancelFunc, id, source string, opts Options) (locked bool) {
     isReadLock := false
-    return dm.lockBlocking(ctx, id, source, isReadLock, opts)
+    return dm.lockBlocking(ctx, cancel, id, source, isReadLock, opts)
 }
 
 // RLock holds a read lock on dm.
@@ -113,7 +121,7 @@ func (dm *DRWMutex) GetLock(ctx context.Context, id, source string, opts Options
 func (dm *DRWMutex) RLock(id, source string) {
     isReadLock := true
-    dm.lockBlocking(context.Background(), id, source, isReadLock, Options{
+    dm.lockBlocking(context.Background(), nil, id, source, isReadLock, Options{
         Timeout: drwMutexInfinite,
     })
 }
@@ -124,10 +132,10 @@ func (dm *DRWMutex) RLock(id, source string) {
 // Otherwise the calling go routine blocks until either the mutex becomes
 // available and return success or more time has passed than the timeout
 // value and return false.
-func (dm *DRWMutex) GetRLock(ctx context.Context, id, source string, opts Options) (locked bool) {
+func (dm *DRWMutex) GetRLock(ctx context.Context, cancel context.CancelFunc, id, source string, opts Options) (locked bool) {
     isReadLock := true
-    return dm.lockBlocking(ctx, id, source, isReadLock, opts)
+    return dm.lockBlocking(ctx, cancel, id, source, isReadLock, opts)
 }
 
 const (
@@ -139,7 +147,7 @@ const (
 // The function will loop using a built-in timing randomized back-off
 // algorithm until either the lock is acquired successfully or more
 // time has elapsed than the timeout value.
-func (dm *DRWMutex) lockBlocking(ctx context.Context, id, source string, isReadLock bool, opts Options) (locked bool) {
+func (dm *DRWMutex) lockBlocking(ctx context.Context, lockLossCallback func(), id, source string, isReadLock bool, opts Options) (locked bool) {
     restClnts, _ := dm.clnt.GetLockers()
 
     r := rand.New(rand.NewSource(time.Now().UnixNano()))
@@ -191,6 +199,10 @@ func (dm *DRWMutex) lockBlocking(ctx context.Context, id, source string, isReadL
             dm.m.Unlock()
             log("lockBlocking %s/%s for %#v: granted\n", id, source, dm.Names)
 
+            // Refresh lock continuously and cancel if there is no quorum in the lock anymore
+            dm.startContinousLockRefresh(lockLossCallback, id, source, quorum)
+
             return locked
         }
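Note that GetLock and GetRLock simply forward their cancel argument as lockLossCallback, while the blocking Lock and RLock variants pass nil and are therefore never notified of loss. A context.CancelFunc is an ordinary func(), so canceling the caller's context is how "the lock is gone" is broadcast. The pattern in isolation (a sketch, independent of dsync):

    ctx, cancel := context.WithCancel(context.Background())
    var lockLossCallback func() = cancel // CancelFunc is just func()
    lockLossCallback()                   // equivalent to cancel()
    <-ctx.Done()                         // now unblocked: the "lock" is gone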
@@ -199,6 +211,132 @@ func (dm *DRWMutex) lockBlocking(ctx context.Context, id, source string, isReadL
     }
 }
 
+func (dm *DRWMutex) startContinousLockRefresh(lockLossCallback func(), id, source string, quorum int) {
+    ctx, cancel := context.WithCancel(context.Background())
+
+    dm.m.Lock()
+    dm.cancelRefresh = cancel
+    dm.m.Unlock()
+
+    go func() {
+        defer cancel()
+
+        for {
+            select {
+            case <-ctx.Done():
+                return
+            case <-time.NewTimer(drwMutexRefreshInterval).C:
+                refreshed, err := refresh(ctx, dm.clnt, id, source, quorum, dm.Names...)
+                if err == nil && !refreshed {
+                    if lockLossCallback != nil {
+                        lockLossCallback()
+                    }
+                    return
+                }
+            }
+        }
+    }()
+}
+
+type refreshResult struct {
+    offline   bool
+    succeeded bool
+}
+
+func refresh(ctx context.Context, ds *Dsync, id, source string, quorum int, lockNames ...string) (bool, error) {
+    restClnts, owner := ds.GetLockers()
+
+    // Create buffered channel of size equal to total number of nodes.
+    ch := make(chan refreshResult, len(restClnts))
+    var wg sync.WaitGroup
+
+    for index, c := range restClnts {
+        wg.Add(1)
+        // Send refresh request to all nodes
+        go func(index int, c NetLocker) {
+            defer wg.Done()
+
+            if c == nil {
+                ch <- refreshResult{offline: true}
+                return
+            }
+
+            args := LockArgs{
+                Owner:     owner,
+                UID:       id,
+                Resources: lockNames,
+                Source:    source,
+                Quorum:    quorum,
+            }
+
+            ctx, cancel := context.WithTimeout(ctx, drwMutexRefreshTimeout)
+            defer cancel()
+
+            refreshed, err := c.Refresh(ctx, args)
+            if refreshed && err == nil {
+                ch <- refreshResult{succeeded: true}
+            } else {
+                if err != nil {
+                    ch <- refreshResult{offline: true}
+                    log("dsync: Unable to call Refresh failed with %s for %#v at %s\n", err, args, c)
+                } else {
+                    ch <- refreshResult{succeeded: false}
+                    log("dsync: Refresh returned false for %#v at %s\n", args, c)
+                }
+            }
+        }(index, c)
+    }
+
+    // Wait until we have either
+    //
+    // a) received all refresh responses
+    // b) received too many refreshed for quorum to be still possible
+    // c) timed out
+    //
+    i, refreshFailed, refreshSucceeded := 0, 0, 0
+    done := false
+
+    for ; i < len(restClnts); i++ {
+        select {
+        case refresh := <-ch:
+            if refresh.offline {
+                continue
+            }
+            if refresh.succeeded {
+                refreshSucceeded++
+            } else {
+                refreshFailed++
+            }
+            if refreshFailed > quorum {
+                // We know that we are not going to succeed with refresh
+                done = true
+            }
+        case <-ctx.Done():
+            // Refreshing is canceled
+            return false, ctx.Err()
+        }
+
+        if done {
+            break
+        }
+    }
+
+    refreshQuorum := refreshSucceeded >= quorum
+    if !refreshQuorum {
+        refreshQuorum = refreshFailed < quorum
+    }
+
+    // We may have some unused results in ch, release them async.
+    go func() {
+        wg.Wait()
+        close(ch)
+        for range ch {
+        }
+    }()
+
+    return refreshQuorum, nil
+}
 
 // lock tries to acquire the distributed lock, returning true or false.
 func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, isReadLock bool, tolerance, quorum int, lockNames ...string) bool {
     for i := range *locks {
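The quorum arithmetic at the end of refresh() above is easy to misread: offline or unreachable lockers count toward neither side, so a refresh is only treated as failed once a quorum of reachable lockers has actively reported the lock as unknown. Restated as a pure function with worked examples (a sketch for clarity, not part of the commit):

    // Decision rule from refresh(): keep the lock if a quorum confirmed it,
    // or if too few lockers actively denied it to rule quorum out.
    func refreshQuorumReached(succeeded, failed, quorum int) bool {
        return succeeded >= quorum || failed < quorum
    }

    // With 4 lockers and quorum = 3:
    //   succeeded=3, failed=1            -> true  (quorum confirmed the lock)
    //   succeeded=1, failed=1, 2 offline -> true  (too few active denials to give up)
    //   succeeded=1, failed=3            -> false (the lock is gone; callback fires)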
@@ -212,7 +350,7 @@ func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, is
     var wg sync.WaitGroup
 
     // Combined timeout for the lock attempt.
-    ctx, cancel := context.WithTimeout(ctx, DRWMutexAcquireTimeout)
+    ctx, cancel := context.WithTimeout(ctx, drwMutexAcquireTimeout)
     defer cancel()
 
     for index, c := range restClnts {
         wg.Add(1)
@@ -383,6 +521,9 @@ func releaseAll(ds *Dsync, tolerance int, owner string, locks *[]string, isReadL
 //
 // It is a run-time error if dm is not locked on entry to Unlock.
 func (dm *DRWMutex) Unlock() {
+    dm.m.Lock()
+    dm.cancelRefresh()
+    dm.m.Unlock()
 
     restClnts, owner := dm.clnt.GetLockers()
 
     // create temp array on stack
@@ -422,6 +563,9 @@ func (dm *DRWMutex) Unlock() {
 //
 // It is a run-time error if dm is not locked on entry to RUnlock.
 func (dm *DRWMutex) RUnlock() {
+    dm.m.Lock()
+    dm.cancelRefresh()
+    dm.m.Unlock()
 
     // create temp array on stack
     restClnts, owner := dm.clnt.GetLockers()
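Unlock and RUnlock now cancel the refresh goroutine, under dm.m, before releasing the lock on the lockers. Since cancelRefresh is only assigned inside startContinousLockRefresh, calling Unlock on a mutex that was never locked would invoke a nil CancelFunc and panic, which is consistent with the documented run-time error. A read-lock lifecycle under the new API (sketch; ds, the uid, and the resource name are illustrative):

    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    dm := dsync.NewDRWMutex(ds, "bucket/object")
    if dm.GetRLock(ctx, cancel, "uid-1234", "GetObject", dsync.Options{Timeout: 10 * time.Second}) {
        // RUnlock cancels the refresh goroutine, then releases on the lockers.
        defer dm.RUnlock()
        // ... read the protected resource while ctx remains live ...
    }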