fix: allow read unlocks to be defensive about split brains (#10637)

This commit is contained in:
Harshavardhana 2020-10-07 09:15:01 -07:00 committed by GitHub
parent 01498a3e34
commit effe131090
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 35 additions and 29 deletions

View File

@ -334,7 +334,6 @@ func (r *replicationState) addWorker(ctx context.Context, objectAPI ObjectLayer)
return return
} }
replicateObject(ctx, oi, objectAPI) replicateObject(ctx, oi, objectAPI)
default:
} }
} }
}() }()

View File

@ -183,7 +183,7 @@ func (dm *DRWMutex) lockBlocking(ctx context.Context, id, source string, isReadL
// return false anyways for both situations. // return false anyways for both situations.
// make sure to unlock any successful locks, since caller has timedout or canceled the request. // make sure to unlock any successful locks, since caller has timedout or canceled the request.
releaseAll(dm.clnt, quorum, owner, &locks, isReadLock, restClnts, dm.Names...) releaseAll(dm.clnt, tolerance, owner, &locks, isReadLock, restClnts, dm.Names...)
return false return false
default: default:
@ -294,7 +294,7 @@ func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, is
done = true done = true
// Increment the number of grants received from the buffered channel. // Increment the number of grants received from the buffered channel.
i++ i++
releaseAll(ds, quorum, owner, locks, isReadLock, restClnts, lockNames...) releaseAll(ds, tolerance, owner, locks, isReadLock, restClnts, lockNames...)
} }
} }
case <-timeout: case <-timeout:
@ -303,7 +303,7 @@ func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, is
// number of locks to check whether we have quorum or not // number of locks to check whether we have quorum or not
if !checkQuorumLocked(locks, quorum) { if !checkQuorumLocked(locks, quorum) {
log("Quorum not met after timeout\n") log("Quorum not met after timeout\n")
releaseAll(ds, quorum, owner, locks, isReadLock, restClnts, lockNames...) releaseAll(ds, tolerance, owner, locks, isReadLock, restClnts, lockNames...)
} else { } else {
log("Quorum met after timeout\n") log("Quorum met after timeout\n")
} }
@ -339,15 +339,31 @@ func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, is
return quorumLocked return quorumLocked
} }
// checkQuorumUnlocked determines whether we have unlocked the required quorum of underlying locks or not // checkFailedUnlocks determines whether we have sufficiently unlocked all
func checkQuorumUnlocked(locks *[]string, quorum int) bool { // resources to ensure no deadlocks for future callers
count := 0 func checkFailedUnlocks(locks []string, tolerance int) bool {
for _, uid := range *locks { unlocksFailed := 0
if uid == "" { for lockID := range locks {
count++ if isLocked(locks[lockID]) {
unlocksFailed++
} }
} }
return count >= quorum
// Unlock failures are higher than tolerance limit
// for this instance of unlocker, we should let the
// caller know that lock is not successfully released
// yet.
if len(locks)-tolerance == tolerance {
// Incase of split brain scenarios where
// tolerance is exactly half of the len(*locks)
// then we need to make sure we have unlocked
// upto tolerance+1 - especially for RUnlock
// to ensure that we don't end up with active
// read locks on the resource after unlocking
// only half of the lockers.
return unlocksFailed >= tolerance
}
return unlocksFailed > tolerance
} }
// checkQuorumLocked determines whether we have locked the required quorum of underlying locks or not // checkQuorumLocked determines whether we have locked the required quorum of underlying locks or not
@ -363,7 +379,7 @@ func checkQuorumLocked(locks *[]string, quorum int) bool {
} }
// releaseAll releases all locks that are marked as locked // releaseAll releases all locks that are marked as locked
func releaseAll(ds *Dsync, quorum int, owner string, locks *[]string, isReadLock bool, restClnts []NetLocker, lockNames ...string) bool { func releaseAll(ds *Dsync, tolerance int, owner string, locks *[]string, isReadLock bool, restClnts []NetLocker, lockNames ...string) bool {
var wg sync.WaitGroup var wg sync.WaitGroup
for lockID := range restClnts { for lockID := range restClnts {
wg.Add(1) wg.Add(1)
@ -377,7 +393,12 @@ func releaseAll(ds *Dsync, quorum int, owner string, locks *[]string, isReadLock
}(lockID) }(lockID)
} }
wg.Wait() wg.Wait()
return checkQuorumUnlocked(locks, quorum)
// Return true if releaseAll was successful, otherwise we return 'false'
// to indicate we haven't sufficiently unlocked lockers to avoid deadlocks.
//
// Caller may use this as an indication to call again.
return !checkFailedUnlocks(*locks, tolerance)
} }
// Unlock unlocks the write lock. // Unlock unlocks the write lock.
@ -412,20 +433,9 @@ func (dm *DRWMutex) Unlock() {
// Tolerance is not set, defaults to half of the locker clients. // Tolerance is not set, defaults to half of the locker clients.
tolerance := len(restClnts) / 2 tolerance := len(restClnts) / 2
// Quorum is effectively = total clients subtracted with tolerance limit
quorum := len(restClnts) - tolerance
// In situations for write locks, as a special case
// to avoid split brains we make sure to acquire
// quorum + 1 when tolerance is exactly half of the
// total locker clients.
if quorum == tolerance {
quorum++
}
isReadLock := false isReadLock := false
r := rand.New(rand.NewSource(time.Now().UnixNano())) r := rand.New(rand.NewSource(time.Now().UnixNano()))
for !releaseAll(dm.clnt, quorum, owner, &locks, isReadLock, restClnts, dm.Names...) { for !releaseAll(dm.clnt, tolerance, owner, &locks, isReadLock, restClnts, dm.Names...) {
time.Sleep(time.Duration(r.Float64() * float64(lockRetryInterval))) time.Sleep(time.Duration(r.Float64() * float64(lockRetryInterval)))
} }
} }
@ -454,12 +464,9 @@ func (dm *DRWMutex) RUnlock() {
// Tolerance is not set, defaults to half of the locker clients. // Tolerance is not set, defaults to half of the locker clients.
tolerance := len(restClnts) / 2 tolerance := len(restClnts) / 2
// Quorum is effectively = total clients subtracted with tolerance limit
quorum := len(restClnts) - tolerance
isReadLock := true isReadLock := true
r := rand.New(rand.NewSource(time.Now().UnixNano())) r := rand.New(rand.NewSource(time.Now().UnixNano()))
for !releaseAll(dm.clnt, quorum, owner, &locks, isReadLock, restClnts, dm.Names...) { for !releaseAll(dm.clnt, tolerance, owner, &locks, isReadLock, restClnts, dm.Names...) {
time.Sleep(time.Duration(r.Float64() * float64(lockRetryInterval))) time.Sleep(time.Duration(r.Float64() * float64(lockRetryInterval)))
} }
} }