fix: initialize new drwMutex for each attempt in 'for {' loop. (#14009)

It is possible that GetLock() call remembers a previously
failed releaseAll() when there are networking issues, now
this state can have potential side effects.

This PR tries to avoid this side affect by making sure
to initialize NewNSLock() for each GetLock() attempts
made to avoid any prior state in the memory that can
interfere with the new lock grants.
This commit is contained in:
Harshavardhana 2022-01-02 09:15:34 -08:00 committed by GitHub
parent f527c708f2
commit 42ba0da6b0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 24 additions and 28 deletions

View File

@ -72,14 +72,16 @@ var (
// initDataScanner will start the scanner in the background. // initDataScanner will start the scanner in the background.
func initDataScanner(ctx context.Context, objAPI ObjectLayer) { func initDataScanner(ctx context.Context, objAPI ObjectLayer) {
go func() { go func() {
r := rand.New(rand.NewSource(time.Now().UnixNano()))
// Run the data scanner in a loop // Run the data scanner in a loop
for { for {
select {
case <-ctx.Done():
return
default:
runDataScanner(ctx, objAPI) runDataScanner(ctx, objAPI)
duration := time.Duration(r.Float64() * float64(scannerCycle.Get()))
if duration < time.Second {
// Make sure to sleep atleast a second to avoid high CPU ticks.
duration = time.Second
} }
time.Sleep(duration)
} }
}() }()
} }
@ -106,20 +108,14 @@ func (s *safeDuration) Get() time.Duration {
// There should only ever be one scanner running per cluster. // There should only ever be one scanner running per cluster.
func runDataScanner(pctx context.Context, objAPI ObjectLayer) { func runDataScanner(pctx context.Context, objAPI ObjectLayer) {
// Make sure only 1 scanner is running on the cluster. // Make sure only 1 scanner is running on the cluster.
locker := objAPI.NewNSLock(minioMetaBucket, "runDataScanner.lock") locker := objAPI.NewNSLock(minioMetaBucket, "scanner/runDataScanner.lock")
var ctx context.Context
r := rand.New(rand.NewSource(time.Now().UnixNano()))
for {
lkctx, err := locker.GetLock(pctx, dataScannerLeaderLockTimeout) lkctx, err := locker.GetLock(pctx, dataScannerLeaderLockTimeout)
if err != nil { if err != nil {
time.Sleep(time.Duration(r.Float64() * float64(scannerCycle.Get()))) return
continue
} }
ctx = lkctx.Context() ctx := lkctx.Context()
defer lkctx.Cancel() defer lkctx.Cancel()
break
// No unlock for "leader" lock. // No unlock for "leader" lock.
}
// Load current bloom cycle // Load current bloom cycle
nextBloomCycle := intDataUpdateTracker.current() + 1 nextBloomCycle := intDataUpdateTracker.current() + 1

View File

@ -213,9 +213,6 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer, etcdClient *etc
// Indicate to our routine to exit cleanly upon return. // Indicate to our routine to exit cleanly upon return.
defer cancel() defer cancel()
// Hold the lock for migration only.
txnLk := objAPI.NewNSLock(minioMetaBucket, minioConfigPrefix+"/iam.lock")
// allocate dynamic timeout once before the loop // allocate dynamic timeout once before the loop
iamLockTimeout := newDynamicTimeout(5*time.Second, 3*time.Second) iamLockTimeout := newDynamicTimeout(5*time.Second, 3*time.Second)
@ -223,6 +220,9 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer, etcdClient *etc
// Migrate storage format if needed. // Migrate storage format if needed.
for { for {
// Hold the lock for migration only.
txnLk := objAPI.NewNSLock(minioMetaBucket, minioConfigPrefix+"/iam.lock")
// let one of the server acquire the lock, if not let them timeout. // let one of the server acquire the lock, if not let them timeout.
// which shall be retried again by this loop. // which shall be retried again by this loop.
lkctx, err := txnLk.GetLock(retryCtx, iamLockTimeout) lkctx, err := txnLk.GetLock(retryCtx, iamLockTimeout)

View File

@ -297,13 +297,6 @@ func initServer(ctx context.Context, newObject ObjectLayer) ([]BucketInfo, error
// Once the config is fully loaded, initialize the new object layer. // Once the config is fully loaded, initialize the new object layer.
setObjectLayer(newObject) setObjectLayer(newObject)
// Make sure to hold lock for entire migration to avoid
// such that only one server should migrate the entire config
// at a given time, this big transaction lock ensures this
// appropriately. This is also true for rotation of encrypted
// content.
txnLk := newObject.NewNSLock(minioMetaBucket, minioConfigPrefix+"/transaction.lock")
// **** WARNING **** // **** WARNING ****
// Migrating to encrypted backend should happen before initialization of any // Migrating to encrypted backend should happen before initialization of any
// sub-systems, make sure that we do not move the above codeblock elsewhere. // sub-systems, make sure that we do not move the above codeblock elsewhere.
@ -320,6 +313,13 @@ func initServer(ctx context.Context, newObject ObjectLayer) ([]BucketInfo, error
default: default:
} }
// Make sure to hold lock for entire migration to avoid
// such that only one server should migrate the entire config
// at a given time, this big transaction lock ensures this
// appropriately. This is also true for rotation of encrypted
// content.
txnLk := newObject.NewNSLock(minioMetaBucket, minioConfigPrefix+"/transaction.lock")
// let one of the server acquire the lock, if not let them timeout. // let one of the server acquire the lock, if not let them timeout.
// which shall be retried again by this loop. // which shall be retried again by this loop.
lkctx, err := txnLk.GetLock(ctx, lockTimeout) lkctx, err := txnLk.GetLock(ctx, lockTimeout)