From 42ba0da6b057931b901b0cec12e031aff5e5131f Mon Sep 17 00:00:00 2001
From: Harshavardhana
Date: Sun, 2 Jan 2022 09:15:34 -0800
Subject: [PATCH] fix: initialize new drwMutex for each attempt in 'for {' loop. (#14009)

It is possible that a GetLock() call remembers a previously failed
releaseAll() when there are networking issues; this stale state can
have unwanted side effects.

This PR avoids that side effect by initializing NewNSLock() for each
GetLock() attempt, so that no prior in-memory state can interfere with
the new lock grants.
---
 cmd/data-scanner.go | 32 ++++++++++++++------------------
 cmd/iam.go          |  6 +++---
 cmd/server-main.go  | 14 +++++++-------
 3 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/cmd/data-scanner.go b/cmd/data-scanner.go
index e1613dae2..4d5c7ea1e 100644
--- a/cmd/data-scanner.go
+++ b/cmd/data-scanner.go
@@ -72,14 +72,16 @@ var (
 // initDataScanner will start the scanner in the background.
 func initDataScanner(ctx context.Context, objAPI ObjectLayer) {
 	go func() {
+		r := rand.New(rand.NewSource(time.Now().UnixNano()))
 		// Run the data scanner in a loop
 		for {
-			select {
-			case <-ctx.Done():
-				return
-			default:
-				runDataScanner(ctx, objAPI)
+			runDataScanner(ctx, objAPI)
+			duration := time.Duration(r.Float64() * float64(scannerCycle.Get()))
+			if duration < time.Second {
+				// Make sure to sleep atleast a second to avoid high CPU ticks.
+				duration = time.Second
 			}
+			time.Sleep(duration)
 		}
 	}()
 }
@@ -106,20 +108,14 @@ func (s *safeDuration) Get() time.Duration {
 // There should only ever be one scanner running per cluster.
 func runDataScanner(pctx context.Context, objAPI ObjectLayer) {
 	// Make sure only 1 scanner is running on the cluster.
-	locker := objAPI.NewNSLock(minioMetaBucket, "runDataScanner.lock")
-	var ctx context.Context
-	r := rand.New(rand.NewSource(time.Now().UnixNano()))
-	for {
-		lkctx, err := locker.GetLock(pctx, dataScannerLeaderLockTimeout)
-		if err != nil {
-			time.Sleep(time.Duration(r.Float64() * float64(scannerCycle.Get())))
-			continue
-		}
-		ctx = lkctx.Context()
-		defer lkctx.Cancel()
-		break
-		// No unlock for "leader" lock.
+	locker := objAPI.NewNSLock(minioMetaBucket, "scanner/runDataScanner.lock")
+	lkctx, err := locker.GetLock(pctx, dataScannerLeaderLockTimeout)
+	if err != nil {
+		return
 	}
+	ctx := lkctx.Context()
+	defer lkctx.Cancel()
+	// No unlock for "leader" lock.
 
 	// Load current bloom cycle
 	nextBloomCycle := intDataUpdateTracker.current() + 1
diff --git a/cmd/iam.go b/cmd/iam.go
index de7776c70..caa9a7037 100644
--- a/cmd/iam.go
+++ b/cmd/iam.go
@@ -213,9 +213,6 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer, etcdClient *etc
 	// Indicate to our routine to exit cleanly upon return.
 	defer cancel()
 
-	// Hold the lock for migration only.
-	txnLk := objAPI.NewNSLock(minioMetaBucket, minioConfigPrefix+"/iam.lock")
-
 	// allocate dynamic timeout once before the loop
 	iamLockTimeout := newDynamicTimeout(5*time.Second, 3*time.Second)
 
@@ -223,6 +220,9 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer, etcdClient *etc
 
 	// Migrate storage format if needed.
 	for {
+		// Hold the lock for migration only.
+		txnLk := objAPI.NewNSLock(minioMetaBucket, minioConfigPrefix+"/iam.lock")
+
 		// let one of the server acquire the lock, if not let them timeout.
 		// which shall be retried again by this loop.
 		lkctx, err := txnLk.GetLock(retryCtx, iamLockTimeout)
diff --git a/cmd/server-main.go b/cmd/server-main.go
index 0910510b2..2a0b0b069 100644
--- a/cmd/server-main.go
+++ b/cmd/server-main.go
@@ -297,13 +297,6 @@ func initServer(ctx context.Context, newObject ObjectLayer) ([]BucketInfo, error
 	// Once the config is fully loaded, initialize the new object layer.
 	setObjectLayer(newObject)
 
-	// Make sure to hold lock for entire migration to avoid
-	// such that only one server should migrate the entire config
-	// at a given time, this big transaction lock ensures this
-	// appropriately. This is also true for rotation of encrypted
-	// content.
-	txnLk := newObject.NewNSLock(minioMetaBucket, minioConfigPrefix+"/transaction.lock")
-
 	// **** WARNING ****
 	// Migrating to encrypted backend should happen before initialization of any
 	// sub-systems, make sure that we do not move the above codeblock elsewhere.
@@ -320,6 +313,13 @@ func initServer(ctx context.Context, newObject ObjectLayer) ([]BucketInfo, error
 	default:
 	}
 
+	// Make sure to hold lock for entire migration to avoid
+	// such that only one server should migrate the entire config
+	// at a given time, this big transaction lock ensures this
+	// appropriately. This is also true for rotation of encrypted
+	// content.
+	txnLk := newObject.NewNSLock(minioMetaBucket, minioConfigPrefix+"/transaction.lock")
+
 	// let one of the server acquire the lock, if not let them timeout.
 	// which shall be retried again by this loop.
 	lkctx, err := txnLk.GetLock(ctx, lockTimeout)
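
Editor's note (illustrative sketch, not part of the patch): the snippet below shows the
general pattern this change applies, namely constructing a fresh lock handle on every
acquisition attempt so that nothing left behind by a failed attempt (such as a partially
completed releaseAll()) can leak into the next one. The Locker interface, acquireWithRetry,
newLocker, and flakyLocker names are hypothetical stand-ins and are not MinIO's
NewNSLock()/GetLock() API.

// A minimal, self-contained sketch of the "new lock object per attempt" idea.
// All names here are hypothetical and only mirror the pattern used by the patch.
package main

import (
	"context"
	"fmt"
	"time"
)

// Locker is a hypothetical distributed-lock handle; a failed GetLock may leave
// internal bookkeeping behind, which is why an instance is never reused here.
type Locker interface {
	GetLock(ctx context.Context, timeout time.Duration) (release func(), err error)
}

// acquireWithRetry builds a fresh Locker for every attempt, mirroring the patch:
// no state from a previous failed attempt is carried into the next GetLock call.
func acquireWithRetry(ctx context.Context, newLocker func() Locker, timeout time.Duration) (func(), error) {
	for {
		release, err := newLocker().GetLock(ctx, timeout) // fresh instance each time
		if err == nil {
			return release, nil
		}
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		case <-time.After(time.Second): // simple fixed back-off before retrying
		}
	}
}

// flakyLocker fails a few times before succeeding, to exercise the retry loop.
type flakyLocker struct{ failures *int }

func (f flakyLocker) GetLock(ctx context.Context, _ time.Duration) (func(), error) {
	if *f.failures > 0 {
		*f.failures--
		return nil, fmt.Errorf("lock not granted")
	}
	return func() { fmt.Println("released") }, nil
}

func main() {
	failures := 2
	release, err := acquireWithRetry(context.Background(),
		func() Locker { return flakyLocker{failures: &failures} }, time.Second)
	if err != nil {
		fmt.Println("giving up:", err)
		return
	}
	fmt.Println("lock acquired")
	release()
}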