mirror of
https://github.com/minio/minio.git
synced 2025-01-11 15:03:22 -05:00
fix: initialize new drwMutex for each attempt in 'for {' loop. (#14009)
It is possible that GetLock() call remembers a previously failed releaseAll() when there are networking issues, now this state can have potential side effects. This PR tries to avoid this side affect by making sure to initialize NewNSLock() for each GetLock() attempts made to avoid any prior state in the memory that can interfere with the new lock grants.
This commit is contained in:
parent
f527c708f2
commit
42ba0da6b0
@ -72,14 +72,16 @@ var (
|
|||||||
// initDataScanner will start the scanner in the background.
|
// initDataScanner will start the scanner in the background.
|
||||||
func initDataScanner(ctx context.Context, objAPI ObjectLayer) {
|
func initDataScanner(ctx context.Context, objAPI ObjectLayer) {
|
||||||
go func() {
|
go func() {
|
||||||
|
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||||
// Run the data scanner in a loop
|
// Run the data scanner in a loop
|
||||||
for {
|
for {
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return
|
|
||||||
default:
|
|
||||||
runDataScanner(ctx, objAPI)
|
runDataScanner(ctx, objAPI)
|
||||||
|
duration := time.Duration(r.Float64() * float64(scannerCycle.Get()))
|
||||||
|
if duration < time.Second {
|
||||||
|
// Make sure to sleep atleast a second to avoid high CPU ticks.
|
||||||
|
duration = time.Second
|
||||||
}
|
}
|
||||||
|
time.Sleep(duration)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
@ -106,20 +108,14 @@ func (s *safeDuration) Get() time.Duration {
|
|||||||
// There should only ever be one scanner running per cluster.
|
// There should only ever be one scanner running per cluster.
|
||||||
func runDataScanner(pctx context.Context, objAPI ObjectLayer) {
|
func runDataScanner(pctx context.Context, objAPI ObjectLayer) {
|
||||||
// Make sure only 1 scanner is running on the cluster.
|
// Make sure only 1 scanner is running on the cluster.
|
||||||
locker := objAPI.NewNSLock(minioMetaBucket, "runDataScanner.lock")
|
locker := objAPI.NewNSLock(minioMetaBucket, "scanner/runDataScanner.lock")
|
||||||
var ctx context.Context
|
|
||||||
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
|
||||||
for {
|
|
||||||
lkctx, err := locker.GetLock(pctx, dataScannerLeaderLockTimeout)
|
lkctx, err := locker.GetLock(pctx, dataScannerLeaderLockTimeout)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
time.Sleep(time.Duration(r.Float64() * float64(scannerCycle.Get())))
|
return
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
ctx = lkctx.Context()
|
ctx := lkctx.Context()
|
||||||
defer lkctx.Cancel()
|
defer lkctx.Cancel()
|
||||||
break
|
|
||||||
// No unlock for "leader" lock.
|
// No unlock for "leader" lock.
|
||||||
}
|
|
||||||
|
|
||||||
// Load current bloom cycle
|
// Load current bloom cycle
|
||||||
nextBloomCycle := intDataUpdateTracker.current() + 1
|
nextBloomCycle := intDataUpdateTracker.current() + 1
|
||||||
|
@ -213,9 +213,6 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer, etcdClient *etc
|
|||||||
// Indicate to our routine to exit cleanly upon return.
|
// Indicate to our routine to exit cleanly upon return.
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
// Hold the lock for migration only.
|
|
||||||
txnLk := objAPI.NewNSLock(minioMetaBucket, minioConfigPrefix+"/iam.lock")
|
|
||||||
|
|
||||||
// allocate dynamic timeout once before the loop
|
// allocate dynamic timeout once before the loop
|
||||||
iamLockTimeout := newDynamicTimeout(5*time.Second, 3*time.Second)
|
iamLockTimeout := newDynamicTimeout(5*time.Second, 3*time.Second)
|
||||||
|
|
||||||
@ -223,6 +220,9 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer, etcdClient *etc
|
|||||||
|
|
||||||
// Migrate storage format if needed.
|
// Migrate storage format if needed.
|
||||||
for {
|
for {
|
||||||
|
// Hold the lock for migration only.
|
||||||
|
txnLk := objAPI.NewNSLock(minioMetaBucket, minioConfigPrefix+"/iam.lock")
|
||||||
|
|
||||||
// let one of the server acquire the lock, if not let them timeout.
|
// let one of the server acquire the lock, if not let them timeout.
|
||||||
// which shall be retried again by this loop.
|
// which shall be retried again by this loop.
|
||||||
lkctx, err := txnLk.GetLock(retryCtx, iamLockTimeout)
|
lkctx, err := txnLk.GetLock(retryCtx, iamLockTimeout)
|
||||||
|
@ -297,13 +297,6 @@ func initServer(ctx context.Context, newObject ObjectLayer) ([]BucketInfo, error
|
|||||||
// Once the config is fully loaded, initialize the new object layer.
|
// Once the config is fully loaded, initialize the new object layer.
|
||||||
setObjectLayer(newObject)
|
setObjectLayer(newObject)
|
||||||
|
|
||||||
// Make sure to hold lock for entire migration to avoid
|
|
||||||
// such that only one server should migrate the entire config
|
|
||||||
// at a given time, this big transaction lock ensures this
|
|
||||||
// appropriately. This is also true for rotation of encrypted
|
|
||||||
// content.
|
|
||||||
txnLk := newObject.NewNSLock(minioMetaBucket, minioConfigPrefix+"/transaction.lock")
|
|
||||||
|
|
||||||
// **** WARNING ****
|
// **** WARNING ****
|
||||||
// Migrating to encrypted backend should happen before initialization of any
|
// Migrating to encrypted backend should happen before initialization of any
|
||||||
// sub-systems, make sure that we do not move the above codeblock elsewhere.
|
// sub-systems, make sure that we do not move the above codeblock elsewhere.
|
||||||
@ -320,6 +313,13 @@ func initServer(ctx context.Context, newObject ObjectLayer) ([]BucketInfo, error
|
|||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Make sure to hold lock for entire migration to avoid
|
||||||
|
// such that only one server should migrate the entire config
|
||||||
|
// at a given time, this big transaction lock ensures this
|
||||||
|
// appropriately. This is also true for rotation of encrypted
|
||||||
|
// content.
|
||||||
|
txnLk := newObject.NewNSLock(minioMetaBucket, minioConfigPrefix+"/transaction.lock")
|
||||||
|
|
||||||
// let one of the server acquire the lock, if not let them timeout.
|
// let one of the server acquire the lock, if not let them timeout.
|
||||||
// which shall be retried again by this loop.
|
// which shall be retried again by this loop.
|
||||||
lkctx, err := txnLk.GetLock(ctx, lockTimeout)
|
lkctx, err := txnLk.GetLock(ctx, lockTimeout)
|
||||||
|
Loading…
Reference in New Issue
Block a user