avoid disk monitoring leaks under various conditions (#18777)

- HealFormat() was leaking healthcheck goroutines for
  disks; we are only interested in enabling healthcheck
  for the newly formatted disk, not for existing disks
  (see the first sketch after this list).

- When a disk is a root disk, a stray disk monitor was
  leaking while we ignored the drive.

- When loading the disks for each erasure set, we were
  leaking goroutines for the prepare-storage.go disks
  that were replaced via the globalLocalDrives slice
  (see the second sketch after this list).

- Avoid disk monitoring using up health tokens meant
  for incoming I/O, which would exhaust them
  prematurely. This is ensured by not writing an
  O_DIRECT-aligned buffer; instead we write only 2048
  bytes of content as O_DSYNC, which is sufficient
  (see the third sketch after this list).
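A minimal stand-in sketch of the first fix, showing the leak as a goroutine count. monitoredDisk and newMonitoredDisk are hypothetical models of xlStorageDiskIDCheck and its healthCheck flag from the diff below, not the actual HealFormat() code:

package main

import (
	"fmt"
	"runtime"
	"time"
)

// monitoredDisk stands in for xlStorageDiskIDCheck: passing
// healthCheck=true starts a background monitor goroutine for the disk.
type monitoredDisk struct{ stop chan struct{} }

func newMonitoredDisk(healthCheck bool) *monitoredDisk {
	d := &monitoredDisk{stop: make(chan struct{})}
	if healthCheck {
		go func() { <-d.stop }() // stand-in for the monitoring loop
	}
	return d
}

func main() {
	before := runtime.NumGoroutine()
	// Old behavior: HealFormat() re-wrapped every disk with healthCheck=true,
	// leaking one monitor per already-tracked disk on each invocation.
	// Fixed behavior: only the newly formatted disk (index 0 here) monitors.
	for i := 0; i < 4; i++ {
		_ = newMonitoredDisk(i == 0)
	}
	time.Sleep(10 * time.Millisecond) // let the goroutine start
	fmt.Println("monitors running:", runtime.NumGoroutine()-before) // 1
}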
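The third fix follows a close-before-replace pattern: stop the old drive's monitor before overwriting its slot in globalLocalDrives. A hypothetical, self-contained sketch; the disk type, Close, and replaceDrive are stand-ins, not the actual prepare-storage.go code:

package main

import "fmt"

// disk stands in for a local drive whose monitor goroutine is tied to a
// cancelable context, like xl.diskCtx/xl.cancel in the diff below.
type disk struct {
	id     string
	cancel func()
}

func (d *disk) Close() { d.cancel() } // stops the monitoring goroutine

var globalLocalDrives []*disk

func replaceDrive(i int, fresh *disk) {
	if old := globalLocalDrives[i]; old != nil {
		old.Close() // without this, the replaced disk's monitor leaks
	}
	globalLocalDrives[i] = fresh
}

func main() {
	globalLocalDrives = []*disk{{id: "d0", cancel: func() { fmt.Println("monitor d0 stopped") }}}
	replaceDrive(0, &disk{id: "d0-new", cancel: func() {}})
}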
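The buffer sizes behind the last point come from Go's indexed composite literals; this runs as-is:

package main

import "fmt"

func main() {
	// An index key in a composite literal sets the slice length to index+1,
	// so these are zeroed 10001- and 2049-byte buffers ending in a 42:
	oldBuf := []byte{10000: 42} // big enough that WriteAll() took the O_DIRECT path
	newBuf := []byte{2048: 42}  // small enough to be written with O_DSYNC only
	fmt.Println(len(oldBuf), len(newBuf)) // 10001 2049
}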
Author: Harshavardhana
Date: 2024-01-12 01:48:36 -08:00
Committed by: GitHub
Parent: ac90a873eb
Commit: e5c8794b8b
3 changed files with 49 additions and 23 deletions

@@ -228,8 +228,8 @@ func newXLStorageDiskIDCheck(storage *xlStorage, healthCheck bool) *xlStorageDiskIDCheck {
 	}
 	if driveQuorum {
-		xl.totalWrites.Add(xl.storage.getWriteAttribute())
-		xl.totalDeletes.Add(xl.storage.getDeleteAttribute())
+		xl.totalWrites.Store(xl.storage.getWriteAttribute())
+		xl.totalDeletes.Store(xl.storage.getDeleteAttribute())
 	}
 	xl.diskCtx, xl.cancel = context.WithCancel(context.TODO())
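The Add-to-Store switch matters because these calls seed the counters from values persisted on the drive. A minimal sketch, assuming the counters are sync/atomic integers: Store makes (re)initialization idempotent, while Add would accumulate the persisted value each time the constructor runs.

package main

import (
	"fmt"
	"sync/atomic"
)

func main() {
	persisted := uint64(100) // stand-in for xl.storage.getWriteAttribute()

	var total atomic.Uint64
	total.Store(persisted)
	total.Store(persisted)    // running initialization twice is harmless
	fmt.Println(total.Load()) // 100

	var buggy atomic.Uint64
	buggy.Add(persisted)
	buggy.Add(persisted)      // double-counts if initialization re-runs
	fmt.Println(buggy.Load()) // 200
}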
@@ -1032,37 +1032,50 @@ func (p *xlStorageDiskIDCheck) checkHealth(ctx context.Context) (err error) {
 	if t > maxTimeSinceLastSuccess {
 		if atomic.CompareAndSwapInt32(&p.health.status, diskHealthOK, diskHealthFaulty) {
 			logger.LogAlwaysIf(ctx, fmt.Errorf("node(%s): taking drive %s offline, time since last response %v", globalLocalNodeName, p.storage.String(), t.Round(time.Millisecond)))
-			go p.monitorDiskStatus(t)
+			go p.monitorDiskStatus(0, mustGetUUID())
 		}
 		return errFaultyDisk
 	}
 	return nil
 }
 
+// Make sure we do not write O_DIRECT aligned I/O because WriteAll() ends
+// up using the O_DIRECT codepath, which internally utilizes p.health.tokens;
+// we need to avoid using incoming I/O tokens as part of the healthcheck
+// monitoring I/O.
+var toWrite = []byte{2048: 42}
+
 // monitorDiskStatus should be called once when a drive has been marked offline.
 // Once the disk has been deemed ok, it will return to online status.
-func (p *xlStorageDiskIDCheck) monitorDiskStatus(spent time.Duration) {
+func (p *xlStorageDiskIDCheck) monitorDiskStatus(spent time.Duration, fn string) {
 	t := time.NewTicker(5 * time.Second)
 	defer t.Stop()
-	fn := mustGetUUID()
 	for range t.C {
 		if contextCanceled(p.diskCtx) {
 			return
 		}
 		if len(p.health.tokens) == 0 {
 			// Queue is still full, no need to check.
 			continue
 		}
-		err := p.storage.WriteAll(context.Background(), minioMetaTmpBucket, fn, []byte{10000: 42})
+		err := p.storage.WriteAll(context.Background(), minioMetaTmpBucket, fn, toWrite)
 		if err != nil {
 			continue
 		}
 		b, err := p.storage.ReadAll(context.Background(), minioMetaTmpBucket, fn)
-		if err != nil || len(b) != 10001 {
+		if err != nil || len(b) != len(toWrite) {
 			continue
 		}
 		err = p.storage.Delete(context.Background(), minioMetaTmpBucket, fn, DeleteOptions{
 			Recursive: false,
 			Immediate: false,
 		})
 		if err == nil {
 			t := time.Unix(0, atomic.LoadInt64(&p.health.lastSuccess))
 			if spent > 0 {
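Both this call site and goOffline further down guard the spawn with a compare-and-swap on the health status, which is what limits monitorDiskStatus to one goroutine per OK-to-faulty transition. A self-contained sketch of the pattern (the constants mirror diskHealthOK/diskHealthFaulty; the counter stands in for the spawned monitor):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

const (
	diskHealthOK int32 = iota
	diskHealthFaulty
)

func main() {
	var status = diskHealthOK
	var spawned atomic.Int32
	var wg sync.WaitGroup

	// Many callers can observe a stalled drive concurrently; the CAS lets
	// exactly one win the OK -> Faulty transition and start the monitor.
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			if atomic.CompareAndSwapInt32(&status, diskHealthOK, diskHealthFaulty) {
				spawned.Add(1) // stands in for: go p.monitorDiskStatus(...)
			}
		}()
	}
	wg.Wait()
	fmt.Println("monitors started:", spawned.Load()) // 1
}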
@@ -1108,8 +1121,6 @@ func (p *xlStorageDiskIDCheck) monitorDiskWritable(ctx context.Context) {
 	defer t.Stop()
 	fn := mustGetUUID()
-	// Be just above directio size.
-	toWrite := []byte{xioutil.DirectioAlignSize + 1: 42}
 	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
 	monitor := func() bool {
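This hunk removes monitorDiskWritable's private buffer, which at xioutil.DirectioAlignSize+1 bytes was sized to force direct I/O, in favor of the shared 2 KiB toWrite. A rough sketch of the size heuristic implied by the commit comment; the 4096 value and the exact condition inside WriteAll() are assumptions, not the actual implementation:

package main

import "fmt"

// directioAlignSize is an assumed stand-in for xioutil.DirectioAlignSize.
const directioAlignSize = 4096

// usesODirect models the assumed WriteAll() behavior: payloads at or above
// the alignment size take the O_DIRECT path (consuming p.health.tokens),
// while smaller ones are flushed with O_DSYNC.
func usesODirect(n int) bool { return n >= directioAlignSize }

func main() {
	fmt.Println(usesODirect(directioAlignSize + 1)) // true: old monitor buffer
	fmt.Println(usesODirect(2049))                  // false: shared toWrite buffer
}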
@@ -1129,7 +1140,7 @@ func (p *xlStorageDiskIDCheck) monitorDiskWritable(ctx context.Context) {
 	goOffline := func(err error, spent time.Duration) {
 		if atomic.CompareAndSwapInt32(&p.health.status, diskHealthOK, diskHealthFaulty) {
 			logger.LogAlwaysIf(ctx, fmt.Errorf("node(%s): taking drive %s offline: %v", globalLocalNodeName, p.storage.String(), err))
-			go p.monitorDiskStatus(spent)
+			go p.monitorDiskStatus(spent, fn)
 		}
 	}