Move metadata parsing inside the mutex to slow down the crawler (#8952)

Adding a mutex slows down the crawler to avoid large
spikes in CPU usage. Also add millisecond-interval jitter
to the disk-usage calculation to smooth out the spikes
further.
This commit is contained in:
Harshavardhana 2020-02-06 13:52:11 +05:30 committed by GitHub
parent b1bfd75fcf
commit 49df290270

View File

@ -20,6 +20,7 @@ import (
"bytes"
"context"
"encoding/json"
"math/rand"
"os"
"path/filepath"
"sync"
@ -159,13 +160,19 @@ func updateUsage(basePath string, doneCh <-chan struct{}, waitForLowActiveIO fun
}
numWorkers := 4
walkInterval := 1 * time.Millisecond
var mutex sync.Mutex // Mutex to update dataUsageInfo
r := rand.New(rand.NewSource(UTCNow().UnixNano()))
fastWalk(basePath, numWorkers, doneCh, func(path string, typ os.FileMode) error {
// Wait for I/O to go down.
waitForLowActiveIO()
// Randomize sleep intervals, to stagger the walk.
defer time.Sleep(time.Duration(r.Float64() * float64(walkInterval)))
bucket, entry := path2BucketObjectWithBasePath(basePath, path)
if bucket == "" {
return nil
@ -183,6 +190,9 @@ func updateUsage(basePath string, doneCh <-chan struct{}, waitForLowActiveIO fun
return nil
}
mutex.Lock()
defer mutex.Unlock()
if typ&os.ModeDir != 0 {
return nil
}
@ -192,12 +202,10 @@ func updateUsage(basePath string, doneCh <-chan struct{}, waitForLowActiveIO fun
return errSkipFile
}
mutex.Lock()
dataUsageInfo.ObjectsCount++
dataUsageInfo.ObjectsTotalSize += uint64(size)
dataUsageInfo.BucketsSizes[bucket] += uint64(size)
dataUsageInfo.ObjectsSizesHistogram[objSizeToHistoInterval(uint64(size))]++
mutex.Unlock()
return nil
})