Move metadata parsing inside the mutex to slow down the crawler (#8952)

Adding a mutex slows down the crawler to avoid large
spikes in CPU usage. Also add millisecond-interval jitter
to the disk-usage calculation to smooth out the spikes
further.
This commit is contained in:
Harshavardhana 2020-02-06 13:52:11 +05:30 committed by GitHub
parent b1bfd75fcf
commit 49df290270

View File

@ -20,6 +20,7 @@ import (
"bytes"
"context"
"encoding/json"
"math/rand"
"os"
"path/filepath"
"sync"
@ -159,13 +160,19 @@ func updateUsage(basePath string, doneCh <-chan struct{}, waitForLowActiveIO fun
}
numWorkers := 4
walkInterval := 1 * time.Millisecond
var mutex sync.Mutex // Mutex to update dataUsageInfo
r := rand.New(rand.NewSource(UTCNow().UnixNano()))
fastWalk(basePath, numWorkers, doneCh, func(path string, typ os.FileMode) error {
// Wait for I/O to go down.
waitForLowActiveIO()
// Randomize sleep intervals, to stagger the walk.
defer time.Sleep(time.Duration(r.Float64() * float64(walkInterval)))
bucket, entry := path2BucketObjectWithBasePath(basePath, path)
if bucket == "" {
return nil
@ -183,6 +190,9 @@ func updateUsage(basePath string, doneCh <-chan struct{}, waitForLowActiveIO fun
return nil
}
mutex.Lock()
defer mutex.Unlock()
if typ&os.ModeDir != 0 {
return nil
}
@ -192,12 +202,10 @@ func updateUsage(basePath string, doneCh <-chan struct{}, waitForLowActiveIO fun
return errSkipFile
}
mutex.Lock()
dataUsageInfo.ObjectsCount++
dataUsageInfo.ObjectsTotalSize += uint64(size)
dataUsageInfo.BucketsSizes[bucket] += uint64(size)
dataUsageInfo.ObjectsSizesHistogram[objSizeToHistoInterval(uint64(size))]++
mutex.Unlock()
return nil
})