crawler: Remove waitForLowActiveIO (#10667)

Only use dynamic delays for the crawler. Even though the max wait was 1 second, the number
of waits could severely impact crawler speed.

Instead of relying on a global metric, we use stateless local delays to keep the crawler
running at a speed better adjusted to current conditions.

The only place we keep waitForLowActiveIO is before bitrot checks, when enabled.
This commit is contained in:
Klaus Post 2020-10-13 13:45:08 -07:00 committed by GitHub
parent 9c042a503b
commit 03991c5d41
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 19 additions and 21 deletions

View File

@ -135,12 +135,11 @@ type cachedFolder struct {
} }
type folderScanner struct { type folderScanner struct {
root string root string
getSize getSizeFn getSize getSizeFn
oldCache dataUsageCache oldCache dataUsageCache
newCache dataUsageCache newCache dataUsageCache
withFilter *bloomFilter withFilter *bloomFilter
waitForLowActiveIO func()
dataUsageCrawlMult float64 dataUsageCrawlMult float64
dataUsageCrawlDebug bool dataUsageCrawlDebug bool
@ -155,7 +154,7 @@ type folderScanner struct {
// The returned cache will always be valid, but may not be updated from the existing. // The returned cache will always be valid, but may not be updated from the existing.
// Before each operation waitForLowActiveIO is called which can be used to temporarily halt the crawler. // Before each operation waitForLowActiveIO is called which can be used to temporarily halt the crawler.
// If the supplied context is canceled the function will return at the first chance. // If the supplied context is canceled the function will return at the first chance.
func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache, waitForLowActiveIO func(), getSize getSizeFn) (dataUsageCache, error) { func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache, getSize getSizeFn) (dataUsageCache, error) {
t := UTCNow() t := UTCNow()
logPrefix := color.Green("data-usage: ") logPrefix := color.Green("data-usage: ")
@ -183,7 +182,6 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
getSize: getSize, getSize: getSize,
oldCache: cache, oldCache: cache,
newCache: dataUsageCache{Info: cache.Info}, newCache: dataUsageCache{Info: cache.Info},
waitForLowActiveIO: waitForLowActiveIO,
newFolders: nil, newFolders: nil,
existingFolders: nil, existingFolders: nil,
dataUsageCrawlMult: delayMult, dataUsageCrawlMult: delayMult,
@ -376,7 +374,6 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
} }
} }
} }
f.waitForLowActiveIO()
sleepDuration(dataCrawlSleepPerFolder, f.dataUsageCrawlMult) sleepDuration(dataCrawlSleepPerFolder, f.dataUsageCrawlMult)
cache := dataUsageEntry{} cache := dataUsageEntry{}
@ -424,7 +421,6 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
} }
return nil return nil
} }
f.waitForLowActiveIO()
// Dynamic time delay. // Dynamic time delay.
t := UTCNow() t := UTCNow()
@ -484,7 +480,9 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
// If that doesn't bring it back we remove the folder and assume it was deleted. // If that doesn't bring it back we remove the folder and assume it was deleted.
// This means that the next run will not look for it. // This means that the next run will not look for it.
for k := range existing { for k := range existing {
f.waitForLowActiveIO() // Dynamic time delay.
t := UTCNow()
bucket, prefix := path2BucketObject(k) bucket, prefix := path2BucketObject(k)
if f.dataUsageCrawlDebug { if f.dataUsageCrawlDebug {
logger.Info(color.Green("folder-scanner:")+" checking disappeared folder: %v/%v", bucket, prefix) logger.Info(color.Green("folder-scanner:")+" checking disappeared folder: %v/%v", bucket, prefix)
@ -498,6 +496,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
versionID: versionID, versionID: versionID,
}, madmin.HealItemObject) }, madmin.HealItemObject)
}) })
sleepDuration(time.Since(t), f.dataUsageCrawlMult)
if f.dataUsageCrawlDebug && err != nil { if f.dataUsageCrawlDebug && err != nil {
logger.Info(color.Green("healObjects:")+" checking returned value %v", err) logger.Info(color.Green("healObjects:")+" checking returned value %v", err)
@ -535,7 +534,6 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder cachedFolder)
default: default:
} }
f.waitForLowActiveIO()
if typ&os.ModeDir != 0 { if typ&os.ModeDir != 0 {
dirStack = append(dirStack, entName) dirStack = append(dirStack, entName)
err := readDirFn(path.Join(dirStack...), addDir) err := readDirFn(path.Join(dirStack...), addDir)

View File

@ -62,7 +62,7 @@ func TestDataUsageUpdate(t *testing.T) {
return 0, nil return 0, nil
} }
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, func() {}, getSize) got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -183,7 +183,7 @@ func TestDataUsageUpdate(t *testing.T) {
}, },
} }
createUsageTestFiles(t, base, bucket, files) createUsageTestFiles(t, base, bucket, files)
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize) got, err = crawlDataFolder(context.Background(), base, got, getSize)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -268,7 +268,7 @@ func TestDataUsageUpdate(t *testing.T) {
} }
// Changed dir must be picked up in this many cycles. // Changed dir must be picked up in this many cycles.
for i := 0; i < dataUsageUpdateDirCycles; i++ { for i := 0; i < dataUsageUpdateDirCycles; i++ {
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize) got, err = crawlDataFolder(context.Background(), base, got, getSize)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -355,7 +355,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
} }
return 0, nil return 0, nil
} }
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, func() {}, getSize) got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, getSize)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -465,7 +465,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
}, },
} }
createUsageTestFiles(t, base, "", files) createUsageTestFiles(t, base, "", files)
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize) got, err = crawlDataFolder(context.Background(), base, got, getSize)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -548,7 +548,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
} }
// Changed dir must be picked up in this many cycles. // Changed dir must be picked up in this many cycles.
for i := 0; i < dataUsageUpdateDirCycles; i++ { for i := 0; i < dataUsageUpdateDirCycles; i++ {
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize) got, err = crawlDataFolder(context.Background(), base, got, getSize)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -652,7 +652,7 @@ func TestDataUsageCacheSerialize(t *testing.T) {
} }
return 0, nil return 0, nil
} }
want, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, func() {}, getSize) want, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }

View File

@ -327,7 +327,7 @@ func (fs *FSObjects) crawlBucket(ctx context.Context, bucket string, cache dataU
} }
// Load bucket info. // Load bucket info.
cache, err = crawlDataFolder(ctx, fs.fsPath, cache, fs.waitForLowActiveIO, func(item crawlItem) (int64, error) { cache, err = crawlDataFolder(ctx, fs.fsPath, cache, func(item crawlItem) (int64, error) {
bucket, object := item.bucket, item.objectPath() bucket, object := item.bucket, item.objectPath()
fsMetaBytes, err := ioutil.ReadFile(pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket, object, fs.metaJSONFile)) fsMetaBytes, err := ioutil.ReadFile(pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket, object, fs.metaJSONFile))
if err != nil && !os.IsNotExist(err) { if err != nil && !os.IsNotExist(err) {

View File

@ -372,7 +372,7 @@ func (s *xlStorage) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCac
} }
opts := globalCrawlerConfig opts := globalCrawlerConfig
dataUsageInfo, err := crawlDataFolder(ctx, s.diskPath, cache, s.waitForLowActiveIO, func(item crawlItem) (int64, error) { dataUsageInfo, err := crawlDataFolder(ctx, s.diskPath, cache, func(item crawlItem) (int64, error) {
// Look for `xl.meta/xl.json' at the leaf. // Look for `xl.meta/xl.json' at the leaf.
if !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFile) && if !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFile) &&
!strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFileV1) { !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFileV1) {