fix: reduce crawler memory usage by orders of magnitude (#11556)

Currently the crawler waits for the entire readdir call to
return before it processes usage, lifecycle, replication
and healing. Instead, we pass the applicator all the way
down so we never build a separate list of all the contents
of a single directory (see the sketch after the list below).

This allows for

- no need to remember the entire list of entries per directory
  before applying the required functions
- no need to wait for the entire readdir() call to finish
  before applying the required functions
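For illustration only, here is a minimal, self-contained sketch of the streaming approach. `walkDirStreaming`, the batch size of 64, and the `apply` callback are invented for this example; this is not MinIO's actual crawler code.

```go
// Minimal sketch, not MinIO's implementation: hand every directory entry to an
// "applicator" callback as soon as it is read, instead of collecting the whole
// listing into a slice first. Memory use is bounded by the batch size.
package main

import (
	"fmt"
	"io"
	"os"
)

// walkDirStreaming reads a directory in batches of 64 entries and applies
// `apply` to each entry immediately.
func walkDirStreaming(dir string, apply func(name string, typ os.FileMode) error) error {
	f, err := os.Open(dir)
	if err != nil {
		return err
	}
	defer f.Close()

	for {
		entries, err := f.ReadDir(64) // at most 64 entries held in memory at a time
		for _, e := range entries {
			if applyErr := apply(e.Name(), e.Type()); applyErr != nil {
				return applyErr
			}
		}
		if err == io.EOF {
			return nil // end of directory
		}
		if err != nil {
			return err
		}
	}
}

func main() {
	// Example applicator: just print each entry; the crawler would update
	// usage, lifecycle, replication and healing state here instead.
	err := walkDirStreaming(".", func(name string, typ os.FileMode) error {
		fmt.Println(name, typ)
		return nil
	})
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
```

With this shape the per-directory memory cost no longer grows with the number of entries, which is where the reduction claimed in the title comes from.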
Author: Harshavardhana
Date: 2021-02-17 15:34:42 -08:00
Committed by: GitHub
Commit: 289e1d8b2a (parent e07918abe3)
8 changed files with 47 additions and 63 deletions

@@ -407,19 +407,19 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
 		if f.dataUsageCrawlDebug {
 			console.Debugf(scannerLogPrefix+" no bucket (%s,%s)\n", f.root, entName)
 		}
-		return nil
+		return errDoneForNow
 	}
 	if isReservedOrInvalidBucket(bucket, false) {
 		if f.dataUsageCrawlDebug {
 			console.Debugf(scannerLogPrefix+" invalid bucket: %v, entry: %v\n", bucket, entName)
 		}
-		return nil
+		return errDoneForNow
 	}
 	select {
 	case <-done:
-		return ctx.Err()
+		return errDoneForNow
 	default:
 	}
@@ -682,7 +682,7 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder cachedFolder,
 	addDir = func(entName string, typ os.FileMode) error {
 		select {
 		case <-done:
-			return ctx.Err()
+			return errDoneForNow
 		default:
 		}
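Both hunks replace `return nil` / `return ctx.Err()` with `errDoneForNow`, a sentinel error that lets the callback stop the directory walk early while the caller treats the stop as expected rather than as a failure. Below is a rough standalone sketch of that pattern, assuming a hypothetical `stopWalk` sentinel and the standard library's filepath.WalkDir rather than the crawler's own walk helpers.

```go
// Rough sketch of the sentinel-error pattern, assuming a hypothetical stopWalk
// sentinel and filepath.WalkDir; errDoneForNow plays the same role in the diff.
package main

import (
	"errors"
	"fmt"
	"io/fs"
	"path/filepath"
)

// stopWalk means "stop walking now"; it is expected and not a real failure.
var stopWalk = errors.New("done for now")

func walk(root string, done <-chan struct{}) error {
	err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		select {
		case <-done:
			// Returning the sentinel aborts the walk immediately.
			return stopWalk
		default:
		}
		fmt.Println(path)
		return nil
	})
	if errors.Is(err, stopWalk) {
		return nil // early termination is expected, not an error
	}
	return err
}

func main() {
	done := make(chan struct{})
	if err := walk(".", done); err != nil {
		fmt.Println("walk failed:", err)
	}
}
```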