fix: reduce crawler memory usage by orders of magnitude (#11556)
Currently the crawler waits for an entire readdir() call to return before it processes usage, lifecycle, replication, and healing. Instead we should pass the applicator all the way down, so that no special stack has to be built for all the contents of a single directory. This allows for:

- no need to remember the entire list of entries per directory before applying the required functions
- no need to wait for the entire readdir() call to finish before applying the required functions
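To make the "pass the applicator down" idea concrete, here is a minimal standalone sketch using only the Go standard library. The names collectThenApply and applyWhileReading are hypothetical stand-ins for illustration, not minio's actual helpers:

```go
package main

import (
	"fmt"
	"io"
	"io/fs"
	"os"
)

// collectThenApply is the old shape: read the whole directory into a
// slice, then apply fn to each entry. Memory grows with the number of
// entries in the directory.
func collectThenApply(dir string, fn func(fs.DirEntry) error) error {
	entries, err := os.ReadDir(dir) // holds every entry in memory at once
	if err != nil {
		return err
	}
	for _, entry := range entries {
		if err := fn(entry); err != nil {
			return err
		}
	}
	return nil
}

// applyWhileReading is the new shape: the applicator fn is passed down
// and invoked per entry while the directory is still being read, so no
// list of entries is ever accumulated.
func applyWhileReading(dir string, fn func(fs.DirEntry) error) error {
	f, err := os.Open(dir)
	if err != nil {
		return err
	}
	defer f.Close()
	for {
		batch, err := f.ReadDir(1) // fetch one entry at a time
		for _, entry := range batch {
			if err := fn(entry); err != nil {
				return err
			}
		}
		if err == io.EOF {
			return nil // directory exhausted
		}
		if err != nil {
			return err
		}
	}
}

func main() {
	err := applyWhileReading(".", func(e fs.DirEntry) error {
		fmt.Println(e.Name())
		return nil
	})
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
```

With the second shape, peak memory per directory is one entry rather than the full listing, which is where the "orders of magnitude" reduction for very large directories would come from.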
@@ -407,19 +407,19 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
 			if f.dataUsageCrawlDebug {
 				console.Debugf(scannerLogPrefix+" no bucket (%s,%s)\n", f.root, entName)
 			}
-			return nil
+			return errDoneForNow
 		}
 
 		if isReservedOrInvalidBucket(bucket, false) {
 			if f.dataUsageCrawlDebug {
 				console.Debugf(scannerLogPrefix+" invalid bucket: %v, entry: %v\n", bucket, entName)
 			}
-			return nil
+			return errDoneForNow
 		}
 
 		select {
 		case <-done:
-			return ctx.Err()
+			return errDoneForNow
 		default:
 		}
 
@@ -682,7 +682,7 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder cachedFolder,
 		addDir = func(entName string, typ os.FileMode) error {
 			select {
 			case <-done:
-				return ctx.Err()
+				return errDoneForNow
 			default:
 			}
 
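Both hunks replace return nil and return ctx.Err() with return errDoneForNow. Read alongside the commit message, this looks like the sentinel-error pattern: the per-entry callback returns a distinguished error to tell the walker to stop handing out entries early, without the stop being treated as a failure. A sketch of that pattern, where errDoneForNow and walkEntries are stand-ins rather than minio's exact API:

```go
package main

import (
	"errors"
	"fmt"
)

// errDoneForNow mirrors the sentinel in the diff: returning it from the
// per-entry callback means "stop iterating, but this is not a failure".
var errDoneForNow = errors.New("done for now")

// walkEntries is a hypothetical walker in the shape the commit suggests:
// it feeds entries to fn one at a time and stops at the first error,
// swallowing the sentinel so callers see a clean early exit.
func walkEntries(entries []string, fn func(string) error) error {
	for _, name := range entries {
		if err := fn(name); err != nil {
			if errors.Is(err, errDoneForNow) {
				return nil // early stop requested by the callback
			}
			return err // a real failure propagates
		}
	}
	return nil
}

func main() {
	seen := 0
	err := walkEntries([]string{"a", "b", "c", "d"}, func(name string) error {
		fmt.Println("visit", name)
		seen++
		if seen == 2 {
			return errDoneForNow // stop after two entries
		}
		return nil
	})
	fmt.Println("err:", err) // prints: err: <nil>
}
```

Under that reading, the changed lines matter because a plain return nil would tell the walker to keep streaming entries to a callback that already knows it is done, while a dedicated sentinel lets iteration stop immediately without being mistaken for an error.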