fix: reduce crawler memory usage by orders of magnitude (#11556)

Currently the crawler waits for an entire readdir call to
return before it processes usage, lifecycle, replication
and healing. Instead, we should pass the applicator all
the way down, which avoids building a special stack for all
the contents of a single directory.

This allows for:

- no need to remember the entire list of entries per directory
  before applying the required functions
- no need to wait for the entire readdir() call to finish before
  applying the required functions
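
To make the change concrete, here is a minimal, self-contained Go sketch of the before/after shapes. The helper names walkBuffered and walkStreaming are illustrative only, not MinIO's actual APIs:

```go
package main

import (
	"fmt"
	"io"
	"os"
)

// Old shape: buffer the whole directory listing, then apply fn.
// Memory grows with the number of entries in the directory.
func walkBuffered(dir string, fn func(name string) error) error {
	entries, err := os.ReadDir(dir) // reads and sorts every entry up front
	if err != nil {
		return err
	}
	for _, entry := range entries {
		if err := fn(entry.Name()); err != nil {
			return err
		}
	}
	return nil
}

// New shape: apply fn to entries as they are read, in small batches,
// so memory stays bounded regardless of directory size.
func walkStreaming(dir string, fn func(name string) error) error {
	d, err := os.Open(dir)
	if err != nil {
		return err
	}
	defer d.Close()
	for {
		names, rerr := d.Readdirnames(1000) // batch size bounds peak memory
		for _, name := range names {
			if err := fn(name); err != nil {
				return err
			}
		}
		if rerr == io.EOF {
			return nil
		}
		if rerr != nil {
			return rerr
		}
	}
}

func main() {
	_ = walkStreaming(".", func(name string) error {
		fmt.Println(name)
		return nil
	})
}
```

With the streaming shape, per-directory cost is O(batch) instead of O(entries), which is where the "orders of magnitude" memory reduction comes from on very large directories.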
Author: Harshavardhana
Date: 2021-02-17 15:34:42 -08:00 (committed by GitHub)
Parent: e07918abe3
Commit: 289e1d8b2a
8 changed files with 47 additions and 63 deletions


@@ -23,7 +23,6 @@ import (
 	"crypto/rand"
 	"encoding/base64"
 	"encoding/hex"
-	"errors"
 	"fmt"
 	"io"
 	"io/ioutil"
@@ -269,10 +268,6 @@ func (c *diskCache) toClear() uint64 {
 	return bytesToClear(int64(di.Total), int64(di.Free), uint64(c.quotaPct), uint64(c.lowWatermark), uint64(c.highWatermark))
 }
 
-var (
-	errDoneForNow = errors.New("done for now")
-)
-
 func (c *diskCache) purgeWait(ctx context.Context) {
 	for {
 		select {
@@ -382,7 +377,7 @@ func (c *diskCache) purge(ctx context.Context) {
 		return nil
 	}
 
-	if err := readDirFilterFn(c.dir, filterFn); err != nil {
+	if err := readDirFn(c.dir, filterFn); err != nil {
 		logger.LogIf(ctx, err)
 		return
 	}
@@ -1025,7 +1020,7 @@ func (c *diskCache) scanCacheWritebackFailures(ctx context.Context) {
 		return nil
 	}
 
-	if err := readDirFilterFn(c.dir, filterFn); err != nil {
+	if err := readDirFn(c.dir, filterFn); err != nil {
 		logger.LogIf(ctx, err)
 		return
 	}
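
For context on the readDirFilterFn → readDirFn rename above: readDirFn drives a per-entry callback, and the errDoneForNow sentinel removed from this file (presumably now defined alongside readDirFn) lets that callback end the walk early. A hedged sketch of the pattern follows, with the signature assumed from the call sites above rather than copied from MinIO's source:

```go
package main

import (
	"errors"
	"fmt"
	"io"
	"os"
)

// Sentinel the callback returns to stop the walk early without an error.
var errDoneForNow = errors.New("done for now")

// readDirFn sketch: call fn for each directory entry as it is read,
// never materializing the full listing. The signature is an assumption
// inferred from the call sites in the diff above.
func readDirFn(dirPath string, fn func(name string, typ os.FileMode) error) error {
	d, err := os.Open(dirPath)
	if err != nil {
		return err
	}
	defer d.Close()
	for {
		entries, rerr := d.ReadDir(16) // small batches keep memory flat
		for _, entry := range entries {
			// entry.Type() yields the mode's type bits without an extra stat.
			if err := fn(entry.Name(), entry.Type()); err != nil {
				if errors.Is(err, errDoneForNow) {
					return nil // clean early termination
				}
				return err
			}
		}
		if rerr == io.EOF {
			return nil
		}
		if rerr != nil {
			return rerr
		}
	}
}

func main() {
	// Print at most five entries, then stop via the sentinel.
	n := 0
	err := readDirFn(".", func(name string, typ os.FileMode) error {
		fmt.Println(name, typ)
		n++
		if n >= 5 {
			return errDoneForNow
		}
		return nil
	})
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
```

Returning a sentinel error from the callback is the usual Go idiom for early exit from a walk; the standard library's filepath.WalkDir uses fs.SkipDir the same way.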