fix: reduce crawler memory usage by orders of magnitude (#11556)

Currently the crawler waits for an entire readdir call to
return before it processes usage, lifecycle, replication
and healing. Instead, we should pass the applicator all
the way down, which avoids building a special stack for all
the contents of a single directory.

This allows for:

- no need to remember the entire list of entries per directory
  before applying the required functions
- no need to wait for the entire readdir() call to finish before
  applying the required functions
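
To make the change concrete, here is a minimal, self-contained Go sketch of the before/after shapes. The helper names walkBuffered and walkStreaming are illustrative only, not MinIO's actual APIs:

```go
package main

import (
	"fmt"
	"io"
	"os"
)

// Old shape: buffer the whole directory listing, then apply fn.
// Memory grows with the number of entries in the directory.
func walkBuffered(dir string, fn func(name string) error) error {
	entries, err := os.ReadDir(dir) // reads and sorts every entry up front
	if err != nil {
		return err
	}
	for _, entry := range entries {
		if err := fn(entry.Name()); err != nil {
			return err
		}
	}
	return nil
}

// New shape: apply fn to entries as they are read, in small batches,
// so memory stays bounded regardless of directory size.
func walkStreaming(dir string, fn func(name string) error) error {
	d, err := os.Open(dir)
	if err != nil {
		return err
	}
	defer d.Close()
	for {
		names, rerr := d.Readdirnames(1000) // batch size bounds peak memory
		for _, name := range names {
			if err := fn(name); err != nil {
				return err
			}
		}
		if rerr == io.EOF {
			return nil
		}
		if rerr != nil {
			return rerr
		}
	}
}

func main() {
	_ = walkStreaming(".", func(name string) error {
		fmt.Println(name)
		return nil
	})
}
```

With the streaming shape, per-directory cost is O(batch) instead of O(entries), which is where the "orders of magnitude" memory reduction comes from on very large directories.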
Author: Harshavardhana
Date: 2021-02-17 15:34:42 -08:00 (committed by GitHub)
Parent: e07918abe3
Commit: 289e1d8b2a
8 changed files with 47 additions and 63 deletions


@@ -23,7 +23,6 @@ import (
 	"crypto/rand"
 	"encoding/base64"
 	"encoding/hex"
-	"errors"
 	"fmt"
 	"io"
 	"io/ioutil"
@@ -269,10 +268,6 @@ func (c *diskCache) toClear() uint64 {
 	return bytesToClear(int64(di.Total), int64(di.Free), uint64(c.quotaPct), uint64(c.lowWatermark), uint64(c.highWatermark))
 }
 
-var (
-	errDoneForNow = errors.New("done for now")
-)
-
 func (c *diskCache) purgeWait(ctx context.Context) {
 	for {
 		select {
@@ -382,7 +377,7 @@ func (c *diskCache) purge(ctx context.Context) {
 		return nil
 	}
 
-	if err := readDirFilterFn(c.dir, filterFn); err != nil {
+	if err := readDirFn(c.dir, filterFn); err != nil {
 		logger.LogIf(ctx, err)
 		return
 	}
@@ -1025,7 +1020,7 @@ func (c *diskCache) scanCacheWritebackFailures(ctx context.Context) {
 		return nil
 	}
 
-	if err := readDirFilterFn(c.dir, filterFn); err != nil {
+	if err := readDirFn(c.dir, filterFn); err != nil {
 		logger.LogIf(ctx, err)
 		return
 	}
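
For context on the readDirFilterFn → readDirFn rename above: readDirFn drives a per-entry callback, and the errDoneForNow sentinel removed from this file (presumably now defined alongside readDirFn) lets that callback end the walk early. A hedged sketch of the pattern follows, with the signature assumed from the call sites above rather than copied from MinIO's source:

```go
package main

import (
	"errors"
	"fmt"
	"io"
	"os"
)

// Sentinel the callback returns to stop the walk early without an error.
var errDoneForNow = errors.New("done for now")

// readDirFn sketch: call fn for each directory entry as it is read,
// never materializing the full listing. The signature is an assumption
// inferred from the call sites in the diff above.
func readDirFn(dirPath string, fn func(name string, typ os.FileMode) error) error {
	d, err := os.Open(dirPath)
	if err != nil {
		return err
	}
	defer d.Close()
	for {
		entries, rerr := d.ReadDir(16) // small batches keep memory flat
		for _, entry := range entries {
			// entry.Type() yields the mode's type bits without an extra stat.
			if err := fn(entry.Name(), entry.Type()); err != nil {
				if errors.Is(err, errDoneForNow) {
					return nil // clean early termination
				}
				return err
			}
		}
		if rerr == io.EOF {
			return nil
		}
		if rerr != nil {
			return rerr
		}
	}
}

func main() {
	// Print at most five entries, then stop via the sentinel.
	n := 0
	err := readDirFn(".", func(name string, typ os.FileMode) error {
		fmt.Println(name, typ)
		n++
		if n >= 5 {
			return errDoneForNow
		}
		return nil
	})
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
```

Returning a sentinel error from the callback is the usual Go idiom for early exit from a walk; the standard library's filepath.WalkDir uses fs.SkipDir the same way.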