use optimal memory while purging cache (#9426)

re-implement the cache purging routine to
avoid using ioutil.ReadDir which can lead
to high allocations when there are cache
directories with lots of content, or
when cache is installed in memory constrained
environments.

Instead rely on a callback function where we
use no more than 8KiB of memory per
cycle.

For the precursor to this change refer to #9425; the original
issue was pointed out by Caleb Case <caleb@storj.io>
This commit is contained in:
Harshavardhana 2020-04-23 12:26:13 -07:00 committed by GitHub
parent ac5061df2c
commit 957ecb1b64
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 188 additions and 17 deletions

View File

@ -21,10 +21,10 @@ import (
"context"
"crypto/rand"
"encoding/hex"
"errors"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"os"
"strings"
@ -228,6 +228,10 @@ func (c *diskCache) toClear() uint64 {
return bytesToClear(int64(di.Total), int64(di.Free), uint64(c.quotaPct), uint64(c.lowWatermark))
}
// errDoneForNow is the sentinel a directory filter callback returns to ask
// the directory walk to stop early; the walker treats it as a clean stop
// rather than as a failure.
var errDoneForNow = errors.New("done for now")
// Purge cache entries that were not accessed.
func (c *diskCache) purge(ctx context.Context) {
if c.diskUsageLow() {
@ -263,25 +267,24 @@ func (c *diskCache) purge(ctx context.Context) {
}
return fm
}
objDirs, err := ioutil.ReadDir(c.dir)
if err != nil {
log.Fatal(err)
}
for _, obj := range objDirs {
if obj.Name() == minioMetaBucket {
continue
filterFn := func(name string, typ os.FileMode) error {
if name == minioMetaBucket {
// Proceed to next file.
return nil
}
cacheDir := pathJoin(c.dir, obj.Name())
cacheDir := pathJoin(c.dir, name)
meta, _, numHits, err := c.statCachedMeta(ctx, cacheDir)
if err != nil {
// delete any partially filled cache entry left behind.
removeAll(cacheDir)
continue
// Proceed to next file.
return nil
}
// stat all cached file ranges and cacheDataFile.
cachedFiles := fiStatFn(meta.Ranges, cacheDataFile, pathJoin(c.dir, obj.Name()))
cachedFiles := fiStatFn(meta.Ranges, cacheDataFile, pathJoin(c.dir, name))
objInfo := meta.ToObjectInfo("", "")
cc := cacheControlOpts(objInfo)
for fname, fi := range cachedFiles {
@ -291,9 +294,11 @@ func (c *diskCache) purge(ctx context.Context) {
logger.LogIf(ctx, err)
}
scorer.adjustSaveBytes(-fi.Size())
// break early if sufficient disk space reclaimed.
if c.diskUsageLow() {
return
// if we found disk usage is already low, we return nil filtering is complete.
return errDoneForNow
}
}
continue
@ -305,12 +310,24 @@ func (c *diskCache) purge(ctx context.Context) {
if err != nil || (fi.ModTime().Before(expiry) && len(cachedFiles) == 0) {
removeAll(cacheDir)
scorer.adjustSaveBytes(-fi.Size())
continue
// Proceed to next file.
return nil
}
// if we found disk usage is already low, we return nil filtering is complete.
if c.diskUsageLow() {
return
return errDoneForNow
}
// Proceed to next file.
return nil
}
if err := readDirFilterFn(c.dir, filterFn); err != nil {
logger.LogIf(ctx, err)
return
}
for _, path := range scorer.fileNames() {
removeAll(path)
slashIdx := strings.LastIndex(path, SlashSeparator)

View File

@ -1,7 +1,7 @@
// +build plan9 solaris
/*
* MinIO Cloud Storage, (C) 2016, 2017, 2018 MinIO, Inc.
* MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -32,6 +32,44 @@ func readDir(dirPath string) (entries []string, err error) {
return readDirN(dirPath, -1)
}
// readDirFilterFn applies the filter function on each entry at dirPath; it
// does not recurse into sub-directories.
//
// Entries are read in batches of at most 1000 so the whole listing is never
// held in memory at once. For every entry, filter is called with the entry
// name and its file mode:
//   - a nil return continues with the next entry;
//   - errDoneForNow stops the iteration and readDirFilterFn returns nil;
//   - any other error stops the iteration and is returned to the caller.
//
// errFileNotFound is returned when dirPath does not exist or when opening it
// fails with a "not a directory" error.
func readDirFilterFn(dirPath string, filter func(name string, typ os.FileMode) error) error {
	d, err := os.Open(dirPath)
	if err != nil {
		// File is really not found.
		if os.IsNotExist(err) {
			return errFileNotFound
		}
		// File path cannot be verified since one of the parents is a file.
		if strings.Contains(err.Error(), "not a directory") {
			return errFileNotFound
		}
		return err
	}
	defer d.Close()

	// Upper bound on the number of entries fetched per Readdir call.
	const maxEntries = 1000
	for {
		fis, rerr := d.Readdir(maxEntries)
		if rerr != nil {
			if rerr == io.EOF {
				// Directory fully consumed.
				return nil
			}
			return rerr
		}
		for _, fi := range fis {
			ferr := filter(fi.Name(), fi.Mode())
			if ferr == nil {
				continue
			}
			if ferr == errDoneForNow {
				// The callback asked to stop early; not a failure.
				return nil
			}
			// Surface callback failures instead of silently dropping them.
			return ferr
		}
	}
}
// Return N entries at the directory dirPath. If count is -1, return all entries
func readDirN(dirPath string, count int) (entries []string, err error) {
d, err := os.Open(dirPath)

View File

@ -1,7 +1,7 @@
// +build linux,!appengine darwin freebsd netbsd openbsd
/*
* MinIO Cloud Storage, (C) 2016, 2017, 2018 MinIO, Inc.
* MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -75,6 +75,59 @@ func readDir(dirPath string) (entries []string, err error) {
return readDirN(dirPath, -1)
}
// readDirFilterFn applies the filter function on each entry at dirPath; it
// does not recurse into sub-directories.
//
// The directory is read with syscall.ReadDirent into a single reusable
// buffer of blockSize bytes, and each raw entry is decoded with parseDirEnt.
// Entries with an empty name, "." and "..", and symbolic links are skipped.
// For every remaining entry, filter is called with the name and type:
//   - a nil return continues with the next entry;
//   - errDoneForNow stops the iteration and readDirFilterFn returns nil;
//   - any other error stops the iteration and is returned to the caller.
//
// errFileNotFound is returned when dirPath does not exist or is not a
// directory, errFileAccessDenied when opening it fails on permissions.
func readDirFilterFn(dirPath string, filter func(name string, typ os.FileMode) error) error {
	fd, err := syscall.Open(dirPath, 0, 0)
	if err != nil {
		if os.IsNotExist(err) || isSysErrNotDir(err) {
			return errFileNotFound
		}
		if os.IsPermission(err) {
			return errFileAccessDenied
		}
		return err
	}
	defer syscall.Close(fd)

	buf := make([]byte, blockSize) // scratch space for raw entries, refilled by ReadDirent
	boff := 0                      // starting read position in buf
	nbuf := 0                      // end of valid data in buf

	for {
		if boff >= nbuf {
			// Everything buffered so far has been consumed; fetch more.
			boff = 0
			nbuf, err = syscall.ReadDirent(fd, buf)
			if err != nil {
				if isSysErrNotDir(err) {
					return errFileNotFound
				}
				return err
			}
			if nbuf <= 0 {
				// No more entries.
				return nil
			}
		}

		consumed, name, typ, perr := parseDirEnt(buf[boff:nbuf])
		if perr != nil {
			return perr
		}
		boff += consumed

		if name == "" || name == "." || name == ".." {
			continue
		}
		if typ&os.ModeSymlink == os.ModeSymlink {
			continue
		}

		if ferr := filter(name, typ); ferr != nil {
			if ferr == errDoneForNow {
				// The callback asked to stop early; not a failure.
				return nil
			}
			// Surface callback failures instead of silently dropping them.
			return ferr
		}
	}
}
// Return count entries at the directory dirPath and all entries
// if count is set to -1
func readDirN(dirPath string, count int) (entries []string, err error) {

View File

@ -1,7 +1,7 @@
// +build windows
/*
* MinIO Cloud Storage, (C) 2016, 2017, 2018 MinIO, Inc.
* MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -29,6 +29,69 @@ func readDir(dirPath string) (entries []string, err error) {
return readDirN(dirPath, -1)
}
// readDirFilterFn applies the filter function on each entry at dirPath; it
// does not recurse into sub-directories.
//
// Entries are enumerated one at a time with syscall.FindNextFile on the
// opened directory handle. Entries with an empty name, "." and "..", and
// reparse points are skipped. For every remaining entry, filter is called
// with the name and os.ModeDir for directories or 0 otherwise:
//   - a nil return continues with the next entry;
//   - errDoneForNow stops the iteration and readDirFilterFn returns nil;
//   - any other error stops the iteration and is returned to the caller.
//
// errFileNotFound is returned when dirPath does not exist or opening it
// fails with a "not a directory" error; errFileAccessDenied is returned
// when dirPath opens successfully but is not a directory.
func readDirFilterFn(dirPath string, filter func(name string, typ os.FileMode) error) error {
	d, err := os.Open(dirPath)
	if err != nil {
		// File is really not found.
		if os.IsNotExist(err) {
			return errFileNotFound
		}
		// File path cannot be verified since one of the parents is a file.
		if strings.Contains(err.Error(), "not a directory") {
			return errFileNotFound
		}
		return err
	}
	defer d.Close()

	st, err := d.Stat()
	if err != nil {
		return err
	}
	// Not a directory return error.
	if !st.IsDir() {
		return errFileAccessDenied
	}

	data := &syscall.Win32finddata{}
	for {
		if e := syscall.FindNextFile(syscall.Handle(d.Fd()), data); e != nil {
			if e == syscall.ERROR_NO_MORE_FILES {
				// Directory fully consumed.
				return nil
			}
			return &os.PathError{
				Op:   "FindNextFile",
				Path: dirPath,
				Err:  e,
			}
		}

		name := syscall.UTF16ToString(data.FileName[0:])
		if name == "" || name == "." || name == ".." { // Useless names
			continue
		}
		if data.FileAttributes&syscall.FILE_ATTRIBUTE_REPARSE_POINT != 0 {
			continue
		}

		var typ os.FileMode // zero value: regular file
		if data.FileAttributes&syscall.FILE_ATTRIBUTE_DIRECTORY != 0 {
			typ = os.ModeDir
		}

		if ferr := filter(name, typ); ferr != nil {
			if ferr == errDoneForNow {
				// The callback asked to stop early; not a failure.
				return nil
			}
			// Surface callback failures consistently, not only the one
			// returned for the last entry.
			return ferr
		}
	}
}
// Return N entries at the directory dirPath. If count is -1, return all entries
func readDirN(dirPath string, count int) (entries []string, err error) {
d, err := os.Open(dirPath)