From 38027c8f52e1c884976c3fb25c158d284b27794a Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Tue, 28 Sep 2021 10:02:56 -0700 Subject: [PATCH] use fadvise to control Linux page-cache (#13312) This PR brings two optimizations mainly for page-cache build-up and how to avoid getting OOM killed in the process. Although this memory is reclaimable, Linux is not fast enough to reclaim it as needed on a very busy system. fadvise is a system call implemented in Linux to advise page-cache to avoid overload as we get a significant number of requests on the server. - FADV_SEQUENTIAL tells that all I/O from now is going to be sequential, allowing for more responsive throughput. - FADV_NOREUSE tells kernel to start removing things for this 'fd' from page-cache. --- cmd/os-readdir_unix.go | 12 ++++++++++++ cmd/xl-storage.go | 18 +++++++++++++++++- internal/disk/fdatasync_linux.go | 14 ++++++++++++++ internal/disk/fdatasync_unix.go | 12 ++++++++++++ internal/disk/fdatasync_unsupported.go | 12 ++++++++++++ internal/ioutil/read_file.go | 6 ++++++ 6 files changed, 73 insertions(+), 1 deletion(-) diff --git a/cmd/os-readdir_unix.go b/cmd/os-readdir_unix.go index 437726d52..0b6d82827 100644 --- a/cmd/os-readdir_unix.go +++ b/cmd/os-readdir_unix.go @@ -28,6 +28,7 @@ import ( "syscall" "unsafe" + "github.com/minio/minio/internal/disk" "golang.org/x/sys/unix" ) @@ -110,6 +111,11 @@ func readDirFn(dirPath string, fn func(name string, typ os.FileMode) error) erro } return osErrToFileErr(err) } + if err := disk.Fadvise(f, disk.FadvSequential); err != nil { + return err + } + + defer disk.Fadvise(f, disk.FadvNoReuse) defer f.Close() bufp := direntPool.Get().(*[]byte) @@ -185,6 +191,12 @@ func readDirWithOpts(dirPath string, opts readDirOpts) (entries []string, err er if err != nil { return nil, osErrToFileErr(err) } + + if err := disk.Fadvise(f, disk.FadvSequential); err != nil { + return nil, err + } + + defer disk.Fadvise(f, disk.FadvNoReuse) defer f.Close() bufp := 
direntPool.Get().(*[]byte) diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index 6dba949f6..b6e7975f8 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -387,7 +387,7 @@ func (s *xlStorage) SetDiskLoc(poolIdx, setIdx, diskIdx int) { func (s *xlStorage) Healing() *healingTracker { healingFile := pathJoin(s.diskPath, minioMetaBucket, bucketMetaPrefix, healingTrackerFilename) - b, err := ioutil.ReadFile(healingFile) + b, err := xioutil.ReadFile(healingFile) if err != nil { return nil } @@ -410,6 +410,12 @@ func (s *xlStorage) readMetadata(ctx context.Context, itemPath string) ([]byte, if err != nil { return nil, err } + + if err := disk.Fadvise(f, disk.FadvSequential); err != nil { + return nil, err + } + + defer disk.Fadvise(f, disk.FadvNoReuse) defer f.Close() stat, err := f.Stat() if err != nil { @@ -1228,6 +1234,10 @@ func (s *xlStorage) readAllData(volumeDir string, filePath string) (buf []byte, } return nil, err } + if err := disk.Fadvise(f, disk.FadvSequential); err != nil { + return nil, err + } + defer disk.Fadvise(f, disk.FadvNoReuse) r := &odirectReader{f, nil, nil, true, true, s, nil} defer r.Close() buf, err = ioutil.ReadAll(r) @@ -1547,6 +1557,11 @@ func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, off return nil, errIsNotRegular } + // Enable sequential read access pattern - only applicable on Linux. 
+ if err := disk.Fadvise(file, disk.FadvSequential); err != nil { + return nil, err + } + if offset == 0 { or := &odirectReader{file, nil, nil, true, false, s, nil} if length <= smallFileThreshold { @@ -1565,6 +1580,7 @@ func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, off io.Reader io.Closer }{Reader: io.LimitReader(file, length), Closer: closeWrapper(func() error { + disk.Fadvise(file, disk.FadvNoReuse) return file.Close() })} diff --git a/internal/disk/fdatasync_linux.go b/internal/disk/fdatasync_linux.go index 19b3982d5..e995a137c 100644 --- a/internal/disk/fdatasync_linux.go +++ b/internal/disk/fdatasync_linux.go @@ -23,6 +23,8 @@ package disk import ( "os" "syscall" + + "golang.org/x/sys/unix" ) // Fdatasync - fdatasync() is similar to fsync(), but does not flush modified metadata @@ -38,3 +40,15 @@ import ( func Fdatasync(f *os.File) error { return syscall.Fdatasync(int(f.Fd())) } + +// fadvise advice constants +const ( + FadvSequential = unix.FADV_SEQUENTIAL + FadvNoReuse = unix.FADV_NOREUSE +) + +// Fadvise passes advice to fadvise(2) for f with +// offset: 0, length: 0 +func Fadvise(f *os.File, advice int) error { + return unix.Fadvise(int(f.Fd()), 0, 0, advice) +} diff --git a/internal/disk/fdatasync_unix.go b/internal/disk/fdatasync_unix.go index 80e3e3cd7..6ec9b4356 100644 --- a/internal/disk/fdatasync_unix.go +++ b/internal/disk/fdatasync_unix.go @@ -29,3 +29,15 @@ import ( func Fdatasync(f *os.File) error { return syscall.Fsync(int(f.Fd())) } + +// fadvise advice constants +const ( + FadvSequential = 0 + FadvNoReuse = 0 +) + +// Fadvise is a no-op in this build and +// always returns nil +func Fadvise(f *os.File, advice int) error { + return nil +} diff --git a/internal/disk/fdatasync_unsupported.go b/internal/disk/fdatasync_unsupported.go index a3f8b6b38..3c317f7fe 100644 --- a/internal/disk/fdatasync_unsupported.go +++ b/internal/disk/fdatasync_unsupported.go @@ -28,3 +28,15 @@ import ( func Fdatasync(f *os.File) 
error { return nil } + +// fadvise advice constants +const ( + FadvSequential = 0 + FadvNoReuse = 0 +) + +// Fadvise is a no-op in this build and +// always returns nil +func Fadvise(f *os.File, advice int) error { + return nil +} diff --git a/internal/ioutil/read_file.go b/internal/ioutil/read_file.go index be4ed76cd..055800d91 100644 --- a/internal/ioutil/read_file.go +++ b/internal/ioutil/read_file.go @@ -20,6 +20,8 @@ package ioutil import ( "io" "os" + + "github.com/minio/minio/internal/disk" ) // ReadFile reads the named file and returns the contents. @@ -33,6 +35,10 @@ func ReadFile(name string) ([]byte, error) { if err != nil { return nil, err } + if err := disk.Fadvise(f, disk.FadvSequential); err != nil { + return nil, err + } + defer disk.Fadvise(f, disk.FadvNoReuse) defer f.Close() st, err := f.Stat() if err != nil {