use fadvise to control Linux page-cache (#13312)

This PR brings two optimizations, mainly to
limit page-cache build-up and to avoid getting
OOM killed in the process. Although this memory
is reclaimable, Linux is not fast enough to
reclaim it as needed on a very busy system.
fadvise is a system call implemented in Linux
that lets us advise the kernel about page-cache
usage, so the cache is not overloaded when the
server receives a significant number of requests.

- FADV_SEQUENTIAL tells the kernel that all I/O
  from now on is going to be sequential, allowing
  for more responsive throughput.

- FADV_NOREUSE tells the kernel that data for this
  'fd' will be accessed only once, so it can start
  removing it from the page-cache.
This commit is contained in:
Harshavardhana 2021-09-28 10:02:56 -07:00 committed by GitHub
parent dd5804c10e
commit 38027c8f52
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 73 additions and 1 deletions

View File

@ -28,6 +28,7 @@ import (
"syscall" "syscall"
"unsafe" "unsafe"
"github.com/minio/minio/internal/disk"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
) )
@ -110,6 +111,11 @@ func readDirFn(dirPath string, fn func(name string, typ os.FileMode) error) erro
} }
return osErrToFileErr(err) return osErrToFileErr(err)
} }
if err := disk.Fadvise(f, disk.FadvSequential); err != nil {
return err
}
defer disk.Fadvise(f, disk.FadvNoReuse)
defer f.Close() defer f.Close()
bufp := direntPool.Get().(*[]byte) bufp := direntPool.Get().(*[]byte)
@ -185,6 +191,12 @@ func readDirWithOpts(dirPath string, opts readDirOpts) (entries []string, err er
if err != nil { if err != nil {
return nil, osErrToFileErr(err) return nil, osErrToFileErr(err)
} }
if err := disk.Fadvise(f, disk.FadvSequential); err != nil {
return nil, err
}
defer disk.Fadvise(f, disk.FadvNoReuse)
defer f.Close() defer f.Close()
bufp := direntPool.Get().(*[]byte) bufp := direntPool.Get().(*[]byte)

View File

@ -387,7 +387,7 @@ func (s *xlStorage) SetDiskLoc(poolIdx, setIdx, diskIdx int) {
func (s *xlStorage) Healing() *healingTracker { func (s *xlStorage) Healing() *healingTracker {
healingFile := pathJoin(s.diskPath, minioMetaBucket, healingFile := pathJoin(s.diskPath, minioMetaBucket,
bucketMetaPrefix, healingTrackerFilename) bucketMetaPrefix, healingTrackerFilename)
b, err := ioutil.ReadFile(healingFile) b, err := xioutil.ReadFile(healingFile)
if err != nil { if err != nil {
return nil return nil
} }
@ -410,6 +410,12 @@ func (s *xlStorage) readMetadata(ctx context.Context, itemPath string) ([]byte,
if err != nil { if err != nil {
return nil, err return nil, err
} }
if err := disk.Fadvise(f, disk.FadvSequential); err != nil {
return nil, err
}
defer disk.Fadvise(f, disk.FadvNoReuse)
defer f.Close() defer f.Close()
stat, err := f.Stat() stat, err := f.Stat()
if err != nil { if err != nil {
@ -1228,6 +1234,10 @@ func (s *xlStorage) readAllData(volumeDir string, filePath string) (buf []byte,
} }
return nil, err return nil, err
} }
if err := disk.Fadvise(f, disk.FadvSequential); err != nil {
return nil, err
}
defer disk.Fadvise(f, disk.FadvNoReuse)
r := &odirectReader{f, nil, nil, true, true, s, nil} r := &odirectReader{f, nil, nil, true, true, s, nil}
defer r.Close() defer r.Close()
buf, err = ioutil.ReadAll(r) buf, err = ioutil.ReadAll(r)
@ -1547,6 +1557,11 @@ func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, off
return nil, errIsNotRegular return nil, errIsNotRegular
} }
// Enable sequential read access pattern - only applicable on Linux.
if err := disk.Fadvise(file, disk.FadvSequential); err != nil {
return nil, err
}
if offset == 0 { if offset == 0 {
or := &odirectReader{file, nil, nil, true, false, s, nil} or := &odirectReader{file, nil, nil, true, false, s, nil}
if length <= smallFileThreshold { if length <= smallFileThreshold {
@ -1565,6 +1580,7 @@ func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, off
io.Reader io.Reader
io.Closer io.Closer
}{Reader: io.LimitReader(file, length), Closer: closeWrapper(func() error { }{Reader: io.LimitReader(file, length), Closer: closeWrapper(func() error {
disk.Fadvise(file, disk.FadvNoReuse)
return file.Close() return file.Close()
})} })}

View File

@ -23,6 +23,8 @@ package disk
import ( import (
"os" "os"
"syscall" "syscall"
"golang.org/x/sys/unix"
) )
// Fdatasync - fdatasync() is similar to fsync(), but does not flush modified metadata // Fdatasync - fdatasync() is similar to fsync(), but does not flush modified metadata
@ -38,3 +40,15 @@ import (
func Fdatasync(f *os.File) error { func Fdatasync(f *os.File) error {
return syscall.Fdatasync(int(f.Fd())) return syscall.Fdatasync(int(f.Fd()))
} }
// Fadvise advice constants, exposed under this package's own names.
// Each one is the corresponding value from golang.org/x/sys/unix.
const (
// FadvSequential is unix.FADV_SEQUENTIAL.
FadvSequential = unix.FADV_SEQUENTIAL
// FadvNoReuse is unix.FADV_NOREUSE.
FadvNoReuse = unix.FADV_NOREUSE
)
// Fadvise passes advice to the kernel for the open file f via
// unix.Fadvise. Offset and length are both fixed at 0; posix_fadvise(2)
// documents a zero length as "through the end of the file", so the advice
// covers the entire file. The error from unix.Fadvise is returned unchanged.
func Fadvise(f *os.File, advice int) error {
	fd := int(f.Fd())
	return unix.Fadvise(fd, 0, 0, advice)
}

View File

@ -29,3 +29,15 @@ import (
func Fdatasync(f *os.File) error { func Fdatasync(f *os.File) error {
return syscall.Fsync(int(f.Fd())) return syscall.Fsync(int(f.Fd()))
} }
// Fadvise advice constants. Both are plain 0 placeholders in this build,
// where Fadvise ignores its advice argument.
const (
// FadvSequential is a placeholder value; it has no effect here.
FadvSequential = 0
// FadvNoReuse is a placeholder value; it has no effect here.
FadvNoReuse = 0
)
// Fadvise is a no-op in this build: it ignores both f and advice and
// always returns nil, so callers can invoke it unconditionally.
func Fadvise(f *os.File, advice int) error {
// Nothing is advised here; report success.
return nil
}

View File

@ -28,3 +28,15 @@ import (
func Fdatasync(f *os.File) error { func Fdatasync(f *os.File) error {
return nil return nil
} }
// Fadvise advice constants. Both are plain 0 placeholders in this build,
// where Fadvise ignores its advice argument.
const (
// FadvSequential is a placeholder value; it has no effect here.
FadvSequential = 0
// FadvNoReuse is a placeholder value; it has no effect here.
FadvNoReuse = 0
)
// Fadvise is a no-op in this build: it ignores both f and advice and
// always returns nil, so callers can invoke it unconditionally.
func Fadvise(f *os.File, advice int) error {
// Nothing is advised here; report success.
return nil
}

View File

@ -20,6 +20,8 @@ package ioutil
import ( import (
"io" "io"
"os" "os"
"github.com/minio/minio/internal/disk"
) )
// ReadFile reads the named file and returns the contents. // ReadFile reads the named file and returns the contents.
@ -33,6 +35,10 @@ func ReadFile(name string) ([]byte, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
if err := disk.Fadvise(f, disk.FadvSequential); err != nil {
return nil, err
}
defer disk.Fadvise(f, disk.FadvNoReuse)
defer f.Close() defer f.Close()
st, err := f.Stat() st, err := f.Stat()
if err != nil { if err != nil {