mirror of
https://github.com/minio/minio.git
synced 2025-01-12 15:33:22 -05:00
remove serializing WalkDir() across all buckets/prefixes on SSDs (#17707)
Drives that are too slow already get knocked off by active monitoring, so we do not need to block calls arbitrarily. Serializing adds latency to calls that are already slow; remove it for SSDs/NVMes. Also, add a `select` on the context when writing to the `out` channel, to avoid any potential blocking.
This commit is contained in:
parent
a7fb3a3853
commit
14e1ace552
@ -98,6 +98,15 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
|
|||||||
objsReturned++
|
objsReturned++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
send := func(entry metaCacheEntry) error {
|
||||||
|
objReturned(entry.metadata)
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return ctx.Err()
|
||||||
|
case out <- entry:
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// Fast exit track to check if we are listing an object with
|
// Fast exit track to check if we are listing an object with
|
||||||
// a trailing slash, this will avoid to list the object content.
|
// a trailing slash, this will avoid to list the object content.
|
||||||
@ -109,11 +118,12 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
|
|||||||
// if baseDir is already a directory object, consider it
|
// if baseDir is already a directory object, consider it
|
||||||
// as part of the list call, this is AWS S3 specific
|
// as part of the list call, this is AWS S3 specific
|
||||||
// behavior.
|
// behavior.
|
||||||
out <- metaCacheEntry{
|
if err := send(metaCacheEntry{
|
||||||
name: opts.BaseDir,
|
name: opts.BaseDir,
|
||||||
metadata: metadata,
|
metadata: metadata,
|
||||||
|
}); err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
objReturned(metadata)
|
|
||||||
} else {
|
} else {
|
||||||
st, sterr := Lstat(pathJoin(volumeDir, opts.BaseDir, xlStorageFormatFile))
|
st, sterr := Lstat(pathJoin(volumeDir, opts.BaseDir, xlStorageFormatFile))
|
||||||
if sterr == nil && st.Mode().IsRegular() {
|
if sterr == nil && st.Mode().IsRegular() {
|
||||||
@ -143,19 +153,25 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if s.walkMu != nil {
|
||||||
s.walkMu.Lock()
|
s.walkMu.Lock()
|
||||||
|
}
|
||||||
entries, err := s.ListDir(ctx, opts.Bucket, current, -1)
|
entries, err := s.ListDir(ctx, opts.Bucket, current, -1)
|
||||||
|
if s.walkMu != nil {
|
||||||
s.walkMu.Unlock()
|
s.walkMu.Unlock()
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Folder could have gone away in-between
|
// Folder could have gone away in-between
|
||||||
if err != errVolumeNotFound && err != errFileNotFound {
|
if err != errVolumeNotFound && err != errFileNotFound {
|
||||||
logger.LogOnceIf(ctx, err, "metacache-walk-scan-dir")
|
logger.LogOnceIf(ctx, err, "metacache-walk-scan-dir")
|
||||||
}
|
}
|
||||||
if opts.ReportNotFound && err == errFileNotFound && current == opts.BaseDir {
|
if opts.ReportNotFound && err == errFileNotFound && current == opts.BaseDir {
|
||||||
return errFileNotFound
|
err = errFileNotFound
|
||||||
|
} else {
|
||||||
|
err = nil
|
||||||
}
|
}
|
||||||
// Forward some errors?
|
diskHealthCheckOK(ctx, err)
|
||||||
return nil
|
return err
|
||||||
}
|
}
|
||||||
diskHealthCheckOK(ctx, err)
|
diskHealthCheckOK(ctx, err)
|
||||||
if len(entries) == 0 {
|
if len(entries) == 0 {
|
||||||
@ -202,9 +218,13 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
|
|||||||
// If root was an object return it as such.
|
// If root was an object return it as such.
|
||||||
if HasSuffix(entry, xlStorageFormatFile) {
|
if HasSuffix(entry, xlStorageFormatFile) {
|
||||||
var meta metaCacheEntry
|
var meta metaCacheEntry
|
||||||
|
if s.walkReadMu != nil {
|
||||||
s.walkReadMu.Lock()
|
s.walkReadMu.Lock()
|
||||||
|
}
|
||||||
meta.metadata, err = s.readMetadata(ctx, pathJoinBuf(&sb, volumeDir, current, entry))
|
meta.metadata, err = s.readMetadata(ctx, pathJoinBuf(&sb, volumeDir, current, entry))
|
||||||
|
if s.walkReadMu != nil {
|
||||||
s.walkReadMu.Unlock()
|
s.walkReadMu.Unlock()
|
||||||
|
}
|
||||||
diskHealthCheckOK(ctx, err)
|
diskHealthCheckOK(ctx, err)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// It is totally possible that xl.meta was overwritten
|
// It is totally possible that xl.meta was overwritten
|
||||||
@ -219,17 +239,15 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
|
|||||||
meta.name = strings.TrimSuffix(meta.name, SlashSeparator)
|
meta.name = strings.TrimSuffix(meta.name, SlashSeparator)
|
||||||
meta.name = pathJoinBuf(&sb, current, meta.name)
|
meta.name = pathJoinBuf(&sb, current, meta.name)
|
||||||
meta.name = decodeDirObject(meta.name)
|
meta.name = decodeDirObject(meta.name)
|
||||||
|
if err := send(meta); err != nil {
|
||||||
objReturned(meta.metadata)
|
return err
|
||||||
out <- meta
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// Check legacy.
|
// Check legacy.
|
||||||
if HasSuffix(entry, xlStorageFormatFileV1) {
|
if HasSuffix(entry, xlStorageFormatFileV1) {
|
||||||
var meta metaCacheEntry
|
var meta metaCacheEntry
|
||||||
s.walkReadMu.Lock()
|
|
||||||
meta.metadata, err = xioutil.ReadFile(pathJoinBuf(&sb, volumeDir, current, entry))
|
meta.metadata, err = xioutil.ReadFile(pathJoinBuf(&sb, volumeDir, current, entry))
|
||||||
s.walkReadMu.Unlock()
|
|
||||||
diskHealthCheckOK(ctx, err)
|
diskHealthCheckOK(ctx, err)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if !IsErrIgnored(err, io.EOF, io.ErrUnexpectedEOF) {
|
if !IsErrIgnored(err, io.EOF, io.ErrUnexpectedEOF) {
|
||||||
@ -240,9 +258,9 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
|
|||||||
meta.name = strings.TrimSuffix(entry, xlStorageFormatFileV1)
|
meta.name = strings.TrimSuffix(entry, xlStorageFormatFileV1)
|
||||||
meta.name = strings.TrimSuffix(meta.name, SlashSeparator)
|
meta.name = strings.TrimSuffix(meta.name, SlashSeparator)
|
||||||
meta.name = pathJoinBuf(&sb, current, meta.name)
|
meta.name = pathJoinBuf(&sb, current, meta.name)
|
||||||
objReturned(meta.metadata)
|
if err := send(meta); err != nil {
|
||||||
|
return err
|
||||||
out <- meta
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// Skip all other files.
|
// Skip all other files.
|
||||||
@ -295,9 +313,13 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
|
|||||||
meta.name = meta.name[:len(meta.name)-1] + globalDirSuffixWithSlash
|
meta.name = meta.name[:len(meta.name)-1] + globalDirSuffixWithSlash
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if s.walkReadMu != nil {
|
||||||
s.walkReadMu.Lock()
|
s.walkReadMu.Lock()
|
||||||
|
}
|
||||||
meta.metadata, err = s.readMetadata(ctx, pathJoinBuf(&sb, volumeDir, meta.name, xlStorageFormatFile))
|
meta.metadata, err = s.readMetadata(ctx, pathJoinBuf(&sb, volumeDir, meta.name, xlStorageFormatFile))
|
||||||
|
if s.walkReadMu != nil {
|
||||||
s.walkReadMu.Unlock()
|
s.walkReadMu.Unlock()
|
||||||
|
}
|
||||||
diskHealthCheckOK(ctx, err)
|
diskHealthCheckOK(ctx, err)
|
||||||
switch {
|
switch {
|
||||||
case err == nil:
|
case err == nil:
|
||||||
@ -305,17 +327,17 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
|
|||||||
if isDirObj {
|
if isDirObj {
|
||||||
meta.name = strings.TrimSuffix(meta.name, globalDirSuffixWithSlash) + slashSeparator
|
meta.name = strings.TrimSuffix(meta.name, globalDirSuffixWithSlash) + slashSeparator
|
||||||
}
|
}
|
||||||
objReturned(meta.metadata)
|
if err := send(meta); err != nil {
|
||||||
|
return err
|
||||||
out <- meta
|
}
|
||||||
case osIsNotExist(err), isSysErrIsDir(err):
|
case osIsNotExist(err), isSysErrIsDir(err):
|
||||||
meta.metadata, err = xioutil.ReadFile(pathJoinBuf(&sb, volumeDir, meta.name, xlStorageFormatFileV1))
|
meta.metadata, err = xioutil.ReadFile(pathJoinBuf(&sb, volumeDir, meta.name, xlStorageFormatFileV1))
|
||||||
diskHealthCheckOK(ctx, err)
|
diskHealthCheckOK(ctx, err)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
// It was an object
|
// It was an object
|
||||||
objReturned(meta.metadata)
|
if err := send(meta); err != nil {
|
||||||
|
return err
|
||||||
out <- meta
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,8 +115,8 @@ type xlStorage struct {
|
|||||||
formatData []byte
|
formatData []byte
|
||||||
|
|
||||||
// mutex to prevent concurrent read operations overloading walks.
|
// mutex to prevent concurrent read operations overloading walks.
|
||||||
walkMu sync.Mutex
|
walkMu *sync.Mutex
|
||||||
walkReadMu sync.Mutex
|
walkReadMu *sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkPathLength - returns error if given path name length more than 255
|
// checkPathLength - returns error if given path name length more than 255
|
||||||
@ -216,18 +216,17 @@ func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var rootDisk bool
|
|
||||||
if !globalIsCICD && !globalIsErasureSD {
|
|
||||||
if globalRootDiskThreshold > 0 {
|
|
||||||
// Use MINIO_ROOTDISK_THRESHOLD_SIZE to figure out if
|
|
||||||
// this disk is a root disk.
|
|
||||||
info, err := disk.GetInfo(path)
|
info, err := disk.GetInfo(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// treat those disks with size less than or equal to the
|
var rootDisk bool
|
||||||
// threshold as rootDisks.
|
if !globalIsCICD && !globalIsErasureSD {
|
||||||
|
if globalRootDiskThreshold > 0 {
|
||||||
|
// Use MINIO_ROOTDISK_THRESHOLD_SIZE to figure out if
|
||||||
|
// this disk is a root disk. treat those disks with
|
||||||
|
// size less than or equal to the threshold as rootDisks.
|
||||||
rootDisk = info.Total <= globalRootDiskThreshold
|
rootDisk = info.Total <= globalRootDiskThreshold
|
||||||
} else {
|
} else {
|
||||||
rootDisk, err = disk.IsRootDisk(path, SlashSeparator)
|
rootDisk, err = disk.IsRootDisk(path, SlashSeparator)
|
||||||
@ -247,6 +246,12 @@ func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) {
|
|||||||
diskIndex: -1,
|
diskIndex: -1,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We stagger listings only on HDDs.
|
||||||
|
if info.Rotational == nil || *info.Rotational {
|
||||||
|
s.walkMu = &sync.Mutex{}
|
||||||
|
s.walkReadMu = &sync.Mutex{}
|
||||||
|
}
|
||||||
|
|
||||||
if cleanUp {
|
if cleanUp {
|
||||||
bgFormatErasureCleanupTmp(s.diskPath) // cleanup any old data.
|
bgFormatErasureCleanupTmp(s.diskPath) // cleanup any old data.
|
||||||
}
|
}
|
||||||
|
@ -23,6 +23,9 @@ package disk
|
|||||||
// Files - total inodes available
|
// Files - total inodes available
|
||||||
// Ffree - free inodes available
|
// Ffree - free inodes available
|
||||||
// FSType - file system type
|
// FSType - file system type
|
||||||
|
// Major - major dev id
|
||||||
|
// Minor - minor dev id
|
||||||
|
// Devname - device name
|
||||||
type Info struct {
|
type Info struct {
|
||||||
Total uint64
|
Total uint64
|
||||||
Free uint64
|
Free uint64
|
||||||
@ -32,6 +35,8 @@ type Info struct {
|
|||||||
FSType string
|
FSType string
|
||||||
Major uint32
|
Major uint32
|
||||||
Minor uint32
|
Minor uint32
|
||||||
|
Name string
|
||||||
|
Rotational *bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// DevID is the drive major and minor ids
|
// DevID is the drive major and minor ids
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
//go:build linux && !s390x && !arm && !386
|
//go:build linux && !s390x && !arm && !386
|
||||||
// +build linux,!s390x,!arm,!386
|
// +build linux,!s390x,!arm,!386
|
||||||
|
|
||||||
// Copyright (c) 2015-2021 MinIO, Inc.
|
// Copyright (c) 2015-2023 MinIO, Inc.
|
||||||
//
|
//
|
||||||
// This file is part of MinIO Object Storage stack
|
// This file is part of MinIO Object Storage stack
|
||||||
//
|
//
|
||||||
@ -28,6 +28,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
|
||||||
|
"github.com/prometheus/procfs/blockdevice"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -47,14 +48,6 @@ func GetInfo(path string) (info Info, err error) {
|
|||||||
//nolint:unconvert
|
//nolint:unconvert
|
||||||
FSType: getFSType(int64(s.Type)),
|
FSType: getFSType(int64(s.Type)),
|
||||||
}
|
}
|
||||||
// Check for overflows.
|
|
||||||
// https://github.com/minio/minio/issues/8035
|
|
||||||
// XFS can show wrong values at times error out
|
|
||||||
// in such scenarios.
|
|
||||||
if info.Free > info.Total {
|
|
||||||
return info, fmt.Errorf("detected free space (%d) > total drive space (%d), fs corruption at (%s). please run 'fsck'", info.Free, info.Total, path)
|
|
||||||
}
|
|
||||||
info.Used = info.Total - info.Free
|
|
||||||
|
|
||||||
st := syscall.Stat_t{}
|
st := syscall.Stat_t{}
|
||||||
err = syscall.Stat(path, &st)
|
err = syscall.Stat(path, &st)
|
||||||
@ -65,6 +58,37 @@ func GetInfo(path string) (info Info, err error) {
|
|||||||
devID := uint64(st.Dev) // Needed to support multiple GOARCHs
|
devID := uint64(st.Dev) // Needed to support multiple GOARCHs
|
||||||
info.Major = unix.Major(devID)
|
info.Major = unix.Major(devID)
|
||||||
info.Minor = unix.Minor(devID)
|
info.Minor = unix.Minor(devID)
|
||||||
|
|
||||||
|
// Check for overflows.
|
||||||
|
// https://github.com/minio/minio/issues/8035
|
||||||
|
// XFS can show wrong values at times error out
|
||||||
|
// in such scenarios.
|
||||||
|
if info.Free > info.Total {
|
||||||
|
return info, fmt.Errorf("detected free space (%d) > total drive space (%d), fs corruption at (%s). please run 'fsck'", info.Free, info.Total, path)
|
||||||
|
}
|
||||||
|
info.Used = info.Total - info.Free
|
||||||
|
|
||||||
|
bfs, err := blockdevice.NewDefaultFS()
|
||||||
|
if err == nil {
|
||||||
|
diskstats, _ := bfs.ProcDiskstats()
|
||||||
|
for _, dstat := range diskstats {
|
||||||
|
// ignore all loop devices
|
||||||
|
if strings.HasPrefix(dstat.DeviceName, "loop") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
qst, err := bfs.SysBlockDeviceQueueStats(dstat.DeviceName)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
rot := qst.Rotational == 1 // Rotational is '1' if the device is HDD
|
||||||
|
if dstat.MajorNumber == info.Major && dstat.MinorNumber == info.Minor {
|
||||||
|
info.Name = dstat.DeviceName
|
||||||
|
info.Rotational = &rot
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return info, nil
|
return info, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user