Add local disk health checks (#14447)

The main goal of this PR is to address the situation where a disk stops
responding to operations. This generally causes a file-descriptor build-up
and will eventually crash the server.

This adds detection of hung disks, where calls on the disk get stuck.

We add functionality to `xlStorageDiskIDCheck` so that it keeps
track of the number of concurrent requests on a given disk.

A total of 100 concurrent operations are allowed per disk. If this limit
is reached, we block (but do not reject) new requests while monitoring
the state of the disk.
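
A minimal sketch of this blocking admission, using a buffered channel as a counting semaphore (the `diskLimiter` type and its names are hypothetical, not the actual `xlStorageDiskIDCheck` internals):

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// diskLimiter is a hypothetical sketch: a buffered channel acting as a
// counting semaphore with 100 tokens. Once all tokens are taken, the
// next request blocks (rather than being rejected) until a slot frees
// up or the context is cancelled.
type diskLimiter struct {
	tokens chan struct{}
}

func newDiskLimiter(n int) *diskLimiter {
	return &diskLimiter{tokens: make(chan struct{}, n)}
}

func (l *diskLimiter) Acquire(ctx context.Context) error {
	select {
	case l.tokens <- struct{}{}:
		return nil
	case <-ctx.Done():
		return ctx.Err() // e.g. the monitor declared the disk faulty
	}
}

func (l *diskLimiter) Release() { <-l.tokens }

func main() {
	lim := newDiskLimiter(100)
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	if err := lim.Acquire(ctx); err != nil {
		fmt.Println("blocked request gave up:", err)
		return
	}
	defer lim.Release()
	fmt.Println("operation admitted")
}
```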

If no requests have completed or made progress within a 15-second
window, we mark the disk as offline. Requests that are blocked will be
unblocked and return a "faulty disk" error.

New requests will be rejected until the disk is marked OK again.
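
A rough sketch of this stall detector and admission gate; the `healthTracker` type and its methods are illustrative, though `errFaultyDisk` matches the error name used elsewhere in this PR:

```go
package main

import (
	"errors"
	"fmt"
	"sync/atomic"
	"time"
)

var errFaultyDisk = errors.New("faulty disk")

// healthTracker is a sketch of the stall detector: operations record
// progress, and monitor trips the faulty flag after a window with no
// progress at all.
type healthTracker struct {
	lastProgress atomic.Int64 // unix nanos of the last op that progressed
	faulty       atomic.Bool
}

func (h *healthTracker) progressed() {
	h.lastProgress.Store(time.Now().UnixNano())
}

// checkAdmit is called on every new request: while the disk is marked
// faulty, requests are rejected immediately instead of piling up.
func (h *healthTracker) checkAdmit() error {
	if h.faulty.Load() {
		return errFaultyDisk
	}
	return nil
}

// monitor polls for progress and marks the disk offline once the
// window has elapsed with nothing completing or updating.
func (h *healthTracker) monitor(window time.Duration) {
	ticker := time.NewTicker(window / 3)
	defer ticker.Stop()
	for range ticker.C {
		if time.Since(time.Unix(0, h.lastProgress.Load())) > window {
			h.faulty.Store(true) // blocked requests now fail with errFaultyDisk
			return
		}
	}
}

func main() {
	h := &healthTracker{}
	h.progressed()
	go h.monitor(15 * time.Second)
	fmt.Println("admit:", h.checkAdmit()) // <nil> while healthy
}
```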

Once a disk has been marked faulty, a check runs every 5 seconds that
attempts to write a file and read it back. As long as this check fails, the
disk remains faulty.
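
For illustration, the write/read-back probe could look like this (file name, payload, and helper names are hypothetical, not the actual implementation):

```go
package main

import (
	"bytes"
	"fmt"
	"os"
	"path/filepath"
	"time"
)

// probeDisk is a sketch of the recovery check described above: write a
// small file and read it back.
func probeDisk(dir string) error {
	fn := filepath.Join(dir, ".healthcheck.tmp")
	payload := []byte("health-check")
	if err := os.WriteFile(fn, payload, 0o644); err != nil {
		return err
	}
	defer os.Remove(fn)
	got, err := os.ReadFile(fn)
	if err != nil {
		return err
	}
	if !bytes.Equal(got, payload) {
		return fmt.Errorf("read-back mismatch")
	}
	return nil
}

// monitorUntilHealthy retries the probe every 5 seconds; the disk stays
// faulty until one round-trip succeeds.
func monitorUntilHealthy(dir string) {
	ticker := time.NewTicker(5 * time.Second)
	defer ticker.Stop()
	for range ticker.C {
		if probeDisk(dir) == nil {
			return // mark the disk OK again
		}
	}
}

func main() {
	fmt.Println("first probe:", probeDisk(os.TempDir()))
}
```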

To prevent long-running requests from marking the disk faulty, we
implement a callback feature that allows updating the status as parts
of these operations complete.
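
The diff below reports such checkpoints via `diskHealthCheckOK(ctx, err)`. A minimal sketch of how a context-carried callback of that shape could be wired (the key and callback types here are illustrative; the real plumbing inside `xlStorageDiskIDCheck` may differ):

```go
package main

import (
	"context"
	"fmt"
)

// trackerKey and healthCB are illustrative, not the actual MinIO types.
type trackerKey struct{}

type healthCB func(err error)

// contextWithTracker attaches a progress callback to the request context
// before the storage call is made.
func contextWithTracker(ctx context.Context, cb healthCB) context.Context {
	return context.WithValue(ctx, trackerKey{}, cb)
}

// diskHealthCheckOK reports one completed step of a long-running
// operation, so the monitor sees progress before the call returns.
func diskHealthCheckOK(ctx context.Context, err error) {
	if cb, _ := ctx.Value(trackerKey{}).(healthCB); cb != nil {
		cb(err)
	}
}

func main() {
	ctx := contextWithTracker(context.Background(), func(err error) {
		fmt.Println("progress checkpoint, err =", err)
	})
	diskHealthCheckOK(ctx, nil) // called at checkpoints, as in the diff below
}
```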

We add reader and writer wrappers that update the status on each
successful read/write operation. This should provide fine enough granularity
that a slow, but still operational, disk will not hit a 15-second window in
which 50 operations have made no progress.
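
A simplified sketch of such a reader wrapper (the diff below wraps readers via `diskHealthReader` and writers via `diskHealthWriter`; this stand-in only shows the shape):

```go
package main

import (
	"fmt"
	"io"
	"strings"
)

// progressReader is a simplified sketch of such a wrapper: every
// successful Read reports progress, so a slow but working disk keeps
// resetting the stall window.
type progressReader struct {
	r      io.Reader
	onRead func(n int)
}

func (p *progressReader) Read(b []byte) (int, error) {
	n, err := p.r.Read(b)
	if n > 0 {
		p.onRead(n) // fine-grained progress signal per successful read
	}
	return n, err
}

func main() {
	pr := &progressReader{
		r:      strings.NewReader("some file contents"),
		onRead: func(n int) { fmt.Println("read", n, "bytes") },
	}
	out, _ := io.ReadAll(pr)
	fmt.Println(string(out))
}
```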

Note that errors by themselves are not enough to mark a disk faulty.
A nil (or io.EOF) error marks the disk as "good".
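
A small sketch of that classification rule (the helper name is hypothetical):

```go
package main

import (
	"errors"
	"fmt"
	"io"
)

// countsAsResponsive is a sketch of this rule: nil and io.EOF both prove
// the disk answered, so they refresh the health tracker; other errors
// are passed through but do not, by themselves, take the disk offline.
func countsAsResponsive(err error) bool {
	return err == nil || errors.Is(err, io.EOF)
}

func main() {
	fmt.Println(countsAsResponsive(nil), countsAsResponsive(io.EOF)) // true true
	fmt.Println(countsAsResponsive(errors.New("read failed")))       // false
}
```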

* Make concurrent disk setting configurable via `_MINIO_DISK_MAX_CONCURRENT`.
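
Assuming typical environment-variable parsing (this sketch is not the exact MinIO code, but the variable name and the default of 100 come from this PR), the override could be read like this:

```go
package main

import (
	"fmt"
	"os"
	"strconv"
)

// defaultMaxConcurrent mirrors the default of 100 described above.
const defaultMaxConcurrent = 100

func maxConcurrentFromEnv() int {
	if v := os.Getenv("_MINIO_DISK_MAX_CONCURRENT"); v != "" {
		if n, err := strconv.Atoi(v); err == nil && n > 0 {
			return n
		}
	}
	return defaultMaxConcurrent
}

func main() {
	fmt.Println("max concurrent disk ops:", maxConcurrentFromEnv())
}
```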

* de-couple IsOnline() from disk health tracker

The purpose of IsOnline() is to ensure that we
reconnect the drive only when the drive was:

- disconnected from the network: we need to validate
  that the drive is the "correct" one and is the same
  drive that belongs to this server.

- replaced: we have to format it, since we support
  hot-swapping of drives.

IsOnline() is not meant for taking the drive offline
when it is hung; that is not useful. We can leave the
drive online and instead return errors for the relevant
calls.

* return errFaultyDisk for DiskInfo() call

Co-authored-by: Harshavardhana <harsha@minio.io>

Possible future improvements:

* Unify the REST server and local xlStorageDiskIDCheck. This would also improve stats significantly.
* Allow reads/writes to be aborted by the context.
* Add usage stats, concurrent count, blocked operations, etc.
Commit b890bbfa63 (parent 0e3a570b85), authored by Klaus Post on
2022-03-09 11:38:54 -08:00, committed by GitHub.
7 changed files with 478 additions and 200 deletions.


@@ -683,9 +683,11 @@ func (s *xlStorage) SetDiskID(id string) {
 func (s *xlStorage) MakeVolBulk(ctx context.Context, volumes ...string) error {
 	for _, volume := range volumes {
-		if err := s.MakeVol(ctx, volume); err != nil && !errors.Is(err, errVolumeExists) {
+		err := s.MakeVol(ctx, volume)
+		if err != nil && !errors.Is(err, errVolumeExists) {
 			return err
 		}
+		diskHealthCheckOK(ctx, err)
 	}
 	return nil
 }
@@ -943,6 +945,7 @@ func (s *xlStorage) DeleteVersions(ctx context.Context, volume string, versions
 		if err := s.deleteVersions(ctx, volume, fiv.Name, fiv.Versions...); err != nil {
 			errs[i] = err
 		}
+		diskHealthCheckOK(ctx, errs[i])
 	}
 	return errs
@@ -1382,7 +1385,7 @@ func (s *xlStorage) readAllData(ctx context.Context, volumeDir string, filePath
 		buf = make([]byte, sz)
 	}
 	// Read file...
-	_, err = io.ReadFull(r, buf)
+	_, err = io.ReadFull(diskHealthReader(ctx, r), buf)
 	return buf, stat.ModTime().UTC(), osErrToFileErr(err)
 }
@@ -1693,7 +1696,7 @@ func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, off
 	r := struct {
 		io.Reader
 		io.Closer
-	}{Reader: io.LimitReader(or, length), Closer: closeWrapper(func() error {
+	}{Reader: io.LimitReader(diskHealthReader(ctx, or), length), Closer: closeWrapper(func() error {
 		if !alignment || offset+length%xioutil.DirectioAlignSize != 0 {
 			// invalidate page-cache for unaligned reads.
 			if !globalAPIConfig.isDisableODirect() {
@@ -1792,7 +1795,7 @@ func (s *xlStorage) CreateFile(ctx context.Context, volume, path string, fileSiz
 		defer xioutil.ODirectPoolLarge.Put(bufp)
 	}
-	written, err := xioutil.CopyAligned(w, r, *bufp, fileSize)
+	written, err := xioutil.CopyAligned(diskHealthWriter(ctx, w), r, *bufp, fileSize, w)
 	if err != nil {
 		return err
 	}
@@ -2269,6 +2272,7 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, f
 		}
 		return osErrToFileErr(err)
 	}
+	diskHealthCheckOK(ctx, err)
 	// renameAll only for objects that have xl.meta not saved inline.
 	if len(fi.Data) == 0 && fi.Size > 0 {
@@ -2406,7 +2410,7 @@ func (s *xlStorage) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolum
 	return nil
 }
-func (s *xlStorage) bitrotVerify(partPath string, partSize int64, algo BitrotAlgorithm, sum []byte, shardSize int64) error {
+func (s *xlStorage) bitrotVerify(ctx context.Context, partPath string, partSize int64, algo BitrotAlgorithm, sum []byte, shardSize int64) error {
 	// Open the file for reading.
 	file, err := Open(partPath)
 	if err != nil {
@@ -2421,7 +2425,7 @@ func (s *xlStorage) bitrotVerify(partPath string, partSize int64, algo BitrotAlg
 		// for healing code to fix this file.
 		return err
 	}
-	return bitrotVerify(file, fi.Size(), partSize, algo, sum, shardSize)
+	return bitrotVerify(diskHealthReader(ctx, file), fi.Size(), partSize, algo, sum, shardSize)
 }
 func (s *xlStorage) VerifyFile(ctx context.Context, volume, path string, fi FileInfo) (err error) {
@@ -2446,7 +2450,7 @@ func (s *xlStorage) VerifyFile(ctx context.Context, volume, path string, fi File
 	for _, part := range fi.Parts {
 		checksumInfo := erasure.GetChecksumInfo(part.Number)
 		partPath := pathJoin(volumeDir, path, fi.DataDir, fmt.Sprintf("part.%d", part.Number))
-		if err := s.bitrotVerify(partPath,
+		if err := s.bitrotVerify(ctx, partPath,
 			erasure.ShardFileSize(part.Size),
 			checksumInfo.Algorithm,
 			checksumInfo.Hash, erasure.ShardSize()); err != nil {