Only heal on disks where we are sure that healing is needed (#7148)

This commit is contained in:
Krishna Srinivas 2019-01-30 10:53:57 -08:00 committed by kannappanr
parent 2d9860e875
commit b18c0478e7
2 changed files with 42 additions and 10 deletions

View File

@ -29,6 +29,9 @@ import (
"encoding/gob" "encoding/gob"
"encoding/hex" "encoding/hex"
"fmt"
"strings"
"github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/logger"
"github.com/minio/minio/cmd/rest" "github.com/minio/minio/cmd/rest"
xnet "github.com/minio/minio/pkg/net" xnet "github.com/minio/minio/pkg/net"
@ -101,6 +104,15 @@ func toStorageErr(err error) error {
case errServerTimeMismatch.Error(): case errServerTimeMismatch.Error():
return errServerTimeMismatch return errServerTimeMismatch
} }
if strings.Contains(err.Error(), "Bitrot verification mismatch") {
var expected string
var received string
fmt.Sscanf(err.Error(), "Bitrot verification mismatch - expected %s received %s", &expected, &received)
// Go's Sscanf %s scans "," that comes after the expected hash, hence remove it. Providing "," in the format string does not help.
expected = strings.TrimSuffix(expected, ",")
bitrotErr := hashMismatchError{expected, received}
return bitrotErr
}
return err return err
} }

View File

@ -22,6 +22,7 @@ import (
"io" "io"
"path" "path"
"sync" "sync"
"time"
"github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/madmin" "github.com/minio/minio/pkg/madmin"
@ -237,6 +238,30 @@ func listAllBuckets(storageDisks []StorageAPI) (buckets map[string]VolInfo,
return buckets, bucketsOcc, nil return buckets, bucketsOcc, nil
} }
// shouldHealObjectOnDisk reports whether the copy of an object on one disk
// should be healed. Only heal on disks where we are sure that healing is
// needed. We can expand this list as and when we figure out more errors can
// be added to this list safely.
//
// xlErr is the error from reading xl.json on the disk, dataErr is the error
// from checking the part.N files, meta is the metadata read from the disk and
// quorumModTime is the modification time to compare meta against.
//
// It returns true when:
//   - xlErr is errFileNotFound or errCorruptedFormat, or
//   - xlErr is nil and dataErr is errFileNotFound or a hashMismatchError, or
//   - xlErr is nil and meta.Stat.ModTime is a different instant than
//     quorumModTime.
//
// Any other xlErr yields false.
func shouldHealObjectOnDisk(xlErr, dataErr error, meta xlMetaV1, quorumModTime time.Time) bool {
	switch xlErr {
	case errFileNotFound, errCorruptedFormat:
		return true
	case nil:
		// xl.json was read fine but there may be some problem with the
		// part.N files.
		if dataErr == errFileNotFound {
			return true
		}
		if _, ok := dataErr.(hashMismatchError); ok {
			return true
		}
		// Compare instants with Equal rather than !=. The != operator on
		// time.Time also compares the Location and the monotonic clock
		// reading, so two values for the same instant could look different
		// and trigger a heal that is not needed.
		return !quorumModTime.Equal(meta.Stat.ModTime)
	}
	return false
}
// Heals an object by re-writing corrupt/missing erasure blocks. // Heals an object by re-writing corrupt/missing erasure blocks.
func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, object string, func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, object string,
quorum int, dryRun bool) (result madmin.HealResultItem, err error) { quorum int, dryRun bool) (result madmin.HealResultItem, err error) {
@ -305,17 +330,13 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
driveState = madmin.DriveStateCorrupt driveState = madmin.DriveStateCorrupt
} }
// an online disk without valid data/metadata is var drive string
// outdated and can be healed. if storageDisks[i] != nil {
if errs[i] != errDiskNotFound && v == nil { drive = storageDisks[i].String()
}
if shouldHealObjectOnDisk(errs[i], dataErrs[i], partsMetadata[i], modTime) {
outDatedDisks[i] = storageDisks[i] outDatedDisks[i] = storageDisks[i]
disksToHealCount++ disksToHealCount++
}
var drive string
if v == nil {
if errs[i] != errDiskNotFound {
drive = outDatedDisks[i].String()
}
result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{ result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
UUID: "", UUID: "",
Endpoint: drive, Endpoint: drive,
@ -328,7 +349,6 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
}) })
continue continue
} }
drive = v.String()
result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{ result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
UUID: "", UUID: "",
Endpoint: drive, Endpoint: drive,