From b18c0478e72bf46b2dbe80ef51c6859bddeee2c0 Mon Sep 17 00:00:00 2001 From: Krishna Srinivas <634494+krishnasrinivas@users.noreply.github.com> Date: Wed, 30 Jan 2019 10:53:57 -0800 Subject: [PATCH] Only heal on disks where we are sure that healing is needed (#7148) --- cmd/storage-rest-client.go | 12 ++++++++++++ cmd/xl-v1-healing.go | 40 ++++++++++++++++++++++++++++---------- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/cmd/storage-rest-client.go b/cmd/storage-rest-client.go index 221c9c8c2..1184a58d5 100644 --- a/cmd/storage-rest-client.go +++ b/cmd/storage-rest-client.go @@ -29,6 +29,9 @@ import ( "encoding/gob" "encoding/hex" + "fmt" + "strings" + "github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/rest" xnet "github.com/minio/minio/pkg/net" @@ -101,6 +104,15 @@ func toStorageErr(err error) error { case errServerTimeMismatch.Error(): return errServerTimeMismatch } + if strings.Contains(err.Error(), "Bitrot verification mismatch") { + var expected string + var received string + fmt.Sscanf(err.Error(), "Bitrot verification mismatch - expected %s received %s", &expected, &received) + // Go's Sscanf %s scans "," that comes after the expected hash, hence remove it. Providing "," in the format string does not help. + expected = strings.TrimSuffix(expected, ",") + bitrotErr := hashMismatchError{expected, received} + return bitrotErr + } return err } diff --git a/cmd/xl-v1-healing.go b/cmd/xl-v1-healing.go index 879cbe244..3ea0ead9a 100644 --- a/cmd/xl-v1-healing.go +++ b/cmd/xl-v1-healing.go @@ -22,6 +22,7 @@ import ( "io" "path" "sync" + "time" "github.com/minio/minio/cmd/logger" "github.com/minio/minio/pkg/madmin" @@ -237,6 +238,30 @@ func listAllBuckets(storageDisks []StorageAPI) (buckets map[string]VolInfo, return buckets, bucketsOcc, nil } +// Only heal on disks where we are sure that healing is needed. We can expand +// this list as and when we figure out more errors can be added to this list safely. 
+func shouldHealObjectOnDisk(xlErr, dataErr error, meta xlMetaV1, quorumModTime time.Time) bool {
+	switch xlErr {
+	case errFileNotFound:
+		return true
+	case errCorruptedFormat:
+		return true
+	}
+	if xlErr == nil {
+		// xl.json was read fine: heal when a part.N file is missing or hash-mismatched, or when this disk's modTime is not the quorum instant.
+		if dataErr == errFileNotFound {
+			return true
+		}
+		if _, ok := dataErr.(hashMismatchError); ok {
+			return true
+		}
+		if !quorumModTime.Equal(meta.Stat.ModTime) { // Equal compares instants only; != would also compare location.
+			return true
+		}
+	}
+	return false
+}
+
 // Heals an object by re-writing corrupt/missing erasure blocks.
 func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, object string,
 	quorum int, dryRun bool) (result madmin.HealResultItem, err error) {
@@ -305,17 +330,13 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 			driveState = madmin.DriveStateCorrupt
 		}
 
-		// an online disk without valid data/metadata is
-		// outdated and can be healed.
-		if errs[i] != errDiskNotFound && v == nil {
+		var drive string
+		if storageDisks[i] != nil {
+			drive = storageDisks[i].String()
+		}
+		if shouldHealObjectOnDisk(errs[i], dataErrs[i], partsMetadata[i], modTime) {
 			outDatedDisks[i] = storageDisks[i]
 			disksToHealCount++
-		}
-		var drive string
-		if v == nil {
-			if errs[i] != errDiskNotFound {
-				drive = outDatedDisks[i].String()
-			}
 			result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
 				UUID:     "",
 				Endpoint: drive,
@@ -328,7 +349,6 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 			})
 			continue
 		}
-		drive = v.String()
 		result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
 			UUID:     "",
 			Endpoint: drive,