From c27ece409bdac12ffa5ef4d10a2a0131ef038417 Mon Sep 17 00:00:00 2001 From: Krishnan Parthasarathi Date: Fri, 24 Mar 2017 21:10:44 +0530 Subject: [PATCH] heal: Check if all parts are available and valid (#3967) In the algorithm to check if an object requires healing, in addition to checking if all disks have xl.json present we should check if all parts of the object are present and have valid blake2b checksums. Also fixed a minor compilation error in heal-objects-list.go. --- cmd/xl-v1-healing-common.go | 43 ++++++++++++++++++------ cmd/xl-v1-healing.go | 2 +- cmd/xl-v1-list-objects-heal.go | 6 ++-- pkg/madmin/examples/heal-objects-list.go | 4 +-- 4 files changed, 39 insertions(+), 16 deletions(-) diff --git a/cmd/xl-v1-healing-common.go b/cmd/xl-v1-healing-common.go index d1ef3335e..acd105cf6 100644 --- a/cmd/xl-v1-healing-common.go +++ b/cmd/xl-v1-healing-common.go @@ -141,19 +141,38 @@ func outDatedDisks(disks, latestDisks []StorageAPI, errs []error, partsMetadata } // Returns if the object should be healed. -func xlShouldHeal(partsMetadata []xlMetaV1, errs []error) bool { - modTime, _ := commonTime(listObjectModtimes(partsMetadata, errs)) - for index := range partsMetadata { - if errs[index] == errDiskNotFound { - continue - } - if errs[index] != nil { - return true - } - if modTime != partsMetadata[index].Stat.ModTime { +func xlShouldHeal(disks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket, object string) bool { + onlineDisks, _ := listOnlineDisks(disks, partsMetadata, + errs) + // Return true even if one of the disks have stale data. + for _, disk := range onlineDisks { + if disk == nil { return true } } + + // Check if all parts of an object are available and their + // checksums are valid. + availableDisks, _, err := disksWithAllParts(onlineDisks, partsMetadata, + errs, bucket, object) + if err != nil { + // Note: This error is due to failure of blake2b + // checksum computation of a part. It doesn't clearly + // indicate if the object needs healing. At this + // juncture healing could fail with the same + // error. So, we choose to return that there is no + // need to heal. + return false + } + + // Return true even if one disk has xl.json or one or more + // parts missing. + for _, disk := range availableDisks { + if disk == nil { + return true + } + } + return false } @@ -232,7 +251,8 @@ func disksWithAllParts(onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs blakeBytes, hErr := hashSum(onlineDisk, bucket, partPath, hash) if hErr == errFileNotFound { errs[index] = errFileNotFound - continue + availableDisks[index] = nil + break } if hErr != nil && hErr != errFileNotFound { @@ -246,6 +266,7 @@ func disksWithAllParts(onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs // healing. if blakeSum != partChecksum { errs[index] = errFileNotFound + availableDisks[index] = nil break } availableDisks[index] = onlineDisk diff --git a/cmd/xl-v1-healing.go b/cmd/xl-v1-healing.go index c2a833775..c4d62479f 100644 --- a/cmd/xl-v1-healing.go +++ b/cmd/xl-v1-healing.go @@ -322,7 +322,7 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum return toObjectErr(reducedErr, bucket, object) } - if !xlShouldHeal(partsMetadata, errs) { + if !xlShouldHeal(storageDisks, partsMetadata, errs, bucket, object) { // There is nothing to heal. return nil } diff --git a/cmd/xl-v1-list-objects-heal.go b/cmd/xl-v1-list-objects-heal.go index 949e95c44..029a2ce8a 100644 --- a/cmd/xl-v1-list-objects-heal.go +++ b/cmd/xl-v1-list-objects-heal.go @@ -144,7 +144,7 @@ func (xl xlObjects) listObjectsHeal(bucket, prefix, marker, delimiter string, ma objectLock := globalNSMutex.NewNSLock(bucket, objInfo.Name) objectLock.RLock() partsMetadata, errs := readAllXLMetadata(xl.storageDisks, bucket, objInfo.Name) - if xlShouldHeal(partsMetadata, errs) { + if xlShouldHeal(xl.storageDisks, partsMetadata, errs, bucket, objInfo.Name) { healStat := xlHealStat(xl, partsMetadata, errs) result.Objects = append(result.Objects, ObjectInfo{ Name: objInfo.Name, @@ -377,7 +377,9 @@ func (xl xlObjects) listMultipartUploadsHeal(bucket, prefix, keyMarker, uploadIDPath := filepath.Join(bucket, upload.Object, upload.UploadID) partsMetadata, errs := readAllXLMetadata(xl.storageDisks, minioMetaMultipartBucket, uploadIDPath) - if xlShouldHeal(partsMetadata, errs) { + if xlShouldHeal(xl.storageDisks, partsMetadata, errs, + minioMetaMultipartBucket, uploadIDPath) { + healUploadInfo := xlHealStat(xl, partsMetadata, errs) upload.HealUploadInfo = &healUploadInfo result.Uploads = append(result.Uploads, upload) diff --git a/pkg/madmin/examples/heal-objects-list.go b/pkg/madmin/examples/heal-objects-list.go index 87a34f7fd..b2ccc4651 100644 --- a/pkg/madmin/examples/heal-objects-list.go +++ b/pkg/madmin/examples/heal-objects-list.go @@ -61,8 +61,8 @@ func main() { return } - if object.HealInfo != nil { - switch healInfo := *object.HealInfo; healInfo.Status { + if object.HealObjectInfo != nil { + switch healInfo := *object.HealObjectInfo; healInfo.Status { case madmin.CanHeal: fmt.Println(object.Key, " can be healed.") case madmin.QuorumUnavailable: