heal: Check if all parts are available and valid (#3967)

In the algorithm to check if an object requires healing, in addition to
checking if all disks have xl.json present we should check if all parts
of the object are present and have valid blake2b checksums.

Also fixed a minor compilation error in heal-objects-list.go.
This commit is contained in:
Krishnan Parthasarathi 2017-03-24 21:10:44 +05:30 committed by Harshavardhana
parent 8c0ce2fee9
commit c27ece409b
4 changed files with 39 additions and 16 deletions

View File

@ -141,19 +141,38 @@ func outDatedDisks(disks, latestDisks []StorageAPI, errs []error, partsMetadata
}
// Returns if the object should be healed.
func xlShouldHeal(partsMetadata []xlMetaV1, errs []error) bool {
modTime, _ := commonTime(listObjectModtimes(partsMetadata, errs))
for index := range partsMetadata {
if errs[index] == errDiskNotFound {
continue
}
if errs[index] != nil {
return true
}
if modTime != partsMetadata[index].Stat.ModTime {
func xlShouldHeal(disks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket, object string) bool {
onlineDisks, _ := listOnlineDisks(disks, partsMetadata,
errs)
// Return true even if one of the disks have stale data.
for _, disk := range onlineDisks {
if disk == nil {
return true
}
}
// Check if all parts of an object are available and their
// checksums are valid.
availableDisks, _, err := disksWithAllParts(onlineDisks, partsMetadata,
errs, bucket, object)
if err != nil {
// Note: This error is due to failure of blake2b
// checksum computation of a part. It doesn't clearly
// indicate if the object needs healing. At this
// juncture healing could fail with the same
// error. So, we choose to return that there is no
// need to heal.
return false
}
// Return true even if one disk has xl.json or one or more
// parts missing.
for _, disk := range availableDisks {
if disk == nil {
return true
}
}
return false
}
@ -232,7 +251,8 @@ func disksWithAllParts(onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs
blakeBytes, hErr := hashSum(onlineDisk, bucket, partPath, hash)
if hErr == errFileNotFound {
errs[index] = errFileNotFound
continue
availableDisks[index] = nil
break
}
if hErr != nil && hErr != errFileNotFound {
@ -246,6 +266,7 @@ func disksWithAllParts(onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs
// healing.
if blakeSum != partChecksum {
errs[index] = errFileNotFound
availableDisks[index] = nil
break
}
availableDisks[index] = onlineDisk

View File

@ -322,7 +322,7 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
return toObjectErr(reducedErr, bucket, object)
}
if !xlShouldHeal(partsMetadata, errs) {
if !xlShouldHeal(storageDisks, partsMetadata, errs, bucket, object) {
// There is nothing to heal.
return nil
}

View File

@ -144,7 +144,7 @@ func (xl xlObjects) listObjectsHeal(bucket, prefix, marker, delimiter string, ma
objectLock := globalNSMutex.NewNSLock(bucket, objInfo.Name)
objectLock.RLock()
partsMetadata, errs := readAllXLMetadata(xl.storageDisks, bucket, objInfo.Name)
if xlShouldHeal(partsMetadata, errs) {
if xlShouldHeal(xl.storageDisks, partsMetadata, errs, bucket, objInfo.Name) {
healStat := xlHealStat(xl, partsMetadata, errs)
result.Objects = append(result.Objects, ObjectInfo{
Name: objInfo.Name,
@ -377,7 +377,9 @@ func (xl xlObjects) listMultipartUploadsHeal(bucket, prefix, keyMarker,
uploadIDPath := filepath.Join(bucket, upload.Object, upload.UploadID)
partsMetadata, errs := readAllXLMetadata(xl.storageDisks,
minioMetaMultipartBucket, uploadIDPath)
if xlShouldHeal(partsMetadata, errs) {
if xlShouldHeal(xl.storageDisks, partsMetadata, errs,
minioMetaMultipartBucket, uploadIDPath) {
healUploadInfo := xlHealStat(xl, partsMetadata, errs)
upload.HealUploadInfo = &healUploadInfo
result.Uploads = append(result.Uploads, upload)

View File

@ -61,8 +61,8 @@ func main() {
return
}
if object.HealInfo != nil {
switch healInfo := *object.HealInfo; healInfo.Status {
if object.HealObjectInfo != nil {
switch healInfo := *object.HealObjectInfo; healInfo.Status {
case madmin.CanHeal:
fmt.Println(object.Key, " can be healed.")
case madmin.QuorumUnavailable: