Optimize healObject by eliminating extra data passes (#4949)

This commit is contained in:
Aditya Manthramurthy
2017-09-29 04:27:19 +05:30
committed by Dee Koder
parent 94670a387e
commit 4c9fae90ff
4 changed files with 144 additions and 117 deletions

View File

@@ -333,25 +333,24 @@ func quickHeal(storageDisks []StorageAPI, writeQuorum int, readQuorum int) error
}
// Heals only the corrupted/missing erasure blocks of an object.
func healObject(storageDisks []StorageAPI, bucket string, object string, quorum int) (int, int, error) {
func healObject(storageDisks []StorageAPI, bucket, object string, quorum int) (int, int, error) {
partsMetadata, errs := readAllXLMetadata(storageDisks, bucket, object)
// readQuorum suffices for xl.json since we use monotonic
// system time to break the tie when a split-brain situation
// arises.
if reducedErr := reduceReadQuorumErrs(errs, nil, quorum); reducedErr != nil {
return 0, 0, toObjectErr(reducedErr, bucket, object)
}
if !xlShouldHeal(storageDisks, partsMetadata, errs, bucket, object) {
// There is nothing to heal.
return 0, 0, nil
if rErr := reduceReadQuorumErrs(errs, nil, quorum); rErr != nil {
return 0, 0, toObjectErr(rErr, bucket, object)
}
// List of disks having latest version of the object.
latestDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs)
// List of disks having all parts as per latest xl.json.
availableDisks, errs, aErr := disksWithAllParts(latestDisks, partsMetadata, errs, bucket, object)
// List of disks having all parts as per latest xl.json - this
// does a full pass over the data and verifies all part files
// on disk.
availableDisks, errs, aErr := disksWithAllParts(latestDisks, partsMetadata, errs, bucket,
object)
if aErr != nil {
return 0, 0, toObjectErr(aErr, bucket, object)
}
@@ -359,8 +358,7 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
// Number of disks which don't serve data.
numOfflineDisks := 0
for index, disk := range storageDisks {
switch {
case disk == nil, errs[index] == errDiskNotFound:
if disk == nil || errs[index] == errDiskNotFound {
numOfflineDisks++
}
}
@@ -368,12 +366,16 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
// Number of disks which have all parts of the given object.
numAvailableDisks := 0
for _, disk := range availableDisks {
switch {
case disk != nil:
if disk != nil {
numAvailableDisks++
}
}
if numAvailableDisks == len(storageDisks) {
// nothing to heal in this case
return 0, 0, nil
}
// If less than read quorum number of disks have all the parts
// of the data, we can't reconstruct the erasure-coded data.
if numAvailableDisks < quorum {
@@ -381,8 +383,8 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
}
// List of disks having outdated version of the object or missing object.
outDatedDisks := outDatedDisks(storageDisks, availableDisks, errs, partsMetadata,
bucket, object)
outDatedDisks := outDatedDisks(storageDisks, availableDisks, errs, partsMetadata, bucket,
object)
// Number of disks that had outdated content of the given
// object and are online to be healed.
@@ -401,9 +403,10 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
}
for index, disk := range outDatedDisks {
// Before healing outdated disks, we need to remove xl.json
// and part files from "bucket/object/" so that
// rename(minioMetaBucket, "tmp/tmpuuid/", "bucket", "object/") succeeds.
// Before healing outdated disks, we need to remove
// xl.json and part files from "bucket/object/" so
// that rename(minioMetaBucket, "tmp/tmpuuid/",
// "bucket", "object/") succeeds.
if disk == nil {
// Not an outdated disk.
continue
@@ -417,27 +420,15 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
continue
}
// Outdated object with the same name exists that needs to be deleted.
outDatedMeta := partsMetadata[index]
// Consult valid metadata picked when there is no
// metadata available on this disk.
if isErr(errs[index], errFileNotFound) {
outDatedMeta = latestMeta
}
// Delete all the parts. Ignore if parts are not found.
for _, part := range outDatedMeta.Parts {
dErr := disk.DeleteFile(bucket, pathJoin(object, part.Name))
if dErr != nil && !isErr(dErr, errFileNotFound) {
return 0, 0, toObjectErr(traceError(dErr), bucket, object)
// List and delete the object directory, ignoring
// errors.
files, err := disk.ListDir(bucket, object)
if err == nil {
for _, entry := range files {
_ = disk.DeleteFile(bucket,
pathJoin(object, entry))
}
}
// Delete xl.json file. Ignore if xl.json not found.
dErr := disk.DeleteFile(bucket, pathJoin(object, xlMetaJSONFile))
if dErr != nil && !isErr(dErr, errFileNotFound) {
return 0, 0, toObjectErr(traceError(dErr), bucket, object)
}
}
// Reorder so that we have data disks first and parity disks next.
@@ -445,16 +436,19 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
outDatedDisks = shuffleDisks(outDatedDisks, latestMeta.Erasure.Distribution)
partsMetadata = shufflePartsMetadata(partsMetadata, latestMeta.Erasure.Distribution)
// We write at temporary location and then rename to fianal location.
// We write at temporary location and then rename to final location.
tmpID := mustGetUUID()
// Checksum of the part files. checkSumInfos[index] will contain checksums
// of all the part files in the outDatedDisks[index]
// Checksum of the part files. checkSumInfos[index] will
// contain checksums of all the part files in the
// outDatedDisks[index]
checksumInfos := make([][]ChecksumInfo, len(outDatedDisks))
// Heal each part. erasureHealFile() will write the healed part to
// .minio/tmp/uuid/ which needs to be renamed later to the final location.
storage, err := NewErasureStorage(latestDisks, latestMeta.Erasure.DataBlocks, latestMeta.Erasure.ParityBlocks)
// Heal each part. erasureHealFile() will write the healed
// part to .minio/tmp/uuid/ which needs to be renamed later to
// the final location.
storage, err := NewErasureStorage(latestDisks,
latestMeta.Erasure.DataBlocks, latestMeta.Erasure.ParityBlocks)
if err != nil {
return 0, 0, toObjectErr(err, bucket, object)
}
@@ -472,14 +466,33 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
}
}
// Heal the part file.
file, hErr := storage.HealFile(outDatedDisks, bucket, pathJoin(object, partName), erasure.BlockSize, minioMetaTmpBucket, pathJoin(tmpID, partName), partSize, algorithm, checksums)
file, hErr := storage.HealFile(outDatedDisks, bucket, pathJoin(object, partName),
erasure.BlockSize, minioMetaTmpBucket, pathJoin(tmpID, partName), partSize,
algorithm, checksums)
if hErr != nil {
return 0, 0, toObjectErr(hErr, bucket, object)
}
for i := range outDatedDisks {
if outDatedDisks[i] != OfflineDisk {
checksumInfos[i] = append(checksumInfos[i], ChecksumInfo{partName, file.Algorithm, file.Checksums[i]})
// outDatedDisks that had write errors should not be
// written to for remaining parts, so we nil them out.
for i, disk := range outDatedDisks {
if disk == nil {
continue
}
// A non-nil stale disk which did not receive
// a healed part checksum had a write error.
if file.Checksums[i] == nil {
outDatedDisks[i] = nil
numHealedDisks--
continue
}
// append part checksums
checksumInfos[i] = append(checksumInfos[i],
ChecksumInfo{partName, file.Algorithm, file.Checksums[i]})
}
// If all outdated disks had write errors, we give up.
if numHealedDisks == 0 {
return 0, 0, fmt.Errorf("all disks without up-to-date data had write errors")
}
}
@@ -493,7 +506,8 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
}
// Generate and write `xl.json` generated from other disks.
outDatedDisks, aErr = writeUniqueXLMetadata(outDatedDisks, minioMetaTmpBucket, tmpID, partsMetadata, diskCount(outDatedDisks))
outDatedDisks, aErr = writeUniqueXLMetadata(outDatedDisks, minioMetaTmpBucket, tmpID,
partsMetadata, diskCount(outDatedDisks))
if aErr != nil {
return 0, 0, toObjectErr(aErr, bucket, object)
}
@@ -503,13 +517,10 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
if disk == nil {
continue
}
// Remove any lingering partial data from current namespace.
aErr = disk.DeleteFile(bucket, retainSlash(object))
if aErr != nil && aErr != errFileNotFound {
return 0, 0, toObjectErr(traceError(aErr), bucket, object)
}
// Attempt a rename now from healed data to final location.
aErr = disk.RenameFile(minioMetaTmpBucket, retainSlash(tmpID), bucket, retainSlash(object))
aErr = disk.RenameFile(minioMetaTmpBucket, retainSlash(tmpID), bucket,
retainSlash(object))
if aErr != nil {
return 0, 0, toObjectErr(traceError(aErr), bucket, object)
}