mirror of
https://github.com/minio/minio.git
synced 2025-11-09 13:39:46 -05:00
Optimize healObject by eliminating extra data passes (#4949)
This commit is contained in:
committed by
Dee Koder
parent
94670a387e
commit
4c9fae90ff
@@ -333,25 +333,24 @@ func quickHeal(storageDisks []StorageAPI, writeQuorum int, readQuorum int) error
|
||||
}
|
||||
|
||||
// Heals an object by repairing only its corrupted/missing erasure blocks.
|
||||
func healObject(storageDisks []StorageAPI, bucket string, object string, quorum int) (int, int, error) {
|
||||
func healObject(storageDisks []StorageAPI, bucket, object string, quorum int) (int, int, error) {
|
||||
|
||||
partsMetadata, errs := readAllXLMetadata(storageDisks, bucket, object)
|
||||
// readQuorum suffices for xl.json since we use monotonic
|
||||
// system time to break the tie when a split-brain situation
|
||||
// arises.
|
||||
if reducedErr := reduceReadQuorumErrs(errs, nil, quorum); reducedErr != nil {
|
||||
return 0, 0, toObjectErr(reducedErr, bucket, object)
|
||||
}
|
||||
|
||||
if !xlShouldHeal(storageDisks, partsMetadata, errs, bucket, object) {
|
||||
// There is nothing to heal.
|
||||
return 0, 0, nil
|
||||
if rErr := reduceReadQuorumErrs(errs, nil, quorum); rErr != nil {
|
||||
return 0, 0, toObjectErr(rErr, bucket, object)
|
||||
}
|
||||
|
||||
// List of disks having latest version of the object.
|
||||
latestDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs)
|
||||
|
||||
// List of disks having all parts as per latest xl.json.
|
||||
availableDisks, errs, aErr := disksWithAllParts(latestDisks, partsMetadata, errs, bucket, object)
|
||||
// List of disks having all parts as per latest xl.json - this
|
||||
// does a full pass over the data and verifies all part files
|
||||
// on disk
|
||||
availableDisks, errs, aErr := disksWithAllParts(latestDisks, partsMetadata, errs, bucket,
|
||||
object)
|
||||
if aErr != nil {
|
||||
return 0, 0, toObjectErr(aErr, bucket, object)
|
||||
}
|
||||
@@ -359,8 +358,7 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
|
||||
// Number of disks which don't serve data.
|
||||
numOfflineDisks := 0
|
||||
for index, disk := range storageDisks {
|
||||
switch {
|
||||
case disk == nil, errs[index] == errDiskNotFound:
|
||||
if disk == nil || errs[index] == errDiskNotFound {
|
||||
numOfflineDisks++
|
||||
}
|
||||
}
|
||||
@@ -368,12 +366,16 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
|
||||
// Number of disks which have all parts of the given object.
|
||||
numAvailableDisks := 0
|
||||
for _, disk := range availableDisks {
|
||||
switch {
|
||||
case disk != nil:
|
||||
if disk != nil {
|
||||
numAvailableDisks++
|
||||
}
|
||||
}
|
||||
|
||||
if numAvailableDisks == len(storageDisks) {
|
||||
// nothing to heal in this case
|
||||
return 0, 0, nil
|
||||
}
|
||||
|
||||
// If fewer than read quorum number of disks have all the parts
|
||||
// of the data, we can't reconstruct the erasure-coded data.
|
||||
if numAvailableDisks < quorum {
|
||||
@@ -381,8 +383,8 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
|
||||
}
|
||||
|
||||
// List of disks having outdated version of the object or missing object.
|
||||
outDatedDisks := outDatedDisks(storageDisks, availableDisks, errs, partsMetadata,
|
||||
bucket, object)
|
||||
outDatedDisks := outDatedDisks(storageDisks, availableDisks, errs, partsMetadata, bucket,
|
||||
object)
|
||||
|
||||
// Number of disks that had outdated content of the given
|
||||
// object and are online to be healed.
|
||||
@@ -401,9 +403,10 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
|
||||
}
|
||||
|
||||
for index, disk := range outDatedDisks {
|
||||
// Before healing outdated disks, we need to remove xl.json
|
||||
// and part files from "bucket/object/" so that
|
||||
// rename(minioMetaBucket, "tmp/tmpuuid/", "bucket", "object/") succeeds.
|
||||
// Before healing outdated disks, we need to remove
|
||||
// xl.json and part files from "bucket/object/" so
|
||||
// that rename(minioMetaBucket, "tmp/tmpuuid/",
|
||||
// "bucket", "object/") succeeds.
|
||||
if disk == nil {
|
||||
// Not an outdated disk.
|
||||
continue
|
||||
@@ -417,27 +420,15 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
|
||||
continue
|
||||
}
|
||||
|
||||
// Outdated object with the same name exists that needs to be deleted.
|
||||
outDatedMeta := partsMetadata[index]
|
||||
// Consult valid metadata picked when there is no
|
||||
// metadata available on this disk.
|
||||
if isErr(errs[index], errFileNotFound) {
|
||||
outDatedMeta = latestMeta
|
||||
}
|
||||
|
||||
// Delete all the parts. Ignore if parts are not found.
|
||||
for _, part := range outDatedMeta.Parts {
|
||||
dErr := disk.DeleteFile(bucket, pathJoin(object, part.Name))
|
||||
if dErr != nil && !isErr(dErr, errFileNotFound) {
|
||||
return 0, 0, toObjectErr(traceError(dErr), bucket, object)
|
||||
// List and delete the object directory, ignoring
|
||||
// errors.
|
||||
files, err := disk.ListDir(bucket, object)
|
||||
if err == nil {
|
||||
for _, entry := range files {
|
||||
_ = disk.DeleteFile(bucket,
|
||||
pathJoin(object, entry))
|
||||
}
|
||||
}
|
||||
|
||||
// Delete xl.json file. Ignore if xl.json not found.
|
||||
dErr := disk.DeleteFile(bucket, pathJoin(object, xlMetaJSONFile))
|
||||
if dErr != nil && !isErr(dErr, errFileNotFound) {
|
||||
return 0, 0, toObjectErr(traceError(dErr), bucket, object)
|
||||
}
|
||||
}
|
||||
|
||||
// Reorder so that we have data disks first and parity disks next.
|
||||
@@ -445,16 +436,19 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
|
||||
outDatedDisks = shuffleDisks(outDatedDisks, latestMeta.Erasure.Distribution)
|
||||
partsMetadata = shufflePartsMetadata(partsMetadata, latestMeta.Erasure.Distribution)
|
||||
|
||||
// We write at temporary location and then rename to fianal location.
|
||||
// We write at temporary location and then rename to final location.
|
||||
tmpID := mustGetUUID()
|
||||
|
||||
// Checksum of the part files. checkSumInfos[index] will contain checksums
|
||||
// of all the part files in the outDatedDisks[index]
|
||||
// Checksum of the part files. checkSumInfos[index] will
|
||||
// contain checksums of all the part files in the
|
||||
// outDatedDisks[index]
|
||||
checksumInfos := make([][]ChecksumInfo, len(outDatedDisks))
|
||||
|
||||
// Heal each part. erasureHealFile() will write the healed part to
|
||||
// .minio/tmp/uuid/ which needs to be renamed later to the final location.
|
||||
storage, err := NewErasureStorage(latestDisks, latestMeta.Erasure.DataBlocks, latestMeta.Erasure.ParityBlocks)
|
||||
// Heal each part. erasureHealFile() will write the healed
|
||||
// part to .minio/tmp/uuid/ which needs to be renamed later to
|
||||
// the final location.
|
||||
storage, err := NewErasureStorage(latestDisks,
|
||||
latestMeta.Erasure.DataBlocks, latestMeta.Erasure.ParityBlocks)
|
||||
if err != nil {
|
||||
return 0, 0, toObjectErr(err, bucket, object)
|
||||
}
|
||||
@@ -472,14 +466,33 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
|
||||
}
|
||||
}
|
||||
// Heal the part file.
|
||||
file, hErr := storage.HealFile(outDatedDisks, bucket, pathJoin(object, partName), erasure.BlockSize, minioMetaTmpBucket, pathJoin(tmpID, partName), partSize, algorithm, checksums)
|
||||
file, hErr := storage.HealFile(outDatedDisks, bucket, pathJoin(object, partName),
|
||||
erasure.BlockSize, minioMetaTmpBucket, pathJoin(tmpID, partName), partSize,
|
||||
algorithm, checksums)
|
||||
if hErr != nil {
|
||||
return 0, 0, toObjectErr(hErr, bucket, object)
|
||||
}
|
||||
for i := range outDatedDisks {
|
||||
if outDatedDisks[i] != OfflineDisk {
|
||||
checksumInfos[i] = append(checksumInfos[i], ChecksumInfo{partName, file.Algorithm, file.Checksums[i]})
|
||||
// outDatedDisks that had write errors should not be
|
||||
// written to for remaining parts, so we nil it out.
|
||||
for i, disk := range outDatedDisks {
|
||||
if disk == nil {
|
||||
continue
|
||||
}
|
||||
// A non-nil stale disk which did not receive
|
||||
// a healed part checksum had a write error.
|
||||
if file.Checksums[i] == nil {
|
||||
outDatedDisks[i] = nil
|
||||
numHealedDisks--
|
||||
continue
|
||||
}
|
||||
// append part checksums
|
||||
checksumInfos[i] = append(checksumInfos[i],
|
||||
ChecksumInfo{partName, file.Algorithm, file.Checksums[i]})
|
||||
}
|
||||
|
||||
// If all disks have errors, we give up.
|
||||
if numHealedDisks == 0 {
|
||||
return 0, 0, fmt.Errorf("all disks without up-to-date data had write errors")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -493,7 +506,8 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
|
||||
}
|
||||
|
||||
// Generate `xl.json` from other disks and write it.
|
||||
outDatedDisks, aErr = writeUniqueXLMetadata(outDatedDisks, minioMetaTmpBucket, tmpID, partsMetadata, diskCount(outDatedDisks))
|
||||
outDatedDisks, aErr = writeUniqueXLMetadata(outDatedDisks, minioMetaTmpBucket, tmpID,
|
||||
partsMetadata, diskCount(outDatedDisks))
|
||||
if aErr != nil {
|
||||
return 0, 0, toObjectErr(aErr, bucket, object)
|
||||
}
|
||||
@@ -503,13 +517,10 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
|
||||
if disk == nil {
|
||||
continue
|
||||
}
|
||||
// Remove any lingering partial data from current namespace.
|
||||
aErr = disk.DeleteFile(bucket, retainSlash(object))
|
||||
if aErr != nil && aErr != errFileNotFound {
|
||||
return 0, 0, toObjectErr(traceError(aErr), bucket, object)
|
||||
}
|
||||
|
||||
// Attempt a rename now from healed data to final location.
|
||||
aErr = disk.RenameFile(minioMetaTmpBucket, retainSlash(tmpID), bucket, retainSlash(object))
|
||||
aErr = disk.RenameFile(minioMetaTmpBucket, retainSlash(tmpID), bucket,
|
||||
retainSlash(object))
|
||||
if aErr != nil {
|
||||
return 0, 0, toObjectErr(traceError(aErr), bucket, object)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user