improve delete-marker healing (#12794)

Delete markers missing on drives were
not healed due to a few things.

disksWithAllParts() does not know how
to deal with delete markers; add support
for that.

fixes #12787
This commit is contained in:
Harshavardhana 2021-07-26 11:48:09 -07:00 committed by GitHub
parent f175ff8f66
commit a3f7d575e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 37 additions and 31 deletions

View File

@ -225,8 +225,9 @@ func fileInfoConsistent(ctx context.Context, partsMetadata []FileInfo, errs []er
// //
// - slice of errors about the state of data files on disk - can have // - slice of errors about the state of data files on disk - can have
// a not-found error or a hash-mismatch error. // a not-found error or a hash-mismatch error.
func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []FileInfo, errs []error, bucket, func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []FileInfo,
object string, scanMode madmin.HealScanMode) ([]StorageAPI, []error) { errs []error, bucket, object string, scanMode madmin.HealScanMode) ([]StorageAPI, []error) {
// List of disks having latest version of the object er.meta (by modtime) // List of disks having latest version of the object er.meta (by modtime)
_, modTime, dataDir := listOnlineDisks(onlineDisks, partsMetadata, errs) _, modTime, dataDir := listOnlineDisks(onlineDisks, partsMetadata, errs)
@ -239,15 +240,17 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
// consider the offline disks as consistent. // consider the offline disks as consistent.
continue continue
} }
if len(meta.Erasure.Distribution) != len(onlineDisks) { if !meta.Deleted {
// Erasure distribution seems to have lesser if len(meta.Erasure.Distribution) != len(onlineDisks) {
// number of items than number of online disks. // Erasure distribution seems to have lesser
inconsistent++ // number of items than number of online disks.
continue inconsistent++
} continue
if meta.Erasure.Distribution[i] != meta.Erasure.Index { }
// Mismatch indexes with distribution order if meta.Erasure.Distribution[i] != meta.Erasure.Index {
inconsistent++ // Mismatch indexes with distribution order
inconsistent++
}
} }
} }
@ -267,8 +270,8 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
dataErrs[i] = errDiskNotFound dataErrs[i] = errDiskNotFound
continue continue
} }
meta := partsMetadata[i]
meta := partsMetadata[i]
if !meta.ModTime.Equal(modTime) || meta.DataDir != dataDir { if !meta.ModTime.Equal(modTime) || meta.DataDir != dataDir {
dataErrs[i] = errFileCorrupt dataErrs[i] = errFileCorrupt
partsMetadata[i] = FileInfo{} partsMetadata[i] = FileInfo{}
@ -280,20 +283,22 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
continue continue
} }
if len(meta.Erasure.Distribution) != len(onlineDisks) { if !meta.Deleted {
// Erasure distribution is not the same as onlineDisks if len(meta.Erasure.Distribution) != len(onlineDisks) {
// attempt a fix if possible, assuming other entries // Erasure distribution is not the same as onlineDisks
// might have the right erasure distribution. // attempt a fix if possible, assuming other entries
partsMetadata[i] = FileInfo{} // might have the right erasure distribution.
dataErrs[i] = errFileCorrupt partsMetadata[i] = FileInfo{}
continue dataErrs[i] = errFileCorrupt
} continue
}
// Since erasure.Distribution is trustable we can fix the mismatching erasure.Index // Since erasure.Distribution is trustable we can fix the mismatching erasure.Index
if meta.Erasure.Distribution[i] != meta.Erasure.Index { if meta.Erasure.Distribution[i] != meta.Erasure.Index {
partsMetadata[i] = FileInfo{} partsMetadata[i] = FileInfo{}
dataErrs[i] = errFileCorrupt dataErrs[i] = errFileCorrupt
continue continue
}
} }
} }
@ -320,11 +325,11 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
// disk has a valid xl.meta but may not have all the // disk has a valid xl.meta but may not have all the
// parts. This is considered an outdated disk, since // parts. This is considered an outdated disk, since
// it needs healing too. // it needs healing too.
if !partsMetadata[i].IsRemote() { if !partsMetadata[i].Deleted && !partsMetadata[i].IsRemote() {
dataErrs[i] = onlineDisk.VerifyFile(ctx, bucket, object, partsMetadata[i]) dataErrs[i] = onlineDisk.VerifyFile(ctx, bucket, object, partsMetadata[i])
} }
case madmin.HealNormalScan: case madmin.HealNormalScan:
if !partsMetadata[i].IsRemote() { if !partsMetadata[i].Deleted && !partsMetadata[i].IsRemote() {
dataErrs[i] = onlineDisk.CheckParts(ctx, bucket, object, partsMetadata[i]) dataErrs[i] = onlineDisk.CheckParts(ctx, bucket, object, partsMetadata[i])
} }
} }

View File

@ -212,7 +212,7 @@ func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, quorumModTime t
return true return true
} }
if erErr == nil { if erErr == nil {
if !meta.IsRemote() { if !meta.Deleted && !meta.IsRemote() {
// If xl.meta was read fine but there may be problem with the part.N files. // If xl.meta was read fine but there may be problem with the part.N files.
if IsErr(dataErr, []error{ if IsErr(dataErr, []error{
errFileNotFound, errFileNotFound,
@ -267,10 +267,10 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
// Re-read when we have lock... // Re-read when we have lock...
partsMetadata, errs := readAllFileInfo(ctx, storageDisks, bucket, object, versionID, true) partsMetadata, errs := readAllFileInfo(ctx, storageDisks, bucket, object, versionID, true)
_, err = getLatestFileInfo(ctx, partsMetadata, errs) if _, err = getLatestFileInfo(ctx, partsMetadata, errs); err != nil {
if err != nil {
return er.purgeObjectDangling(ctx, bucket, object, versionID, partsMetadata, errs, []error{}, opts) return er.purgeObjectDangling(ctx, bucket, object, versionID, partsMetadata, errs, []error{}, opts)
} }
// List of disks having latest version of the object er.meta // List of disks having latest version of the object er.meta
// (by modtime). // (by modtime).
_, modTime, dataDir := listOnlineDisks(storageDisks, partsMetadata, errs) _, modTime, dataDir := listOnlineDisks(storageDisks, partsMetadata, errs)
@ -292,7 +292,8 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
// used here for reconstruction. This is done to ensure that // used here for reconstruction. This is done to ensure that
// we do not skip drives that have inconsistent metadata to be // we do not skip drives that have inconsistent metadata to be
// skipped from purging when they are stale. // skipped from purging when they are stale.
availableDisks, dataErrs := disksWithAllParts(ctx, storageDisks, partsMetadata, errs, bucket, object, scanMode) availableDisks, dataErrs := disksWithAllParts(ctx, storageDisks, partsMetadata,
errs, bucket, object, scanMode)
// Loop to find number of disks with valid data, per-drive // Loop to find number of disks with valid data, per-drive
// data state and a list of outdated disks on which data needs // data state and a list of outdated disks on which data needs