heal: Include more use cases of non-healable but readable objects (#248) (#20776)

If an object has many parts, all of which are readable, but some parts
are missing from some drives, the object can sometimes be un-healable,
which is wrong.

This commit will avoid reading from drives that have missing, corrupted or
outdated xl.meta. It will also check if any part is unreadable to avoid
healing in that case.
This commit is contained in:
Anis Eleuch
2024-12-18 14:15:44 +01:00
committed by GitHub
parent 01e520eb23
commit 16f8cf1c52
4 changed files with 86 additions and 120 deletions

View File

@@ -277,23 +277,21 @@ func partNeedsHealing(partErrs []int) bool {
return slices.IndexFunc(partErrs, func(i int) bool { return i != checkPartSuccess && i != checkPartUnknown }) > -1
}
func hasPartErr(partErrs []int) bool {
return slices.IndexFunc(partErrs, func(i int) bool { return i != checkPartSuccess }) > -1
// countPartNotSuccess returns how many entries of partErrs hold a value
// other than checkPartSuccess.
func countPartNotSuccess(partErrs []int) (c int) {
	for idx := range partErrs {
		if partErrs[idx] == checkPartSuccess {
			// Successful part check, nothing to count.
			continue
		}
		c++
	}
	return c
}
// disksWithAllParts - This function needs to be called with
// []StorageAPI returned by listOnlineDisks. Returns,
//
// - disks which have all parts specified in the latest xl.meta.
//
// - slice of errors about the state of data files on disk - can have
// a not-found error or a hash-mismatch error.
func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []FileInfo,
// checkObjectWithAllParts clears the partsMetadata and onlineDisks entries of drives whose xl.meta is
// nonexistent, corrupted or outdated; it also checks the status of each part (corrupted, missing, ok) in each drive
func checkObjectWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []FileInfo,
errs []error, latestMeta FileInfo, filterByETag bool, bucket, object string,
scanMode madmin.HealScanMode,
) (availableDisks []StorageAPI, dataErrsByDisk map[int][]int, dataErrsByPart map[int][]int) {
availableDisks = make([]StorageAPI, len(onlineDisks))
) (dataErrsByDisk map[int][]int, dataErrsByPart map[int][]int) {
dataErrsByDisk = make(map[int][]int, len(onlineDisks))
for i := range onlineDisks {
dataErrsByDisk[i] = make([]int, len(latestMeta.Parts))
@@ -334,12 +332,12 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
metaErrs := make([]error, len(errs))
for i, onlineDisk := range onlineDisks {
for i := range onlineDisks {
if errs[i] != nil {
metaErrs[i] = errs[i]
continue
}
if onlineDisk == OfflineDisk {
if onlineDisks[i] == OfflineDisk {
metaErrs[i] = errDiskNotFound
continue
}
@@ -355,6 +353,7 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
if corrupted {
metaErrs[i] = errFileCorrupt
partsMetadata[i] = FileInfo{}
onlineDisks[i] = nil
continue
}
@@ -362,6 +361,7 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
if !meta.IsValid() {
partsMetadata[i] = FileInfo{}
metaErrs[i] = errFileCorrupt
onlineDisks[i] = nil
continue
}
@@ -372,6 +372,7 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
// might have the right erasure distribution.
partsMetadata[i] = FileInfo{}
metaErrs[i] = errFileCorrupt
onlineDisks[i] = nil
continue
}
}
@@ -440,20 +441,5 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
dataErrsByDisk[disk][part] = dataErrsByPart[part][disk]
}
}
for i, onlineDisk := range onlineDisks {
if metaErrs[i] == nil {
meta := partsMetadata[i]
if meta.Deleted || meta.IsRemote() || !hasPartErr(dataErrsByDisk[i]) {
// All parts verified, mark it as all data available.
availableDisks[i] = onlineDisk
continue
}
}
// upon errors just make that disk's fileinfo invalid
partsMetadata[i] = FileInfo{}
}
return
}