add healing for invalid shards by skipping the blocks (#13978)

Built on top of #13945, now we need to simply skip the
shards and its automated.
This commit is contained in:
Harshavardhana
2021-12-23 23:01:46 -08:00
committed by GitHub
parent 9ad6012782
commit 7e3a7d7044
8 changed files with 88 additions and 33 deletions

View File

@@ -225,10 +225,11 @@ func getLatestFileInfo(ctx context.Context, partsMetadata []FileInfo, errs []err
// - slice of errors about the state of data files on disk - can have
// a not-found error or a hash-mismatch error.
func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []FileInfo,
errs []error, latestMeta FileInfo,
bucket, object string, scanMode madmin.HealScanMode) ([]StorageAPI, []error) {
errs []error, latestMeta FileInfo, bucket, object string,
scanMode madmin.HealScanMode) ([]StorageAPI, []error, time.Time) {
var diskMTime time.Time
var shardFix bool
if !latestMeta.DataShardFixed() {
diskMTime = pickValidDiskTimeWithQuorum(partsMetadata,
latestMeta.Erasure.DataBlocks)
@@ -283,6 +284,8 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
if erasureDistributionReliable {
if !meta.IsValid() {
partsMetadata[i] = FileInfo{}
dataErrs[i] = errFileCorrupt
continue
}
@@ -305,6 +308,23 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
}
}
if !diskMTime.Equal(timeSentinel) && !diskMTime.IsZero() {
if !partsMetadata[i].AcceptableDelta(diskMTime, shardDiskTimeDelta) {
// not with in acceptable delta, skip.
// If disk mTime mismatches it is considered outdated
// https://github.com/minio/minio/pull/13803
//
// This check only is active if we could find maximally
// occurring disk mtimes that are somewhat same across
// the quorum. Allowing to skip those shards which we
// might think are wrong.
shardFix = true
partsMetadata[i] = FileInfo{}
dataErrs[i] = errFileCorrupt
continue
}
}
// Always check data, if we got it.
if (len(meta.Data) > 0 || meta.Size == 0) && len(meta.Parts) > 0 {
checksumInfo := meta.Erasure.GetChecksumInfo(meta.Parts[0].Number)
@@ -338,14 +358,6 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
}
}
if !diskMTime.Equal(timeSentinel) && !diskMTime.IsZero() {
if !partsMetadata[i].AcceptableDelta(diskMTime, shardDiskTimeDelta) {
// not with in acceptable delta, skip.
partsMetadata[i] = FileInfo{}
continue
}
}
if dataErrs[i] == nil {
// All parts verified, mark it as all data available.
availableDisks[i] = onlineDisk
@@ -355,5 +367,10 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
}
}
return availableDisks, dataErrs
if shardFix {
// Only when shard is fixed return an appropriate disk mtime value.
return availableDisks, dataErrs, diskMTime
} // else return timeSentinel for disk mtime
return availableDisks, dataErrs, timeSentinel
}