mirror of
https://github.com/minio/minio.git
synced 2024-12-24 06:05:55 -05:00
scanner: Consider preselection bias when selecting for Healing (#14492)
Healing decisions would align with the skipped-folder counters, which can lead to files on "clean" paths never being selected for heal checks. Use different hashing methods and take objectHealProbDiv into account when calculating the cycle. Found by @vadmeste.
This commit is contained in:
parent
1f11af42f1
commit
1d1b213f1f
@ -403,7 +403,7 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, int
|
||||
if filter != nil && ok && existing.Compacted {
|
||||
// If folder isn't in filter and we have data, skip it completely.
|
||||
if folder.name != dataUsageRoot && !filter.containsDir(folder.name) {
|
||||
if f.healObjectSelect == 0 || !thisHash.mod(f.oldCache.Info.NextCycle, f.healFolderInclude/folder.objectHealProbDiv) {
|
||||
if f.healObjectSelect == 0 || !thisHash.modAlt(f.oldCache.Info.NextCycle/folder.objectHealProbDiv, f.healFolderInclude/folder.objectHealProbDiv) {
|
||||
f.newCache.copyWithChildren(&f.oldCache, thisHash, folder.parent)
|
||||
f.updateCache.copyWithChildren(&f.oldCache, thisHash, folder.parent)
|
||||
if f.dataUsageScannerDebug {
|
||||
@ -482,7 +482,7 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, int
|
||||
debug: f.dataUsageScannerDebug,
|
||||
lifeCycle: activeLifeCycle,
|
||||
replication: replicationCfg,
|
||||
heal: thisHash.mod(f.oldCache.Info.NextCycle, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure,
|
||||
heal: thisHash.modAlt(f.oldCache.Info.NextCycle/folder.objectHealProbDiv, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure,
|
||||
}
|
||||
// if the drive belongs to an erasure set
|
||||
// that is already being healed, skip the
|
||||
@ -609,13 +609,13 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, int
|
||||
// and the entry itself is compacted.
|
||||
if !into.Compacted && f.oldCache.isCompacted(h) {
|
||||
if !h.mod(f.oldCache.Info.NextCycle, dataUsageUpdateDirCycles) {
|
||||
if f.healObjectSelect == 0 || !h.mod(f.oldCache.Info.NextCycle, f.healFolderInclude/folder.objectHealProbDiv) {
|
||||
if f.healObjectSelect == 0 || !h.modAlt(f.oldCache.Info.NextCycle/folder.objectHealProbDiv, f.healFolderInclude/folder.objectHealProbDiv) {
|
||||
// Transfer and add as child...
|
||||
f.newCache.copyWithChildren(&f.oldCache, h, folder.parent)
|
||||
into.addChild(h)
|
||||
continue
|
||||
}
|
||||
folder.objectHealProbDiv = dataUsageUpdateDirCycles
|
||||
folder.objectHealProbDiv = f.healFolderInclude
|
||||
}
|
||||
}
|
||||
scanFolder(folder)
|
||||
|
@ -369,6 +369,17 @@ func (h dataUsageHash) mod(cycle uint32, cycles uint32) bool {
|
||||
return uint32(xxhash.Sum64String(string(h)))%cycles == cycle%cycles
|
||||
}
|
||||
|
||||
// modAlt returns true if the hash mod cycles == cycle.
|
||||
// This is out of sync with mod.
|
||||
// If cycles is 0 false is always returned.
|
||||
// If cycles is 1 true is always returned (as expected).
|
||||
func (h dataUsageHash) modAlt(cycle uint32, cycles uint32) bool {
|
||||
if cycles <= 1 {
|
||||
return cycles == 1
|
||||
}
|
||||
return uint32(xxhash.Sum64String(string(h))>>32)%(cycles) == cycle%cycles
|
||||
}
|
||||
|
||||
// addChild will add a child based on its hash.
|
||||
// If it already exists it will not be added again.
|
||||
func (e *dataUsageEntry) addChild(hash dataUsageHash) {
|
||||
|
Loading…
Reference in New Issue
Block a user