scanner: Consider preselection bias when selecting for Healing (#14492)

Healing decisions would align with skipped folder counters. This can lead to files 
never being selected for heal checks on "clean" paths.

Use different hashing methods and take objectHealProbDiv into account when 
calculating the cycle.

Found by @vadmeste
This commit is contained in:
Klaus Post 2022-03-07 09:25:53 -08:00 committed by GitHub
parent 1f11af42f1
commit 1d1b213f1f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 15 additions and 4 deletions

View File

@ -403,7 +403,7 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, int
if filter != nil && ok && existing.Compacted {
// If folder isn't in filter and we have data, skip it completely.
if folder.name != dataUsageRoot && !filter.containsDir(folder.name) {
if f.healObjectSelect == 0 || !thisHash.mod(f.oldCache.Info.NextCycle, f.healFolderInclude/folder.objectHealProbDiv) {
if f.healObjectSelect == 0 || !thisHash.modAlt(f.oldCache.Info.NextCycle/folder.objectHealProbDiv, f.healFolderInclude/folder.objectHealProbDiv) {
f.newCache.copyWithChildren(&f.oldCache, thisHash, folder.parent)
f.updateCache.copyWithChildren(&f.oldCache, thisHash, folder.parent)
if f.dataUsageScannerDebug {
@ -482,7 +482,7 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, int
debug: f.dataUsageScannerDebug,
lifeCycle: activeLifeCycle,
replication: replicationCfg,
heal: thisHash.mod(f.oldCache.Info.NextCycle, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure,
heal: thisHash.modAlt(f.oldCache.Info.NextCycle/folder.objectHealProbDiv, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure,
}
// if the drive belongs to an erasure set
// that is already being healed, skip the
@ -609,13 +609,13 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, int
// and the entry itself is compacted.
if !into.Compacted && f.oldCache.isCompacted(h) {
if !h.mod(f.oldCache.Info.NextCycle, dataUsageUpdateDirCycles) {
if f.healObjectSelect == 0 || !h.mod(f.oldCache.Info.NextCycle, f.healFolderInclude/folder.objectHealProbDiv) {
if f.healObjectSelect == 0 || !h.modAlt(f.oldCache.Info.NextCycle/folder.objectHealProbDiv, f.healFolderInclude/folder.objectHealProbDiv) {
// Transfer and add as child...
f.newCache.copyWithChildren(&f.oldCache, h, folder.parent)
into.addChild(h)
continue
}
folder.objectHealProbDiv = dataUsageUpdateDirCycles
folder.objectHealProbDiv = f.healFolderInclude
}
}
scanFolder(folder)

View File

@ -369,6 +369,17 @@ func (h dataUsageHash) mod(cycle uint32, cycles uint32) bool {
return uint32(xxhash.Sum64String(string(h)))%cycles == cycle%cycles
}
// modAlt returns true if the hash mod cycles == cycle.
// This is out of sync with mod.
// If cycles is 0 false is always returned.
// If cycles is 1 true is always returned (as expected).
func (h dataUsageHash) modAlt(cycle uint32, cycles uint32) bool {
if cycles <= 1 {
return cycles == 1
}
return uint32(xxhash.Sum64String(string(h))>>32)%(cycles) == cycle%cycles
}
// addChild will add a child based on its hash.
// If it already exists it will not be added again.
func (e *dataUsageEntry) addChild(hash dataUsageHash) {