From 1d1b213f1fec1093c3aef4217348ac36a9d9b743 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Mon, 7 Mar 2022 09:25:53 -0800 Subject: [PATCH] scanner: Consider preselection bias when selecting for Healing (#14492) Healing decisions would align with skipped folder counters. This can lead to files never being selected for heal checks on "clean" paths. Use different hashing methods and take objectHealProbDiv into account when calculating the cycle. Found by @vadmeste --- cmd/data-scanner.go | 8 ++++---- cmd/data-usage-cache.go | 11 +++++++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/cmd/data-scanner.go b/cmd/data-scanner.go index e84eb3a9c..71c088eb8 100644 --- a/cmd/data-scanner.go +++ b/cmd/data-scanner.go @@ -403,7 +403,7 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, int if filter != nil && ok && existing.Compacted { // If folder isn't in filter and we have data, skip it completely. if folder.name != dataUsageRoot && !filter.containsDir(folder.name) { - if f.healObjectSelect == 0 || !thisHash.mod(f.oldCache.Info.NextCycle, f.healFolderInclude/folder.objectHealProbDiv) { + if f.healObjectSelect == 0 || !thisHash.modAlt(f.oldCache.Info.NextCycle/folder.objectHealProbDiv, f.healFolderInclude/folder.objectHealProbDiv) { f.newCache.copyWithChildren(&f.oldCache, thisHash, folder.parent) f.updateCache.copyWithChildren(&f.oldCache, thisHash, folder.parent) if f.dataUsageScannerDebug { @@ -482,7 +482,7 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, int debug: f.dataUsageScannerDebug, lifeCycle: activeLifeCycle, replication: replicationCfg, - heal: thisHash.mod(f.oldCache.Info.NextCycle, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure, + heal: thisHash.modAlt(f.oldCache.Info.NextCycle/folder.objectHealProbDiv, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure, } // if the drive belongs to an erasure set // that is already being healed, skip the @@ -609,13 +609,13 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, int // and the entry itself is compacted. if !into.Compacted && f.oldCache.isCompacted(h) { if !h.mod(f.oldCache.Info.NextCycle, dataUsageUpdateDirCycles) { - if f.healObjectSelect == 0 || !h.mod(f.oldCache.Info.NextCycle, f.healFolderInclude/folder.objectHealProbDiv) { + if f.healObjectSelect == 0 || !h.modAlt(f.oldCache.Info.NextCycle/folder.objectHealProbDiv, f.healFolderInclude/folder.objectHealProbDiv) { // Transfer and add as child... f.newCache.copyWithChildren(&f.oldCache, h, folder.parent) into.addChild(h) continue } - folder.objectHealProbDiv = dataUsageUpdateDirCycles + folder.objectHealProbDiv = f.healFolderInclude } } scanFolder(folder) diff --git a/cmd/data-usage-cache.go b/cmd/data-usage-cache.go index 8cfccea34..4e0df290e 100644 --- a/cmd/data-usage-cache.go +++ b/cmd/data-usage-cache.go @@ -369,6 +369,17 @@ func (h dataUsageHash) mod(cycle uint32, cycles uint32) bool { return uint32(xxhash.Sum64String(string(h)))%cycles == cycle%cycles } +// modAlt returns true if the hash mod cycles == cycle. +// This is out of sync with mod. +// If cycles is 0 false is always returned. +// If cycles is 1 true is always returned (as expected). +func (h dataUsageHash) modAlt(cycle uint32, cycles uint32) bool { + if cycles <= 1 { + return cycles == 1 + } + return uint32(xxhash.Sum64String(string(h))>>32)%(cycles) == cycle%cycles +} + // addChild will add a child based on its hash. // If it already exists it will not be added again. func (e *dataUsageEntry) addChild(hash dataUsageHash) {