mirror of
https://github.com/minio/minio.git
synced 2025-11-23 19:17:43 -05:00
heal: Enable periodic bitrot scan configuration (#14464)
This commit is contained in:
@@ -21,6 +21,7 @@ import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
@@ -103,6 +104,63 @@ func (s *safeDuration) Get() time.Duration {
|
||||
return s.t
|
||||
}
|
||||
|
||||
func getCycleScanMode(currentCycle, bitrotStartCycle uint64, bitrotStartTime time.Time) madmin.HealScanMode {
|
||||
bitrotCycle := globalHealConfig.BitrotScanCycle()
|
||||
switch bitrotCycle {
|
||||
case -1:
|
||||
return madmin.HealNormalScan
|
||||
case 0:
|
||||
return madmin.HealDeepScan
|
||||
}
|
||||
|
||||
if currentCycle-bitrotStartCycle < healObjectSelectProb {
|
||||
return madmin.HealDeepScan
|
||||
}
|
||||
|
||||
if time.Since(bitrotStartTime) > bitrotCycle {
|
||||
return madmin.HealDeepScan
|
||||
}
|
||||
|
||||
return madmin.HealNormalScan
|
||||
}
|
||||
|
||||
type backgroundHealInfo struct {
|
||||
BitrotStartTime time.Time `json:"bitrotStartTime"`
|
||||
BitrotStartCycle uint64 `json:"bitrotStartCycle"`
|
||||
CurrentScanMode madmin.HealScanMode `json:"currentScanMode"`
|
||||
}
|
||||
|
||||
func readBackgroundHealInfo(ctx context.Context, objAPI ObjectLayer) backgroundHealInfo {
|
||||
// Get last healing information
|
||||
buf, err := readConfig(ctx, objAPI, backgroundHealInfoPath)
|
||||
if err != nil {
|
||||
if !errors.Is(err, errConfigNotFound) {
|
||||
logger.LogIf(ctx, err)
|
||||
}
|
||||
return backgroundHealInfo{}
|
||||
}
|
||||
var info backgroundHealInfo
|
||||
err = json.Unmarshal(buf, &info)
|
||||
if err != nil {
|
||||
logger.LogIf(ctx, err)
|
||||
return backgroundHealInfo{}
|
||||
}
|
||||
return info
|
||||
}
|
||||
|
||||
func saveBackgroundHealInfo(ctx context.Context, objAPI ObjectLayer, info backgroundHealInfo) {
|
||||
b, err := json.Marshal(info)
|
||||
if err != nil {
|
||||
logger.LogIf(ctx, err)
|
||||
return
|
||||
}
|
||||
// Get last healing information
|
||||
err = saveConfig(ctx, objAPI, backgroundHealInfoPath, b)
|
||||
if err != nil {
|
||||
logger.LogIf(ctx, err)
|
||||
}
|
||||
}
|
||||
|
||||
// runDataScanner will start a data scanner.
|
||||
// The function will block until the context is canceled.
|
||||
// There should only ever be one scanner running per cluster.
|
||||
@@ -145,12 +203,24 @@ func runDataScanner(pctx context.Context, objAPI ObjectLayer) {
|
||||
console.Debugln("starting scanner cycle")
|
||||
}
|
||||
|
||||
bgHealInfo := readBackgroundHealInfo(ctx, objAPI)
|
||||
scanMode := getCycleScanMode(nextBloomCycle, bgHealInfo.BitrotStartCycle, bgHealInfo.BitrotStartTime)
|
||||
if bgHealInfo.CurrentScanMode != scanMode {
|
||||
newHealInfo := bgHealInfo
|
||||
newHealInfo.CurrentScanMode = scanMode
|
||||
if scanMode == madmin.HealDeepScan {
|
||||
newHealInfo.BitrotStartTime = time.Now().UTC()
|
||||
newHealInfo.BitrotStartCycle = nextBloomCycle
|
||||
}
|
||||
saveBackgroundHealInfo(ctx, objAPI, newHealInfo)
|
||||
}
|
||||
|
||||
// Wait before starting next cycle and wait on startup.
|
||||
results := make(chan DataUsageInfo, 1)
|
||||
go storeDataUsageInBackend(ctx, objAPI, results)
|
||||
bf, err := globalNotificationSys.updateBloomFilter(ctx, nextBloomCycle)
|
||||
logger.LogIf(ctx, err)
|
||||
err = objAPI.NSScanner(ctx, bf, results, uint32(nextBloomCycle))
|
||||
err = objAPI.NSScanner(ctx, bf, results, uint32(nextBloomCycle), scanMode)
|
||||
logger.LogIf(ctx, err)
|
||||
if err == nil {
|
||||
// Store new cycle...
|
||||
@@ -182,6 +252,7 @@ type folderScanner struct {
|
||||
dataUsageScannerDebug bool
|
||||
healFolderInclude uint32 // Include a clean folder one in n cycles.
|
||||
healObjectSelect uint32 // Do a heal check on an object once every n cycles. Must divide into healFolderInclude
|
||||
scanMode madmin.HealScanMode
|
||||
|
||||
disks []StorageAPI
|
||||
disksQuorum int
|
||||
@@ -250,7 +321,7 @@ var globalScannerStats scannerStats
|
||||
// The returned cache will always be valid, but may not be updated from the existing.
|
||||
// Before each operation sleepDuration is called which can be used to temporarily halt the scanner.
|
||||
// If the supplied context is canceled the function will return at the first chance.
|
||||
func scanDataFolder(ctx context.Context, poolIdx, setIdx int, basePath string, cache dataUsageCache, getSize getSizeFn) (dataUsageCache, error) {
|
||||
func scanDataFolder(ctx context.Context, poolIdx, setIdx int, basePath string, cache dataUsageCache, getSize getSizeFn, scanMode madmin.HealScanMode) (dataUsageCache, error) {
|
||||
t := UTCNow()
|
||||
|
||||
logPrefix := color.Green("data-usage: ")
|
||||
@@ -279,6 +350,7 @@ func scanDataFolder(ctx context.Context, poolIdx, setIdx int, basePath string, c
|
||||
dataUsageScannerDebug: intDataUpdateTracker.debug,
|
||||
healFolderInclude: 0,
|
||||
healObjectSelect: 0,
|
||||
scanMode: scanMode,
|
||||
updates: cache.Info.updates,
|
||||
}
|
||||
|
||||
@@ -482,12 +554,15 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, int
|
||||
debug: f.dataUsageScannerDebug,
|
||||
lifeCycle: activeLifeCycle,
|
||||
replication: replicationCfg,
|
||||
heal: thisHash.modAlt(f.oldCache.Info.NextCycle/folder.objectHealProbDiv, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure,
|
||||
}
|
||||
|
||||
item.heal.enabled = thisHash.modAlt(f.oldCache.Info.NextCycle/folder.objectHealProbDiv, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure
|
||||
item.heal.bitrot = f.scanMode == madmin.HealDeepScan
|
||||
|
||||
// if the drive belongs to an erasure set
|
||||
// that is already being healed, skip the
|
||||
// healing attempt on this drive.
|
||||
item.heal = item.heal && f.healObjectSelect > 0
|
||||
item.heal.enabled = item.heal.enabled && f.healObjectSelect > 0
|
||||
|
||||
sz, err := f.getSize(item)
|
||||
if err != nil {
|
||||
@@ -821,8 +896,11 @@ type scannerItem struct {
|
||||
replication replicationConfig
|
||||
lifeCycle *lifecycle.Lifecycle
|
||||
Typ fs.FileMode
|
||||
heal bool // Has the object been selected for heal check?
|
||||
debug bool
|
||||
heal struct {
|
||||
enabled bool
|
||||
bitrot bool
|
||||
} // Has the object been selected for heal check?
|
||||
debug bool
|
||||
}
|
||||
|
||||
type sizeSummary struct {
|
||||
@@ -874,9 +952,13 @@ func (i *scannerItem) applyHealing(ctx context.Context, o ObjectLayer, oi Object
|
||||
console.Debugf(applyActionsLogPrefix+" heal checking: %v/%v\n", i.bucket, i.objectPath())
|
||||
}
|
||||
}
|
||||
scanMode := madmin.HealNormalScan
|
||||
if i.heal.bitrot {
|
||||
scanMode = madmin.HealDeepScan
|
||||
}
|
||||
healOpts := madmin.HealOpts{
|
||||
Remove: healDeleteDangling,
|
||||
ScanMode: globalHealConfig.ScanMode(),
|
||||
ScanMode: scanMode,
|
||||
}
|
||||
res, err := o.HealObject(ctx, i.bucket, i.objectPath(), oi.VersionID, healOpts)
|
||||
if err != nil && !errors.Is(err, NotImplemented{}) {
|
||||
@@ -1040,7 +1122,7 @@ func (i *scannerItem) applyActions(ctx context.Context, o ObjectLayer, oi Object
|
||||
// from the current deployment, which means we don't have to call healing
|
||||
// routine even if we are asked to do via heal flag.
|
||||
if !applied {
|
||||
if i.heal {
|
||||
if i.heal.enabled {
|
||||
size = i.applyHealing(ctx, o, oi)
|
||||
}
|
||||
// replicate only if lifecycle rules are not applied.
|
||||
|
||||
Reference in New Issue
Block a user