mirror of
https://github.com/minio/minio.git
synced 2024-12-24 22:25:54 -05:00
heal: Enable periodic bitrot scan configuration (#14464)
This commit is contained in:
parent
ee49a23220
commit
16431d222c
@ -701,7 +701,7 @@ func (h *healSequence) queueHealTask(source healSource, healType madmin.HealItem
|
|||||||
if source.opts != nil {
|
if source.opts != nil {
|
||||||
task.opts = *source.opts
|
task.opts = *source.opts
|
||||||
} else {
|
} else {
|
||||||
task.opts.ScanMode = globalHealConfig.ScanMode()
|
task.opts.ScanMode = madmin.HealNormalScan
|
||||||
}
|
}
|
||||||
|
|
||||||
h.mutex.Lock()
|
h.mutex.Lock()
|
||||||
|
@ -21,6 +21,7 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
@ -103,6 +104,63 @@ func (s *safeDuration) Get() time.Duration {
|
|||||||
return s.t
|
return s.t
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getCycleScanMode(currentCycle, bitrotStartCycle uint64, bitrotStartTime time.Time) madmin.HealScanMode {
|
||||||
|
bitrotCycle := globalHealConfig.BitrotScanCycle()
|
||||||
|
switch bitrotCycle {
|
||||||
|
case -1:
|
||||||
|
return madmin.HealNormalScan
|
||||||
|
case 0:
|
||||||
|
return madmin.HealDeepScan
|
||||||
|
}
|
||||||
|
|
||||||
|
if currentCycle-bitrotStartCycle < healObjectSelectProb {
|
||||||
|
return madmin.HealDeepScan
|
||||||
|
}
|
||||||
|
|
||||||
|
if time.Since(bitrotStartTime) > bitrotCycle {
|
||||||
|
return madmin.HealDeepScan
|
||||||
|
}
|
||||||
|
|
||||||
|
return madmin.HealNormalScan
|
||||||
|
}
|
||||||
|
|
||||||
|
type backgroundHealInfo struct {
|
||||||
|
BitrotStartTime time.Time `json:"bitrotStartTime"`
|
||||||
|
BitrotStartCycle uint64 `json:"bitrotStartCycle"`
|
||||||
|
CurrentScanMode madmin.HealScanMode `json:"currentScanMode"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func readBackgroundHealInfo(ctx context.Context, objAPI ObjectLayer) backgroundHealInfo {
|
||||||
|
// Get last healing information
|
||||||
|
buf, err := readConfig(ctx, objAPI, backgroundHealInfoPath)
|
||||||
|
if err != nil {
|
||||||
|
if !errors.Is(err, errConfigNotFound) {
|
||||||
|
logger.LogIf(ctx, err)
|
||||||
|
}
|
||||||
|
return backgroundHealInfo{}
|
||||||
|
}
|
||||||
|
var info backgroundHealInfo
|
||||||
|
err = json.Unmarshal(buf, &info)
|
||||||
|
if err != nil {
|
||||||
|
logger.LogIf(ctx, err)
|
||||||
|
return backgroundHealInfo{}
|
||||||
|
}
|
||||||
|
return info
|
||||||
|
}
|
||||||
|
|
||||||
|
func saveBackgroundHealInfo(ctx context.Context, objAPI ObjectLayer, info backgroundHealInfo) {
|
||||||
|
b, err := json.Marshal(info)
|
||||||
|
if err != nil {
|
||||||
|
logger.LogIf(ctx, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Get last healing information
|
||||||
|
err = saveConfig(ctx, objAPI, backgroundHealInfoPath, b)
|
||||||
|
if err != nil {
|
||||||
|
logger.LogIf(ctx, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// runDataScanner will start a data scanner.
|
// runDataScanner will start a data scanner.
|
||||||
// The function will block until the context is canceled.
|
// The function will block until the context is canceled.
|
||||||
// There should only ever be one scanner running per cluster.
|
// There should only ever be one scanner running per cluster.
|
||||||
@ -145,12 +203,24 @@ func runDataScanner(pctx context.Context, objAPI ObjectLayer) {
|
|||||||
console.Debugln("starting scanner cycle")
|
console.Debugln("starting scanner cycle")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bgHealInfo := readBackgroundHealInfo(ctx, objAPI)
|
||||||
|
scanMode := getCycleScanMode(nextBloomCycle, bgHealInfo.BitrotStartCycle, bgHealInfo.BitrotStartTime)
|
||||||
|
if bgHealInfo.CurrentScanMode != scanMode {
|
||||||
|
newHealInfo := bgHealInfo
|
||||||
|
newHealInfo.CurrentScanMode = scanMode
|
||||||
|
if scanMode == madmin.HealDeepScan {
|
||||||
|
newHealInfo.BitrotStartTime = time.Now().UTC()
|
||||||
|
newHealInfo.BitrotStartCycle = nextBloomCycle
|
||||||
|
}
|
||||||
|
saveBackgroundHealInfo(ctx, objAPI, newHealInfo)
|
||||||
|
}
|
||||||
|
|
||||||
// Wait before starting next cycle and wait on startup.
|
// Wait before starting next cycle and wait on startup.
|
||||||
results := make(chan DataUsageInfo, 1)
|
results := make(chan DataUsageInfo, 1)
|
||||||
go storeDataUsageInBackend(ctx, objAPI, results)
|
go storeDataUsageInBackend(ctx, objAPI, results)
|
||||||
bf, err := globalNotificationSys.updateBloomFilter(ctx, nextBloomCycle)
|
bf, err := globalNotificationSys.updateBloomFilter(ctx, nextBloomCycle)
|
||||||
logger.LogIf(ctx, err)
|
logger.LogIf(ctx, err)
|
||||||
err = objAPI.NSScanner(ctx, bf, results, uint32(nextBloomCycle))
|
err = objAPI.NSScanner(ctx, bf, results, uint32(nextBloomCycle), scanMode)
|
||||||
logger.LogIf(ctx, err)
|
logger.LogIf(ctx, err)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
// Store new cycle...
|
// Store new cycle...
|
||||||
@ -182,6 +252,7 @@ type folderScanner struct {
|
|||||||
dataUsageScannerDebug bool
|
dataUsageScannerDebug bool
|
||||||
healFolderInclude uint32 // Include a clean folder one in n cycles.
|
healFolderInclude uint32 // Include a clean folder one in n cycles.
|
||||||
healObjectSelect uint32 // Do a heal check on an object once every n cycles. Must divide into healFolderInclude
|
healObjectSelect uint32 // Do a heal check on an object once every n cycles. Must divide into healFolderInclude
|
||||||
|
scanMode madmin.HealScanMode
|
||||||
|
|
||||||
disks []StorageAPI
|
disks []StorageAPI
|
||||||
disksQuorum int
|
disksQuorum int
|
||||||
@ -250,7 +321,7 @@ var globalScannerStats scannerStats
|
|||||||
// The returned cache will always be valid, but may not be updated from the existing.
|
// The returned cache will always be valid, but may not be updated from the existing.
|
||||||
// Before each operation sleepDuration is called which can be used to temporarily halt the scanner.
|
// Before each operation sleepDuration is called which can be used to temporarily halt the scanner.
|
||||||
// If the supplied context is canceled the function will return at the first chance.
|
// If the supplied context is canceled the function will return at the first chance.
|
||||||
func scanDataFolder(ctx context.Context, poolIdx, setIdx int, basePath string, cache dataUsageCache, getSize getSizeFn) (dataUsageCache, error) {
|
func scanDataFolder(ctx context.Context, poolIdx, setIdx int, basePath string, cache dataUsageCache, getSize getSizeFn, scanMode madmin.HealScanMode) (dataUsageCache, error) {
|
||||||
t := UTCNow()
|
t := UTCNow()
|
||||||
|
|
||||||
logPrefix := color.Green("data-usage: ")
|
logPrefix := color.Green("data-usage: ")
|
||||||
@ -279,6 +350,7 @@ func scanDataFolder(ctx context.Context, poolIdx, setIdx int, basePath string, c
|
|||||||
dataUsageScannerDebug: intDataUpdateTracker.debug,
|
dataUsageScannerDebug: intDataUpdateTracker.debug,
|
||||||
healFolderInclude: 0,
|
healFolderInclude: 0,
|
||||||
healObjectSelect: 0,
|
healObjectSelect: 0,
|
||||||
|
scanMode: scanMode,
|
||||||
updates: cache.Info.updates,
|
updates: cache.Info.updates,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -482,12 +554,15 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, int
|
|||||||
debug: f.dataUsageScannerDebug,
|
debug: f.dataUsageScannerDebug,
|
||||||
lifeCycle: activeLifeCycle,
|
lifeCycle: activeLifeCycle,
|
||||||
replication: replicationCfg,
|
replication: replicationCfg,
|
||||||
heal: thisHash.modAlt(f.oldCache.Info.NextCycle/folder.objectHealProbDiv, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
item.heal.enabled = thisHash.modAlt(f.oldCache.Info.NextCycle/folder.objectHealProbDiv, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure
|
||||||
|
item.heal.bitrot = f.scanMode == madmin.HealDeepScan
|
||||||
|
|
||||||
// if the drive belongs to an erasure set
|
// if the drive belongs to an erasure set
|
||||||
// that is already being healed, skip the
|
// that is already being healed, skip the
|
||||||
// healing attempt on this drive.
|
// healing attempt on this drive.
|
||||||
item.heal = item.heal && f.healObjectSelect > 0
|
item.heal.enabled = item.heal.enabled && f.healObjectSelect > 0
|
||||||
|
|
||||||
sz, err := f.getSize(item)
|
sz, err := f.getSize(item)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -821,7 +896,10 @@ type scannerItem struct {
|
|||||||
replication replicationConfig
|
replication replicationConfig
|
||||||
lifeCycle *lifecycle.Lifecycle
|
lifeCycle *lifecycle.Lifecycle
|
||||||
Typ fs.FileMode
|
Typ fs.FileMode
|
||||||
heal bool // Has the object been selected for heal check?
|
heal struct {
|
||||||
|
enabled bool
|
||||||
|
bitrot bool
|
||||||
|
} // Has the object been selected for heal check?
|
||||||
debug bool
|
debug bool
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -874,9 +952,13 @@ func (i *scannerItem) applyHealing(ctx context.Context, o ObjectLayer, oi Object
|
|||||||
console.Debugf(applyActionsLogPrefix+" heal checking: %v/%v\n", i.bucket, i.objectPath())
|
console.Debugf(applyActionsLogPrefix+" heal checking: %v/%v\n", i.bucket, i.objectPath())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
scanMode := madmin.HealNormalScan
|
||||||
|
if i.heal.bitrot {
|
||||||
|
scanMode = madmin.HealDeepScan
|
||||||
|
}
|
||||||
healOpts := madmin.HealOpts{
|
healOpts := madmin.HealOpts{
|
||||||
Remove: healDeleteDangling,
|
Remove: healDeleteDangling,
|
||||||
ScanMode: globalHealConfig.ScanMode(),
|
ScanMode: scanMode,
|
||||||
}
|
}
|
||||||
res, err := o.HealObject(ctx, i.bucket, i.objectPath(), oi.VersionID, healOpts)
|
res, err := o.HealObject(ctx, i.bucket, i.objectPath(), oi.VersionID, healOpts)
|
||||||
if err != nil && !errors.Is(err, NotImplemented{}) {
|
if err != nil && !errors.Is(err, NotImplemented{}) {
|
||||||
@ -1040,7 +1122,7 @@ func (i *scannerItem) applyActions(ctx context.Context, o ObjectLayer, oi Object
|
|||||||
// from the current deployment, which means we don't have to call healing
|
// from the current deployment, which means we don't have to call healing
|
||||||
// routine even if we are asked to do via heal flag.
|
// routine even if we are asked to do via heal flag.
|
||||||
if !applied {
|
if !applied {
|
||||||
if i.heal {
|
if i.heal.enabled {
|
||||||
size = i.applyHealing(ctx, o, oi)
|
size = i.applyHealing(ctx, o, oi)
|
||||||
}
|
}
|
||||||
// replicate only if lifecycle rules are not applied.
|
// replicate only if lifecycle rules are not applied.
|
||||||
|
@ -35,6 +35,8 @@ const (
|
|||||||
dataUsageBloomName = ".bloomcycle.bin"
|
dataUsageBloomName = ".bloomcycle.bin"
|
||||||
dataUsageBloomNamePath = bucketMetaPrefix + SlashSeparator + dataUsageBloomName
|
dataUsageBloomNamePath = bucketMetaPrefix + SlashSeparator + dataUsageBloomName
|
||||||
|
|
||||||
|
backgroundHealInfoPath = bucketMetaPrefix + SlashSeparator + ".background-heal.json"
|
||||||
|
|
||||||
dataUsageCacheName = ".usage-cache.bin"
|
dataUsageCacheName = ".usage-cache.bin"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -67,7 +67,7 @@ func TestDataUsageUpdate(t *testing.T) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
got, err := scanDataFolder(context.Background(), 0, 0, base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
|
got, err := scanDataFolder(context.Background(), 0, 0, base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@ -178,7 +178,7 @@ func TestDataUsageUpdate(t *testing.T) {
|
|||||||
}
|
}
|
||||||
// Changed dir must be picked up in this many cycles.
|
// Changed dir must be picked up in this many cycles.
|
||||||
for i := 0; i < dataUsageUpdateDirCycles; i++ {
|
for i := 0; i < dataUsageUpdateDirCycles; i++ {
|
||||||
got, err = scanDataFolder(context.Background(), 0, 0, base, got, getSize)
|
got, err = scanDataFolder(context.Background(), 0, 0, base, got, getSize, 0)
|
||||||
got.Info.NextCycle++
|
got.Info.NextCycle++
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
@ -289,7 +289,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
|
|||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
got, err := scanDataFolder(context.Background(), 0, 0, base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, getSize)
|
got, err := scanDataFolder(context.Background(), 0, 0, base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, getSize, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@ -423,7 +423,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
|
|||||||
}
|
}
|
||||||
// Changed dir must be picked up in this many cycles.
|
// Changed dir must be picked up in this many cycles.
|
||||||
for i := 0; i < dataUsageUpdateDirCycles; i++ {
|
for i := 0; i < dataUsageUpdateDirCycles; i++ {
|
||||||
got, err = scanDataFolder(context.Background(), 0, 0, base, got, getSize)
|
got, err = scanDataFolder(context.Background(), 0, 0, base, got, getSize, 0)
|
||||||
got.Info.NextCycle++
|
got.Info.NextCycle++
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
@ -575,7 +575,7 @@ func TestDataUsageCacheSerialize(t *testing.T) {
|
|||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
want, err := scanDataFolder(context.Background(), 0, 0, base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
|
want, err := scanDataFolder(context.Background(), 0, 0, base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
@ -531,7 +531,7 @@ func (z *erasureServerPools) StorageInfo(ctx context.Context) (StorageInfo, []er
|
|||||||
return storageInfo, errs
|
return storageInfo, errs
|
||||||
}
|
}
|
||||||
|
|
||||||
func (z *erasureServerPools) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo, wantCycle uint32) error {
|
func (z *erasureServerPools) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo, wantCycle uint32, healScanMode madmin.HealScanMode) error {
|
||||||
// Updates must be closed before we return.
|
// Updates must be closed before we return.
|
||||||
defer close(updates)
|
defer close(updates)
|
||||||
|
|
||||||
@ -576,7 +576,7 @@ func (z *erasureServerPools) NSScanner(ctx context.Context, bf *bloomFilter, upd
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
// Start scanner. Blocks until done.
|
// Start scanner. Blocks until done.
|
||||||
err := erObj.nsScanner(ctx, allBuckets, bf, wantCycle, updates)
|
err := erObj.nsScanner(ctx, allBuckets, bf, wantCycle, updates, healScanMode)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.LogIf(ctx, err)
|
logger.LogIf(ctx, err)
|
||||||
mu.Lock()
|
mu.Lock()
|
||||||
|
@ -344,7 +344,7 @@ func (er erasureObjects) cleanupDeletedObjects(ctx context.Context) {
|
|||||||
|
|
||||||
// nsScanner will start scanning buckets and send updated totals as they are traversed.
|
// nsScanner will start scanning buckets and send updated totals as they are traversed.
|
||||||
// Updates are sent on a regular basis and the caller *must* consume them.
|
// Updates are sent on a regular basis and the caller *must* consume them.
|
||||||
func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, bf *bloomFilter, wantCycle uint32, updates chan<- dataUsageCache) error {
|
func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, bf *bloomFilter, wantCycle uint32, updates chan<- dataUsageCache, healScanMode madmin.HealScanMode) error {
|
||||||
if len(buckets) == 0 {
|
if len(buckets) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -490,7 +490,7 @@ func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, bf
|
|||||||
// Calc usage
|
// Calc usage
|
||||||
before := cache.Info.LastUpdate
|
before := cache.Info.LastUpdate
|
||||||
var err error
|
var err error
|
||||||
cache, err = disk.NSScanner(ctx, cache, updates)
|
cache, err = disk.NSScanner(ctx, cache, updates, healScanMode)
|
||||||
cache.Info.BloomFilter = nil
|
cache.Info.BloomFilter = nil
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if !cache.Info.LastUpdate.IsZero() && cache.Info.LastUpdate.After(before) {
|
if !cache.Info.LastUpdate.IsZero() && cache.Info.LastUpdate.After(before) {
|
||||||
|
@ -235,7 +235,7 @@ func (fs *FSObjects) StorageInfo(ctx context.Context) (StorageInfo, []error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NSScanner returns data usage stats of the current FS deployment
|
// NSScanner returns data usage stats of the current FS deployment
|
||||||
func (fs *FSObjects) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo, wantCycle uint32) error {
|
func (fs *FSObjects) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo, wantCycle uint32, _ madmin.HealScanMode) error {
|
||||||
defer close(updates)
|
defer close(updates)
|
||||||
// Load bucket totals
|
// Load bucket totals
|
||||||
var totalCache dataUsageCache
|
var totalCache dataUsageCache
|
||||||
@ -396,7 +396,7 @@ func (fs *FSObjects) scanBucket(ctx context.Context, bucket string, cache dataUs
|
|||||||
}
|
}
|
||||||
|
|
||||||
return sizeSummary{totalSize: fi.Size(), versions: 1}, nil
|
return sizeSummary{totalSize: fi.Size(), versions: 1}, nil
|
||||||
})
|
}, 0)
|
||||||
|
|
||||||
return cache, err
|
return cache, err
|
||||||
}
|
}
|
||||||
|
@ -48,7 +48,7 @@ func (a GatewayUnsupported) LocalStorageInfo(ctx context.Context) (StorageInfo,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NSScanner - scanner is not implemented for gateway
|
// NSScanner - scanner is not implemented for gateway
|
||||||
func (a GatewayUnsupported) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo, wantCycle uint32) error {
|
func (a GatewayUnsupported) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo, wantCycle uint32, scanMode madmin.HealScanMode) error {
|
||||||
logger.CriticalIf(ctx, errors.New("not implemented"))
|
logger.CriticalIf(ctx, errors.New("not implemented"))
|
||||||
return NotImplemented{}
|
return NotImplemented{}
|
||||||
}
|
}
|
||||||
|
@ -45,7 +45,6 @@ func newBgHealSequence() *healSequence {
|
|||||||
hs := madmin.HealOpts{
|
hs := madmin.HealOpts{
|
||||||
// Remove objects that do not have read-quorum
|
// Remove objects that do not have read-quorum
|
||||||
Remove: healDeleteDangling,
|
Remove: healDeleteDangling,
|
||||||
ScanMode: globalHealConfig.ScanMode(),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return &healSequence{
|
return &healSequence{
|
||||||
@ -165,7 +164,7 @@ func mustGetHealSequence(ctx context.Context) *healSequence {
|
|||||||
// healErasureSet lists and heals all objects in a specific erasure set
|
// healErasureSet lists and heals all objects in a specific erasure set
|
||||||
func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string, tracker *healingTracker) error {
|
func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string, tracker *healingTracker) error {
|
||||||
bgSeq := mustGetHealSequence(ctx)
|
bgSeq := mustGetHealSequence(ctx)
|
||||||
scanMode := globalHealConfig.ScanMode()
|
scanMode := madmin.HealNormalScan
|
||||||
|
|
||||||
// Make sure to copy since `buckets slice`
|
// Make sure to copy since `buckets slice`
|
||||||
// is modified in place by tracker.
|
// is modified in place by tracker.
|
||||||
|
@ -185,7 +185,7 @@ func (m *mrfState) healRoutine() {
|
|||||||
defer idler.Stop()
|
defer idler.Stop()
|
||||||
|
|
||||||
mrfHealingOpts := madmin.HealOpts{
|
mrfHealingOpts := madmin.HealOpts{
|
||||||
ScanMode: globalHealConfig.ScanMode(),
|
ScanMode: madmin.HealNormalScan,
|
||||||
Remove: healDeleteDangling,
|
Remove: healDeleteDangling,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,6 +22,8 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/minio/madmin-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
// naughtyDisk wraps a POSIX disk and returns programmed errors
|
// naughtyDisk wraps a POSIX disk and returns programmed errors
|
||||||
@ -110,8 +112,8 @@ func (d *naughtyDisk) SetDiskID(id string) {
|
|||||||
d.disk.SetDiskID(id)
|
d.disk.SetDiskID(id)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *naughtyDisk) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry) (info dataUsageCache, err error) {
|
func (d *naughtyDisk) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry, scanMode madmin.HealScanMode) (info dataUsageCache, err error) {
|
||||||
return d.disk.NSScanner(ctx, cache, updates)
|
return d.disk.NSScanner(ctx, cache, updates, scanMode)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *naughtyDisk) DiskInfo(ctx context.Context) (info DiskInfo, err error) {
|
func (d *naughtyDisk) DiskInfo(ctx context.Context) (info DiskInfo, err error) {
|
||||||
|
@ -171,7 +171,7 @@ type ObjectLayer interface {
|
|||||||
|
|
||||||
// Storage operations.
|
// Storage operations.
|
||||||
Shutdown(context.Context) error
|
Shutdown(context.Context) error
|
||||||
NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo, wantCycle uint32) error
|
NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo, wantCycle uint32, scanMode madmin.HealScanMode) error
|
||||||
BackendInfo() madmin.BackendInfo
|
BackendInfo() madmin.BackendInfo
|
||||||
StorageInfo(ctx context.Context) (StorageInfo, []error)
|
StorageInfo(ctx context.Context) (StorageInfo, []error)
|
||||||
LocalStorageInfo(ctx context.Context) (StorageInfo, []error)
|
LocalStorageInfo(ctx context.Context) (StorageInfo, []error)
|
||||||
|
@ -21,6 +21,8 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"io"
|
"io"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/minio/madmin-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
// StorageAPI interface.
|
// StorageAPI interface.
|
||||||
@ -64,7 +66,7 @@ type StorageAPI interface {
|
|||||||
// has never been replaced.
|
// has never been replaced.
|
||||||
Healing() *healingTracker
|
Healing() *healingTracker
|
||||||
DiskInfo(ctx context.Context) (info DiskInfo, err error)
|
DiskInfo(ctx context.Context) (info DiskInfo, err error)
|
||||||
NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry) (dataUsageCache, error)
|
NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry, scanMode madmin.HealScanMode) (dataUsageCache, error)
|
||||||
|
|
||||||
// Volume operations.
|
// Volume operations.
|
||||||
MakeVol(ctx context.Context, volume string) (err error)
|
MakeVol(ctx context.Context, volume string) (err error)
|
||||||
@ -142,7 +144,7 @@ func (p *unrecognizedDisk) Healing() *healingTracker {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *unrecognizedDisk) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry) (dataUsageCache, error) {
|
func (p *unrecognizedDisk) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry, scanMode madmin.HealScanMode) (dataUsageCache, error) {
|
||||||
return dataUsageCache{}, errDiskNotFound
|
return dataUsageCache{}, errDiskNotFound
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -33,6 +33,7 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/minio/madmin-go"
|
||||||
xhttp "github.com/minio/minio/internal/http"
|
xhttp "github.com/minio/minio/internal/http"
|
||||||
"github.com/minio/minio/internal/logger"
|
"github.com/minio/minio/internal/logger"
|
||||||
"github.com/minio/minio/internal/rest"
|
"github.com/minio/minio/internal/rest"
|
||||||
@ -207,12 +208,14 @@ func (client *storageRESTClient) Healing() *healingTracker {
|
|||||||
return val.(*healingTracker)
|
return val.(*healingTracker)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (client *storageRESTClient) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry) (dataUsageCache, error) {
|
func (client *storageRESTClient) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry, scanMode madmin.HealScanMode) (dataUsageCache, error) {
|
||||||
defer close(updates)
|
defer close(updates)
|
||||||
pr, pw := io.Pipe()
|
pr, pw := io.Pipe()
|
||||||
go func() {
|
go func() {
|
||||||
pw.CloseWithError(cache.serializeTo(pw))
|
pw.CloseWithError(cache.serializeTo(pw))
|
||||||
}()
|
}()
|
||||||
|
vals := make(url.Values)
|
||||||
|
vals.Set(storageRESTScanMode, strconv.Itoa(int(scanMode)))
|
||||||
respBody, err := client.call(ctx, storageRESTMethodNSScanner, url.Values{}, pr, -1)
|
respBody, err := client.call(ctx, storageRESTMethodNSScanner, url.Values{}, pr, -1)
|
||||||
defer xhttp.DrainBody(respBody)
|
defer xhttp.DrainBody(respBody)
|
||||||
pr.CloseWithError(err)
|
pr.CloseWithError(err)
|
||||||
|
@ -18,7 +18,7 @@
|
|||||||
package cmd
|
package cmd
|
||||||
|
|
||||||
const (
|
const (
|
||||||
storageRESTVersion = "v43" // Added DiskMTime field for FileInfo
|
storageRESTVersion = "v44" // Added heal scan mode in NSScanner
|
||||||
storageRESTVersionPrefix = SlashSeparator + storageRESTVersion
|
storageRESTVersionPrefix = SlashSeparator + storageRESTVersion
|
||||||
storageRESTPrefix = minioReservedBucketPath + "/storage"
|
storageRESTPrefix = minioReservedBucketPath + "/storage"
|
||||||
)
|
)
|
||||||
@ -79,4 +79,5 @@ const (
|
|||||||
storageRESTDiskID = "disk-id"
|
storageRESTDiskID = "disk-id"
|
||||||
storageRESTForceDelete = "force-delete"
|
storageRESTForceDelete = "force-delete"
|
||||||
storageRESTGlob = "glob"
|
storageRESTGlob = "glob"
|
||||||
|
storageRESTScanMode = "scan-mode"
|
||||||
)
|
)
|
||||||
|
@ -40,6 +40,7 @@ import (
|
|||||||
|
|
||||||
jwtreq "github.com/golang-jwt/jwt/v4/request"
|
jwtreq "github.com/golang-jwt/jwt/v4/request"
|
||||||
"github.com/gorilla/mux"
|
"github.com/gorilla/mux"
|
||||||
|
"github.com/minio/madmin-go"
|
||||||
"github.com/minio/minio/internal/config"
|
"github.com/minio/minio/internal/config"
|
||||||
xhttp "github.com/minio/minio/internal/http"
|
xhttp "github.com/minio/minio/internal/http"
|
||||||
xioutil "github.com/minio/minio/internal/ioutil"
|
xioutil "github.com/minio/minio/internal/ioutil"
|
||||||
@ -179,10 +180,17 @@ func (s *storageRESTServer) NSScannerHandler(w http.ResponseWriter, r *http.Requ
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
scanMode, err := strconv.Atoi(r.Form.Get(storageRESTScanMode))
|
||||||
|
if err != nil {
|
||||||
|
logger.LogIf(r.Context(), err)
|
||||||
|
s.writeErrorResponse(w, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
setEventStreamHeaders(w)
|
setEventStreamHeaders(w)
|
||||||
|
|
||||||
var cache dataUsageCache
|
var cache dataUsageCache
|
||||||
err := cache.deserialize(r.Body)
|
err = cache.deserialize(r.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.LogIf(r.Context(), err)
|
logger.LogIf(r.Context(), err)
|
||||||
s.writeErrorResponse(w, err)
|
s.writeErrorResponse(w, err)
|
||||||
@ -220,7 +228,7 @@ func (s *storageRESTServer) NSScannerHandler(w http.ResponseWriter, r *http.Requ
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
usageInfo, err := s.storage.NSScanner(ctx, cache, updates)
|
usageInfo, err := s.storage.NSScanner(ctx, cache, updates, madmin.HealScanMode(scanMode))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
respW.Flush()
|
respW.Flush()
|
||||||
resp.CloseWithError(err)
|
resp.CloseWithError(err)
|
||||||
|
@ -153,7 +153,7 @@ func (p *xlStorageDiskIDCheck) Healing() *healingTracker {
|
|||||||
return p.storage.Healing()
|
return p.storage.Healing()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *xlStorageDiskIDCheck) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry) (dataUsageCache, error) {
|
func (p *xlStorageDiskIDCheck) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry, scanMode madmin.HealScanMode) (dataUsageCache, error) {
|
||||||
if contextCanceled(ctx) {
|
if contextCanceled(ctx) {
|
||||||
return dataUsageCache{}, ctx.Err()
|
return dataUsageCache{}, ctx.Err()
|
||||||
}
|
}
|
||||||
@ -161,7 +161,7 @@ func (p *xlStorageDiskIDCheck) NSScanner(ctx context.Context, cache dataUsageCac
|
|||||||
if err := p.checkDiskStale(); err != nil {
|
if err := p.checkDiskStale(); err != nil {
|
||||||
return dataUsageCache{}, err
|
return dataUsageCache{}, err
|
||||||
}
|
}
|
||||||
return p.storage.NSScanner(ctx, cache, updates)
|
return p.storage.NSScanner(ctx, cache, updates, scanMode)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *xlStorageDiskIDCheck) GetDiskLoc() (poolIdx, setIdx, diskIdx int) {
|
func (p *xlStorageDiskIDCheck) GetDiskLoc() (poolIdx, setIdx, diskIdx int) {
|
||||||
|
@ -38,6 +38,7 @@ import (
|
|||||||
|
|
||||||
"github.com/dustin/go-humanize"
|
"github.com/dustin/go-humanize"
|
||||||
jsoniter "github.com/json-iterator/go"
|
jsoniter "github.com/json-iterator/go"
|
||||||
|
"github.com/minio/madmin-go"
|
||||||
"github.com/minio/minio/internal/bucket/lifecycle"
|
"github.com/minio/minio/internal/bucket/lifecycle"
|
||||||
"github.com/minio/minio/internal/color"
|
"github.com/minio/minio/internal/color"
|
||||||
"github.com/minio/minio/internal/disk"
|
"github.com/minio/minio/internal/disk"
|
||||||
@ -409,7 +410,7 @@ func (s *xlStorage) readMetadata(ctx context.Context, itemPath string) ([]byte,
|
|||||||
return buf, err
|
return buf, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *xlStorage) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry) (dataUsageCache, error) {
|
func (s *xlStorage) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry, scanMode madmin.HealScanMode) (dataUsageCache, error) {
|
||||||
// Updates must be closed before we return.
|
// Updates must be closed before we return.
|
||||||
defer close(updates)
|
defer close(updates)
|
||||||
var lc *lifecycle.Lifecycle
|
var lc *lifecycle.Lifecycle
|
||||||
@ -524,7 +525,7 @@ func (s *xlStorage) NSScanner(ctx context.Context, cache dataUsageCache, updates
|
|||||||
item.applyTierObjSweep(ctx, objAPI, oi)
|
item.applyTierObjSweep(ctx, objAPI, oi)
|
||||||
}
|
}
|
||||||
return sizeS, nil
|
return sizeS, nil
|
||||||
})
|
}, scanMode)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return dataUsageInfo, err
|
return dataUsageInfo, err
|
||||||
}
|
}
|
||||||
|
@ -81,7 +81,7 @@ func ParseBool(str string) (bool, error) {
|
|||||||
if strings.EqualFold(str, "disabled") {
|
if strings.EqualFold(str, "disabled") {
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
return false, fmt.Errorf("ParseBool: parsing '%s': %s", str, strconv.ErrSyntax)
|
return false, fmt.Errorf("ParseBool: parsing '%s': %w", str, strconv.ErrSyntax)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParseBoolFlag - parses string into BoolFlag.
|
// ParseBoolFlag - parses string into BoolFlag.
|
||||||
|
@ -18,12 +18,13 @@
|
|||||||
package heal
|
package heal
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/minio/madmin-go"
|
|
||||||
"github.com/minio/minio/internal/config"
|
"github.com/minio/minio/internal/config"
|
||||||
"github.com/minio/pkg/env"
|
"github.com/minio/pkg/env"
|
||||||
)
|
)
|
||||||
@ -44,20 +45,27 @@ var configMutex sync.RWMutex
|
|||||||
// Config represents the heal settings.
|
// Config represents the heal settings.
|
||||||
type Config struct {
|
type Config struct {
|
||||||
// Bitrot will perform bitrot scan on local disk when checking objects.
|
// Bitrot will perform bitrot scan on local disk when checking objects.
|
||||||
Bitrot bool `json:"bitrotscan"`
|
Bitrot string `json:"bitrotscan"`
|
||||||
|
|
||||||
// maximum sleep duration between objects to slow down heal operation.
|
// maximum sleep duration between objects to slow down heal operation.
|
||||||
Sleep time.Duration `json:"sleep"`
|
Sleep time.Duration `json:"sleep"`
|
||||||
IOCount int `json:"iocount"`
|
IOCount int `json:"iocount"`
|
||||||
|
|
||||||
|
// Cached value from Bitrot field
|
||||||
|
cache struct {
|
||||||
|
// -1: bitrot disabled, 0: bitrot enabled (continuous), > 0: bitrot cycle
|
||||||
|
bitrotCycle time.Duration
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ScanMode returns configured scan mode
|
// BitrotScanCycle returns the configured cycle for the scanner healing
|
||||||
func (opts Config) ScanMode() madmin.HealScanMode {
|
// -1 for not enabled
|
||||||
|
// 0 for continuous bitrot scanning
|
||||||
|
// >0 interval duration between cycles
|
||||||
|
func (opts Config) BitrotScanCycle() (d time.Duration) {
|
||||||
configMutex.RLock()
|
configMutex.RLock()
|
||||||
defer configMutex.RUnlock()
|
defer configMutex.RUnlock()
|
||||||
if opts.Bitrot {
|
return opts.cache.bitrotCycle
|
||||||
return madmin.HealDeepScan
|
|
||||||
}
|
|
||||||
return madmin.HealNormalScan
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait waits for IOCount to go down or max sleep to elapse before returning.
|
// Wait waits for IOCount to go down or max sleep to elapse before returning.
|
||||||
@ -103,6 +111,8 @@ func (opts *Config) Update(nopts Config) {
|
|||||||
opts.Bitrot = nopts.Bitrot
|
opts.Bitrot = nopts.Bitrot
|
||||||
opts.IOCount = nopts.IOCount
|
opts.IOCount = nopts.IOCount
|
||||||
opts.Sleep = nopts.Sleep
|
opts.Sleep = nopts.Sleep
|
||||||
|
|
||||||
|
opts.cache.bitrotCycle, _ = parseBitrotConfig(nopts.Bitrot)
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -126,9 +136,9 @@ var (
|
|||||||
Help = config.HelpKVS{
|
Help = config.HelpKVS{
|
||||||
config.HelpKV{
|
config.HelpKV{
|
||||||
Key: Bitrot,
|
Key: Bitrot,
|
||||||
Description: `perform bitrot scan on disks when checking objects during scanner`,
|
Description: `perform bitrot scan on disks when checking objects during scanner. e.g 6m`,
|
||||||
Optional: true,
|
Optional: true,
|
||||||
Type: "on|off",
|
Type: "on|off|duration",
|
||||||
},
|
},
|
||||||
config.HelpKV{
|
config.HelpKV{
|
||||||
Key: Sleep,
|
Key: Sleep,
|
||||||
@ -145,12 +155,44 @@ var (
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const minimumBitrotCycleInMonths = 1
|
||||||
|
|
||||||
|
func parseBitrotConfig(s string) (time.Duration, error) {
|
||||||
|
// Try to parse as a boolean
|
||||||
|
enabled, err := config.ParseBool(s)
|
||||||
|
if err == nil {
|
||||||
|
switch enabled {
|
||||||
|
case true:
|
||||||
|
return 0, nil
|
||||||
|
case false:
|
||||||
|
return -1, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to parse as a number of months
|
||||||
|
if !strings.HasSuffix(s, "m") {
|
||||||
|
return -1, errors.New("unknown format")
|
||||||
|
}
|
||||||
|
|
||||||
|
months, err := strconv.Atoi(strings.TrimSuffix(s, "m"))
|
||||||
|
if err != nil {
|
||||||
|
return -1, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if months < minimumBitrotCycleInMonths {
|
||||||
|
return -1, fmt.Errorf("minimum bitrot cycle is %d month(s)", minimumBitrotCycleInMonths)
|
||||||
|
}
|
||||||
|
|
||||||
|
return time.Duration(months) * 30 * 24 * time.Hour, nil
|
||||||
|
}
|
||||||
|
|
||||||
// LookupConfig - lookup config and override with valid environment settings if any.
|
// LookupConfig - lookup config and override with valid environment settings if any.
|
||||||
func LookupConfig(kvs config.KVS) (cfg Config, err error) {
|
func LookupConfig(kvs config.KVS) (cfg Config, err error) {
|
||||||
if err = config.CheckValidKeys(config.HealSubSys, kvs, DefaultKVS); err != nil {
|
if err = config.CheckValidKeys(config.HealSubSys, kvs, DefaultKVS); err != nil {
|
||||||
return cfg, err
|
return cfg, err
|
||||||
}
|
}
|
||||||
cfg.Bitrot, err = config.ParseBool(env.Get(EnvBitrot, kvs.GetWithDefault(Bitrot, DefaultKVS)))
|
cfg.Bitrot = env.Get(EnvBitrot, kvs.GetWithDefault(Bitrot, DefaultKVS))
|
||||||
|
_, err = parseBitrotConfig(cfg.Bitrot)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return cfg, fmt.Errorf("'heal:bitrotscan' value invalid: %w", err)
|
return cfg, fmt.Errorf("'heal:bitrotscan' value invalid: %w", err)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user