mirror of
https://github.com/minio/minio.git
synced 2025-02-23 03:22:30 -05:00
Do regular checks for healing status while scanning (#19946)
This commit is contained in:
parent
eb990f64a9
commit
2f9018f03b
@ -464,7 +464,10 @@ func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint
|
||||
}
|
||||
|
||||
// Remove .healing.bin from all disks with similar heal-id
|
||||
disks := z.serverPools[poolIdx].sets[setIdx].getDisks()
|
||||
disks, err := z.GetDisks(poolIdx, setIdx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, disk := range disks {
|
||||
if disk == nil {
|
||||
|
@ -31,6 +31,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/minio/madmin-go/v3"
|
||||
@ -249,7 +250,8 @@ type folderScanner struct {
|
||||
healObjectSelect uint32 // Do a heal check on an object once every n cycles. Must divide into healFolderInclude
|
||||
scanMode madmin.HealScanMode
|
||||
|
||||
weSleep func() bool
|
||||
weSleep func() bool
|
||||
shouldHeal func() bool
|
||||
|
||||
disks []StorageAPI
|
||||
disksQuorum int
|
||||
@ -304,11 +306,12 @@ type folderScanner struct {
|
||||
// The returned cache will always be valid, but may not be updated from the existing.
|
||||
// Before each operation sleepDuration is called which can be used to temporarily halt the scanner.
|
||||
// If the supplied context is canceled the function will return at the first chance.
|
||||
func scanDataFolder(ctx context.Context, disks []StorageAPI, basePath string, healing bool, cache dataUsageCache, getSize getSizeFn, scanMode madmin.HealScanMode, weSleep func() bool) (dataUsageCache, error) {
|
||||
func scanDataFolder(ctx context.Context, disks []StorageAPI, drive *xlStorage, cache dataUsageCache, getSize getSizeFn, scanMode madmin.HealScanMode, weSleep func() bool) (dataUsageCache, error) {
|
||||
switch cache.Info.Name {
|
||||
case "", dataUsageRoot:
|
||||
return cache, errors.New("internal error: root scan attempted")
|
||||
}
|
||||
basePath := drive.drivePath
|
||||
updatePath, closeDisk := globalScannerMetrics.currentPathUpdater(basePath, cache.Info.Name)
|
||||
defer closeDisk()
|
||||
|
||||
@ -319,7 +322,7 @@ func scanDataFolder(ctx context.Context, disks []StorageAPI, basePath string, he
|
||||
newCache: dataUsageCache{Info: cache.Info},
|
||||
updateCache: dataUsageCache{Info: cache.Info},
|
||||
dataUsageScannerDebug: false,
|
||||
healObjectSelect: healObjectSelectProb,
|
||||
healObjectSelect: 0,
|
||||
scanMode: scanMode,
|
||||
weSleep: weSleep,
|
||||
updates: cache.Info.updates,
|
||||
@ -328,6 +331,32 @@ func scanDataFolder(ctx context.Context, disks []StorageAPI, basePath string, he
|
||||
disksQuorum: len(disks) / 2,
|
||||
}
|
||||
|
||||
var skipHeal atomic.Bool
|
||||
if globalIsErasure || cache.Info.SkipHealing {
|
||||
skipHeal.Store(true)
|
||||
}
|
||||
|
||||
// Check if we should do healing at all.
|
||||
s.shouldHeal = func() bool {
|
||||
if skipHeal.Load() {
|
||||
return false
|
||||
}
|
||||
if s.healObjectSelect == 0 {
|
||||
return false
|
||||
}
|
||||
if di, _ := drive.DiskInfo(ctx, DiskInfoOptions{}); di.Healing {
|
||||
skipHeal.Store(true)
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Enable healing in XL mode.
|
||||
if globalIsErasure && !cache.Info.SkipHealing {
|
||||
// Do a heal check on an object once every n cycles. Must divide into healFolderInclude
|
||||
s.healObjectSelect = healObjectSelectProb
|
||||
}
|
||||
|
||||
done := ctx.Done()
|
||||
|
||||
// Read top level in bucket.
|
||||
@ -338,7 +367,7 @@ func scanDataFolder(ctx context.Context, disks []StorageAPI, basePath string, he
|
||||
}
|
||||
root := dataUsageEntry{}
|
||||
folder := cachedFolder{name: cache.Info.Name, objectHealProbDiv: 1}
|
||||
err := s.scanFolder(ctx, folder, healing, &root)
|
||||
err := s.scanFolder(ctx, folder, &root)
|
||||
if err != nil {
|
||||
// No useful information...
|
||||
return cache, err
|
||||
@ -369,7 +398,7 @@ func (f *folderScanner) sendUpdate() {
|
||||
// Files found in the folders will be added to f.newCache.
|
||||
// If final is provided folders will be put into f.newFolders or f.existingFolders.
|
||||
// If final is not provided the folders found are returned from the function.
|
||||
func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, healing bool, into *dataUsageEntry) error {
|
||||
func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, into *dataUsageEntry) error {
|
||||
done := ctx.Done()
|
||||
scannerLogPrefix := color.Green("folder-scanner:")
|
||||
|
||||
@ -476,14 +505,9 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, hea
|
||||
replication: replicationCfg,
|
||||
}
|
||||
|
||||
item.heal.enabled = thisHash.modAlt(f.oldCache.Info.NextCycle/folder.objectHealProbDiv, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure
|
||||
item.heal.enabled = thisHash.modAlt(f.oldCache.Info.NextCycle/folder.objectHealProbDiv, f.healObjectSelect/folder.objectHealProbDiv) && f.shouldHeal()
|
||||
item.heal.bitrot = f.scanMode == madmin.HealDeepScan
|
||||
|
||||
// if the drive belongs to an erasure set
|
||||
// that is already being healed, skip the
|
||||
// healing attempt on this drive.
|
||||
item.heal.enabled = item.heal.enabled && !healing
|
||||
|
||||
sz, err := f.getSize(item)
|
||||
if err != nil && err != errIgnoreFileContrib {
|
||||
wait() // wait to proceed to next entry.
|
||||
@ -565,7 +589,7 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, hea
|
||||
if !into.Compacted {
|
||||
dst = &dataUsageEntry{Compacted: false}
|
||||
}
|
||||
if err := f.scanFolder(ctx, folder, healing, dst); err != nil {
|
||||
if err := f.scanFolder(ctx, folder, dst); err != nil {
|
||||
return
|
||||
}
|
||||
if !into.Compacted {
|
||||
@ -646,8 +670,8 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, hea
|
||||
}
|
||||
|
||||
// Scan for healing
|
||||
if healing || len(abandonedChildren) == 0 {
|
||||
// If disks are already healing or we have no abandoned children, we do not need to heal.
|
||||
if len(abandonedChildren) == 0 || !f.shouldHeal() {
|
||||
// If we are not heal scanning, return now.
|
||||
break
|
||||
}
|
||||
|
||||
@ -681,6 +705,9 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, hea
|
||||
|
||||
healObjectsPrefix := color.Green("healObjects:")
|
||||
for k := range abandonedChildren {
|
||||
if !f.shouldHeal() {
|
||||
break
|
||||
}
|
||||
bucket, prefix := path2BucketObject(k)
|
||||
stopFn := globalScannerMetrics.time(scannerMetricCheckMissing)
|
||||
f.updateCurrentPath(k)
|
||||
@ -714,6 +741,10 @@ func (f *folderScanner) scanFolder(ctx context.Context, folder cachedFolder, hea
|
||||
},
|
||||
// Some disks have data for this.
|
||||
partial: func(entries metaCacheEntries, errs []error) {
|
||||
if !f.shouldHeal() {
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
entry, ok := entries.resolve(&resolver)
|
||||
if !ok {
|
||||
// check if we can get one entry at least
|
||||
|
@ -352,6 +352,9 @@ type dataUsageCacheInfo struct {
|
||||
Name string
|
||||
NextCycle uint32
|
||||
LastUpdate time.Time
|
||||
// indicates if the disk is being healed and scanner
|
||||
// should skip healing the disk
|
||||
SkipHealing bool
|
||||
|
||||
// Active lifecycle, if any on the bucket
|
||||
lifeCycle *lifecycle.Lifecycle `msg:"-"`
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -26,6 +26,9 @@ import (
|
||||
"path"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/minio/minio/internal/cachevalue"
|
||||
)
|
||||
|
||||
type usageTestFile struct {
|
||||
@ -61,10 +64,13 @@ func TestDataUsageUpdate(t *testing.T) {
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
xls := xlStorage{drivePath: base, diskInfoCache: cachevalue.New[DiskInfo]()}
|
||||
xls.diskInfoCache.InitOnce(time.Second, cachevalue.Opts{}, func(ctx context.Context) (DiskInfo, error) {
|
||||
return DiskInfo{Total: 1 << 40, Free: 1 << 40}, nil
|
||||
})
|
||||
weSleep := func() bool { return false }
|
||||
|
||||
got, err := scanDataFolder(context.Background(), nil, base, false, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize, 0, weSleep)
|
||||
got, err := scanDataFolder(context.Background(), nil, &xls, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize, 0, weSleep)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -174,7 +180,7 @@ func TestDataUsageUpdate(t *testing.T) {
|
||||
}
|
||||
// Changed dir must be picked up in this many cycles.
|
||||
for i := 0; i < dataUsageUpdateDirCycles; i++ {
|
||||
got, err = scanDataFolder(context.Background(), nil, base, false, got, getSize, 0, weSleep)
|
||||
got, err = scanDataFolder(context.Background(), nil, &xls, got, getSize, 0, weSleep)
|
||||
got.Info.NextCycle++
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@ -283,8 +289,12 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
|
||||
}
|
||||
|
||||
weSleep := func() bool { return false }
|
||||
xls := xlStorage{drivePath: base, diskInfoCache: cachevalue.New[DiskInfo]()}
|
||||
xls.diskInfoCache.InitOnce(time.Second, cachevalue.Opts{}, func(ctx context.Context) (DiskInfo, error) {
|
||||
return DiskInfo{Total: 1 << 40, Free: 1 << 40}, nil
|
||||
})
|
||||
|
||||
got, err := scanDataFolder(context.Background(), nil, base, false, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, getSize, 0, weSleep)
|
||||
got, err := scanDataFolder(context.Background(), nil, &xls, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, getSize, 0, weSleep)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -419,7 +429,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
|
||||
}
|
||||
// Changed dir must be picked up in this many cycles.
|
||||
for i := 0; i < dataUsageUpdateDirCycles; i++ {
|
||||
got, err = scanDataFolder(context.Background(), nil, base, false, got, getSize, 0, weSleep)
|
||||
got, err = scanDataFolder(context.Background(), nil, &xls, got, getSize, 0, weSleep)
|
||||
got.Info.NextCycle++
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@ -567,8 +577,12 @@ func TestDataUsageCacheSerialize(t *testing.T) {
|
||||
}
|
||||
return
|
||||
}
|
||||
xls := xlStorage{drivePath: base, diskInfoCache: cachevalue.New[DiskInfo]()}
|
||||
xls.diskInfoCache.InitOnce(time.Second, cachevalue.Opts{}, func(ctx context.Context) (DiskInfo, error) {
|
||||
return DiskInfo{Total: 1 << 40, Free: 1 << 40}, nil
|
||||
})
|
||||
weSleep := func() bool { return false }
|
||||
want, err := scanDataFolder(context.Background(), nil, base, false, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize, 0, weSleep)
|
||||
want, err := scanDataFolder(context.Background(), nil, &xls, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize, 0, weSleep)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -233,7 +233,7 @@ func TestListOnlineDisks(t *testing.T) {
|
||||
data := bytes.Repeat([]byte("a"), smallFileThreshold*16)
|
||||
z := obj.(*erasureServerPools)
|
||||
|
||||
erasureDisks, _, err := z.GetDisks(0, 0)
|
||||
erasureDisks, err := z.GetDisks(0, 0)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -409,7 +409,7 @@ func TestListOnlineDisksSmallObjects(t *testing.T) {
|
||||
data := bytes.Repeat([]byte("a"), smallFileThreshold/2)
|
||||
z := obj.(*erasureServerPools)
|
||||
|
||||
erasureDisks, _, err := z.GetDisks(0, 0)
|
||||
erasureDisks, err := z.GetDisks(0, 0)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -302,12 +302,11 @@ func (z *erasureServerPools) GetRawData(ctx context.Context, volume, file string
|
||||
}
|
||||
|
||||
// Return the disks belonging to the poolIdx, and setIdx.
|
||||
func (z *erasureServerPools) GetDisks(poolIdx, setIdx int) ([]StorageAPI, bool, error) {
|
||||
func (z *erasureServerPools) GetDisks(poolIdx, setIdx int) ([]StorageAPI, error) {
|
||||
if poolIdx < len(z.serverPools) && setIdx < len(z.serverPools[poolIdx].sets) {
|
||||
disks, healing := z.serverPools[poolIdx].sets[setIdx].getOnlineDisksWithHealing(true)
|
||||
return disks, healing, nil
|
||||
return z.serverPools[poolIdx].sets[setIdx].getDisks(), nil
|
||||
}
|
||||
return nil, false, fmt.Errorf("Matching pool %s, set %s not found", humanize.Ordinal(poolIdx+1), humanize.Ordinal(setIdx+1))
|
||||
return nil, fmt.Errorf("Matching pool %s, set %s not found", humanize.Ordinal(poolIdx+1), humanize.Ordinal(setIdx+1))
|
||||
}
|
||||
|
||||
// Return the count of disks in each pool
|
||||
|
@ -381,7 +381,7 @@ func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, wa
|
||||
}
|
||||
|
||||
// Collect disks we can use.
|
||||
disks, _ := er.getOnlineDisksWithHealing(false)
|
||||
disks, healing := er.getOnlineDisksWithHealing(false)
|
||||
if len(disks) == 0 {
|
||||
scannerLogIf(ctx, errors.New("data-scanner: all drives are offline or being healed, skipping scanner cycle"))
|
||||
return nil
|
||||
@ -497,6 +497,7 @@ func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, wa
|
||||
if cache.Info.Name == "" {
|
||||
cache.Info.Name = bucket.Name
|
||||
}
|
||||
cache.Info.SkipHealing = healing
|
||||
cache.Info.NextCycle = wantCycle
|
||||
if cache.Info.Name != bucket.Name {
|
||||
cache.Info = dataUsageCacheInfo{
|
||||
|
@ -287,8 +287,8 @@ type ObjectLayer interface {
|
||||
AbortMultipartUpload(ctx context.Context, bucket, object, uploadID string, opts ObjectOptions) error
|
||||
CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, uploadedParts []CompletePart, opts ObjectOptions) (objInfo ObjectInfo, err error)
|
||||
|
||||
GetDisks(poolIdx, setIdx int) ([]StorageAPI, bool, error) // return the disks belonging to pool and set.
|
||||
SetDriveCounts() []int // list of erasure stripe size for each pool in order.
|
||||
GetDisks(poolIdx, setIdx int) ([]StorageAPI, error) // return the disks belonging to pool and set.
|
||||
SetDriveCounts() []int // list of erasure stripe size for each pool in order.
|
||||
|
||||
// Healing operations.
|
||||
HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error)
|
||||
|
@ -554,14 +554,14 @@ func (s *xlStorage) NSScanner(ctx context.Context, cache dataUsageCache, updates
|
||||
|
||||
poolIdx, setIdx, _ := s.GetDiskLoc()
|
||||
|
||||
disks, healing, err := objAPI.GetDisks(poolIdx, setIdx)
|
||||
disks, err := objAPI.GetDisks(poolIdx, setIdx)
|
||||
if err != nil {
|
||||
return cache, err
|
||||
}
|
||||
|
||||
cache.Info.updates = updates
|
||||
|
||||
dataUsageInfo, err := scanDataFolder(ctx, disks, s.drivePath, healing, cache, func(item scannerItem) (sizeSummary, error) {
|
||||
dataUsageInfo, err := scanDataFolder(ctx, disks, s, cache, func(item scannerItem) (sizeSummary, error) {
|
||||
// Look for `xl.meta/xl.json' at the leaf.
|
||||
if !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFile) &&
|
||||
!strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFileV1) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user