heal: calculate the number of workers based on NRRequests (#17945)
@@ -683,13 +683,6 @@ func (h *healSequence) healSequenceStart(objAPI ObjectLayer) {
 	}
 }
 
-func (h *healSequence) logHeal(healType madmin.HealItemType) {
-	h.mutex.Lock()
-	h.scannedItemsMap[healType]++
-	h.lastHealActivity = UTCNow()
-	h.mutex.Unlock()
-}
-
 func (h *healSequence) queueHealTask(source healSource, healType madmin.HealItemType) error {
 	// Send heal request
 	task := healTask{
@@ -20,6 +20,7 @@ package cmd
 import (
 	"context"
 	"fmt"
+	"runtime"
 	"sort"
 	"time"
 
@@ -30,6 +31,7 @@ import (
 	"github.com/minio/minio/internal/logger"
 	"github.com/minio/pkg/v2/console"
 	"github.com/minio/pkg/v2/wildcard"
+	"github.com/minio/pkg/v2/workers"
 )
 
 const (
@@ -132,30 +134,8 @@ func getLocalBackgroundHealStatus(ctx context.Context, o ObjectLayer) (madmin.BgHealState, bool)
 	return status, true
 }
 
-func mustGetHealSequence(ctx context.Context) *healSequence {
-	// Get background heal sequence to send elements to heal
-	for {
-		globalHealStateLK.RLock()
-		hstate := globalBackgroundHealState
-		globalHealStateLK.RUnlock()
-
-		if hstate == nil {
-			time.Sleep(time.Second)
-			continue
-		}
-
-		bgSeq, ok := hstate.getHealSequenceByToken(bgHealingUUID)
-		if !ok {
-			time.Sleep(time.Second)
-			continue
-		}
-		return bgSeq
-	}
-}
-
 // healErasureSet lists and heals all objects in a specific erasure set
 func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string, tracker *healingTracker) error {
-	bgSeq := mustGetHealSequence(ctx)
 	scanMode := madmin.HealNormalScan
 
 	// Make sure to copy since `buckets slice`
@@ -173,6 +153,30 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
 		}
 	}
 
+	info, err := tracker.disk.DiskInfo(ctx, false)
+	if err != nil {
+		return fmt.Errorf("unable to get disk information before healing it: %w", err)
+	}
+
+	var numHealers uint64
+
+	if numCores := uint64(runtime.GOMAXPROCS(0)); info.NRRequests > numCores {
+		numHealers = numCores / 4
+	} else {
+		numHealers = info.NRRequests / 4
+	}
+	if numHealers < 4 {
+		numHealers = 4
+	}
+	// allow overriding this value as well..
+	if v := globalHealConfig.GetWorkers(); v > 0 {
+		numHealers = uint64(v)
+	}
+
+	logger.Info(fmt.Sprintf("Healing drive '%s' - use %d parallel workers.", tracker.disk.String(), numHealers))
+
+	jt, _ := workers.New(int(numHealers))
+
 	var retErr error
 	// Heal all buckets with all objects
 	for _, bucket := range healBuckets {
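The worker-count heuristic added above is small enough to restate on its own: take the smaller of the drive's nr_requests queue depth and GOMAXPROCS, divide by 4, never go below 4 workers per drive, and let an explicit heal-config value override the whole calculation. The sketch below is a hypothetical standalone re-derivation of that logic for illustration; MinIO inlines it in healErasureSet rather than exposing such a function.

package main

import "fmt"

// healWorkerCount restates the numHealers calculation from the hunk above.
// It is illustrative only and does not exist in the MinIO tree.
func healWorkerCount(nrRequests, numCores, configured uint64) uint64 {
	// Derive parallelism from the smaller of the drive queue depth
	// and the number of usable CPU cores.
	n := nrRequests
	if nrRequests > numCores {
		n = numCores
	}
	n /= 4

	// Never heal a drive with fewer than 4 workers.
	if n < 4 {
		n = 4
	}

	// An explicit heal-config worker count overrides the heuristic.
	if configured > 0 {
		n = configured
	}
	return n
}

func main() {
	fmt.Println(healWorkerCount(256, 16, 0))   // 4:  queue deeper than cores, 16/4 = 4
	fmt.Println(healWorkerCount(256, 128, 0))  // 32: 128/4
	fmt.Println(healWorkerCount(256, 128, 10)) // 10: config override wins
}

Note that runtime.GOMAXPROCS(0) only reports the current setting without changing it, so the heuristic adapts to whatever CPU budget the process was given.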
@@ -267,6 +271,8 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
 
 	// Note: updates from healEntry to tracker must be sent on results channel.
 	healEntry := func(bucket string, entry metaCacheEntry) {
+		defer jt.Give()
+
 		if entry.name == "" && len(entry.metadata) == 0 {
 			// ignore entries that don't have metadata.
 			return
@@ -291,14 +297,17 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
 			}
 		}
 
+		// erasureObjects layer needs object names to be encoded
+		encodedEntryName := encodeDirObject(entry.name)
+
 		var result healEntryResult
 		fivs, err := entry.fileInfoVersions(bucket)
 		if err != nil {
-			err := bgSeq.queueHealTask(healSource{
-				bucket:    bucket,
-				object:    entry.name,
-				versionID: "",
-			}, madmin.HealItemObject)
+			_, err := er.HealObject(ctx, bucket, encodedEntryName, "",
+				madmin.HealOpts{
+					ScanMode: scanMode,
+					Remove:   healDeleteDangling,
+				})
 			if err != nil {
 				if isErrObjectNotFound(err) {
 					// queueing happens across namespace, ignore
@@ -321,11 +330,11 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
 			if version.ModTime.After(tracker.Started) {
 				continue
 			}
-			if err := bgSeq.queueHealTask(healSource{
-				bucket:    bucket,
-				object:    version.Name,
-				versionID: version.VersionID,
-			}, madmin.HealItemObject); err != nil {
+			if _, err := er.HealObject(ctx, bucket, encodedEntryName,
+				version.VersionID, madmin.HealOpts{
+					ScanMode: scanMode,
+					Remove:   healDeleteDangling,
+				}); err != nil {
 				if isErrObjectNotFound(err) {
 					// queueing happens across namespace, ignore
 					// objects that are not found.
@@ -344,7 +353,6 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
 			} else {
 				result = healEntrySuccess(uint64(version.Size))
 			}
-			bgSeq.logHeal(madmin.HealItemObject)
 
 			if !send(result) {
 				return
@@ -382,7 +390,8 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
 			minDisks:       1,
 			reportNotFound: false,
 			agreed: func(entry metaCacheEntry) {
-				healEntry(actualBucket, entry)
+				jt.Take()
+				go healEntry(actualBucket, entry)
 			},
 			partial: func(entries metaCacheEntries, _ []error) {
 				entry, ok := entries.resolve(&resolver)
@@ -391,10 +400,12 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
 					// proceed to heal nonetheless.
 					entry, _ = entries.firstFound()
 				}
-				healEntry(actualBucket, *entry)
+				jt.Take()
+				go healEntry(actualBucket, *entry)
 			},
 			finished: nil,
 		})
+		jt.Wait() // synchronize all the concurrent heal jobs
 		close(results)
 		if err != nil {
 			// Set this such that when we return this function
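The concurrency pattern introduced across these hunks — jt.Take() before spawning a goroutine, defer jt.Give() inside it, and jt.Wait() once listing finishes — is all this diff reveals about github.com/minio/pkg/v2/workers; judging from that usage it behaves like a counting semaphore sized by workers.New, so the listing callbacks themselves block when every worker slot is busy. A minimal, self-contained sketch of the same discipline with a toy workload (the item loop and process function are invented for illustration, not code from this change):

package main

import (
	"fmt"
	"time"

	"github.com/minio/pkg/v2/workers"
)

// process is a stand-in for the per-object heal (er.HealObject in the diff).
func process(item int) {
	time.Sleep(100 * time.Millisecond)
	fmt.Println("healed item", item)
}

func main() {
	// At most 4 items run concurrently, mirroring the per-drive heal
	// workers created with workers.New(int(numHealers)).
	jt, err := workers.New(4)
	if err != nil {
		panic(err)
	}

	for i := 0; i < 16; i++ {
		i := i
		jt.Take() // blocks once 4 jobs are in flight
		go func() {
			defer jt.Give() // release the slot even on early return
			process(i)
		}()
	}

	jt.Wait() // block until every spawned job has called Give
}

Deferring Give inside the goroutine matters: healEntry has several early returns, and the deferred call guarantees the slot is released on every exit path.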
@@ -46,6 +46,7 @@ type DiskInfo struct {
 	FreeInodes uint64
 	Major      uint32
 	Minor      uint32
+	NRRequests uint64
 	FSType     string
 	RootDisk   bool
 	Healing    bool
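Because DiskInfo is serialized by msgp-generated code in array (tuple) form — its fields become positional array elements — the set and order of fields is the wire format itself, and every peer must agree on the element count. That is why the generated-code hunks below bump the expected array size from 17 to 18 and add a Uint64 read/write for NRRequests in each of DecodeMsg, EncodeMsg, MarshalMsg, UnmarshalMsg, and Msgsize. A minimal sketch of that count check using the same github.com/tinylib/msgp/msgp helpers (the two-field message here is invented for illustration):

package main

import (
	"fmt"

	"github.com/tinylib/msgp/msgp"
)

func main() {
	// An "old" writer encodes two fields as a 2-element array.
	buf := msgp.AppendArrayHeader(nil, 2)
	buf = msgp.AppendUint64(buf, 42) // e.g. Total
	buf = msgp.AppendUint64(buf, 7)  // e.g. Free

	// A "new" reader that expects three elements must reject it,
	// just like the generated `if zb0001 != 18` check below.
	sz, _, err := msgp.ReadArrayHeaderBytes(buf)
	if err != nil {
		panic(err)
	}
	if sz != 3 {
		fmt.Println(msgp.ArrayError{Wanted: 3, Got: sz})
	}
}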
@@ -14,8 +14,8 @@ func (z *DiskInfo) DecodeMsg(dc *msgp.Reader) (err error) {
 		err = msgp.WrapError(err)
 		return
 	}
-	if zb0001 != 17 {
-		err = msgp.ArrayError{Wanted: 17, Got: zb0001}
+	if zb0001 != 18 {
+		err = msgp.ArrayError{Wanted: 18, Got: zb0001}
 		return
 	}
 	z.Total, err = dc.ReadUint64()
@@ -53,6 +53,11 @@ func (z *DiskInfo) DecodeMsg(dc *msgp.Reader) (err error) {
 		err = msgp.WrapError(err, "Minor")
 		return
 	}
+	z.NRRequests, err = dc.ReadUint64()
+	if err != nil {
+		err = msgp.WrapError(err, "NRRequests")
+		return
+	}
 	z.FSType, err = dc.ReadString()
 	if err != nil {
 		err = msgp.WrapError(err, "FSType")
@@ -108,8 +113,8 @@ func (z *DiskInfo) DecodeMsg(dc *msgp.Reader) (err error) {
 
 // EncodeMsg implements msgp.Encodable
 func (z *DiskInfo) EncodeMsg(en *msgp.Writer) (err error) {
-	// array header, size 17
-	err = en.Append(0xdc, 0x0, 0x11)
+	// array header, size 18
+	err = en.Append(0xdc, 0x0, 0x12)
 	if err != nil {
 		return
 	}
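For reference, the three hard-coded bytes above are the raw MessagePack array16 header: 0xdc is the array16 type marker and the next two bytes are the element count as a big-endian uint16, so 0xdc 0x00 0x11 announces a 17-element array and 0xdc 0x00 0x12 an 18-element one. A quick check with nothing but the standard library:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	oldHdr := []byte{0xdc, 0x00, 0x11} // array16 header written by the old generated code
	newHdr := []byte{0xdc, 0x00, 0x12} // array16 header after adding NRRequests

	// The two bytes after the 0xdc marker are a big-endian element count.
	fmt.Println(binary.BigEndian.Uint16(oldHdr[1:])) // 17
	fmt.Println(binary.BigEndian.Uint16(newHdr[1:])) // 18
}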
@@ -148,6 +153,11 @@ func (z *DiskInfo) EncodeMsg(en *msgp.Writer) (err error) {
 		err = msgp.WrapError(err, "Minor")
 		return
 	}
+	err = en.WriteUint64(z.NRRequests)
+	if err != nil {
+		err = msgp.WrapError(err, "NRRequests")
+		return
+	}
 	err = en.WriteString(z.FSType)
 	if err != nil {
 		err = msgp.WrapError(err, "FSType")
@@ -204,8 +214,8 @@ func (z *DiskInfo) EncodeMsg(en *msgp.Writer) (err error) {
 // MarshalMsg implements msgp.Marshaler
 func (z *DiskInfo) MarshalMsg(b []byte) (o []byte, err error) {
 	o = msgp.Require(b, z.Msgsize())
-	// array header, size 17
-	o = append(o, 0xdc, 0x0, 0x11)
+	// array header, size 18
+	o = append(o, 0xdc, 0x0, 0x12)
 	o = msgp.AppendUint64(o, z.Total)
 	o = msgp.AppendUint64(o, z.Free)
 	o = msgp.AppendUint64(o, z.Used)
@@ -213,6 +223,7 @@ func (z *DiskInfo) MarshalMsg(b []byte) (o []byte, err error) {
 	o = msgp.AppendUint64(o, z.FreeInodes)
 	o = msgp.AppendUint32(o, z.Major)
 	o = msgp.AppendUint32(o, z.Minor)
+	o = msgp.AppendUint64(o, z.NRRequests)
 	o = msgp.AppendString(o, z.FSType)
 	o = msgp.AppendBool(o, z.RootDisk)
 	o = msgp.AppendBool(o, z.Healing)
@@ -238,8 +249,8 @@ func (z *DiskInfo) UnmarshalMsg(bts []byte) (o []byte, err error) {
 		err = msgp.WrapError(err)
 		return
 	}
-	if zb0001 != 17 {
-		err = msgp.ArrayError{Wanted: 17, Got: zb0001}
+	if zb0001 != 18 {
+		err = msgp.ArrayError{Wanted: 18, Got: zb0001}
 		return
 	}
 	z.Total, bts, err = msgp.ReadUint64Bytes(bts)
@@ -277,6 +288,11 @@ func (z *DiskInfo) UnmarshalMsg(bts []byte) (o []byte, err error) {
 		err = msgp.WrapError(err, "Minor")
 		return
 	}
+	z.NRRequests, bts, err = msgp.ReadUint64Bytes(bts)
+	if err != nil {
+		err = msgp.WrapError(err, "NRRequests")
+		return
+	}
 	z.FSType, bts, err = msgp.ReadStringBytes(bts)
 	if err != nil {
 		err = msgp.WrapError(err, "FSType")
@@ -333,7 +349,7 @@ func (z *DiskInfo) UnmarshalMsg(bts []byte) (o []byte, err error) {
 
 // Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
 func (z *DiskInfo) Msgsize() (s int) {
-	s = 3 + msgp.Uint64Size + msgp.Uint64Size + msgp.Uint64Size + msgp.Uint64Size + msgp.Uint64Size + msgp.Uint32Size + msgp.Uint32Size + msgp.StringPrefixSize + len(z.FSType) + msgp.BoolSize + msgp.BoolSize + msgp.BoolSize + msgp.StringPrefixSize + len(z.Endpoint) + msgp.StringPrefixSize + len(z.MountPath) + msgp.StringPrefixSize + len(z.ID) + msgp.BoolSize + z.Metrics.Msgsize() + msgp.StringPrefixSize + len(z.Error)
+	s = 3 + msgp.Uint64Size + msgp.Uint64Size + msgp.Uint64Size + msgp.Uint64Size + msgp.Uint64Size + msgp.Uint32Size + msgp.Uint32Size + msgp.Uint64Size + msgp.StringPrefixSize + len(z.FSType) + msgp.BoolSize + msgp.BoolSize + msgp.BoolSize + msgp.StringPrefixSize + len(z.Endpoint) + msgp.StringPrefixSize + len(z.MountPath) + msgp.StringPrefixSize + len(z.ID) + msgp.BoolSize + z.Metrics.Msgsize() + msgp.StringPrefixSize + len(z.Error)
 	return
 }
@@ -114,6 +114,8 @@ type xlStorage struct {
 
 	formatData []byte
 
+	nrRequests uint64
+
 	// mutex to prevent concurrent read operations overloading walks.
 	rotational bool
 	walkMu     *sync.Mutex
@@ -244,6 +246,11 @@ func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) {
 		diskIndex: -1,
 	}
 
+	// Sanitize before setting it
+	if info.NRRequests > 0 {
+		s.nrRequests = info.NRRequests
+	}
+
 	// We stagger listings only on HDDs.
 	if info.Rotational == nil || *info.Rotational {
 		s.rotational = true
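This hunk only shows info.NRRequests arriving from the local disk-info helper; the diff does not show where the value originates. On Linux the block layer exposes a device's request queue depth at /sys/block/<dev>/queue/nr_requests, and reading it looks roughly like the sketch below — the path handling and device name are assumptions for illustration, not code from this change.

package main

import (
	"fmt"
	"os"
	"strconv"
	"strings"
)

// nrRequests reads the request queue depth for a block device, e.g. "sda".
// It returns 0 if the sysfs attribute cannot be read or parsed.
func nrRequests(device string) uint64 {
	data, err := os.ReadFile("/sys/block/" + device + "/queue/nr_requests")
	if err != nil {
		return 0
	}
	n, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
	if err != nil {
		return 0
	}
	return n
}

func main() {
	fmt.Println(nrRequests("sda"))
}

Returning 0 on failure lines up with the sanitize check above: newXLStorage only stores the value when info.NRRequests > 0, so a drive without a readable queue depth simply falls back to the CPU-based heuristic.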
@@ -658,6 +665,7 @@ func (s *xlStorage) DiskInfo(_ context.Context, _ bool) (info DiskInfo, err error) {
 	dcinfo.UsedInodes = di.Files - di.Ffree
 	dcinfo.FreeInodes = di.Ffree
 	dcinfo.FSType = di.FSType
+	dcinfo.NRRequests = s.nrRequests
 	dcinfo.Rotational = s.rotational
 	diskID, err := s.GetDiskID()
 	// Healing is 'true' when