fix: make healObject() make non-blocking (#13071)

healObject() should be non-blocking to ensure
that scanner is not blocked for a long time,
this adversely affects performance of the scanner
and also affects the way usage is updated
subsequently.

This PR allows for a non-blocking behavior for
healing, dropping operations that cannot be queued
anymore.
This commit is contained in:
Harshavardhana 2021-08-25 17:46:20 -07:00 committed by GitHub
parent 6e5f83c45b
commit 0559f46bbb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 83 additions and 29 deletions

View File

@ -715,9 +715,6 @@ func (h *healSequence) queueHealTask(source healSource, healType madmin.HealItem
task.opts.ScanMode = madmin.HealDeepScan task.opts.ScanMode = madmin.HealDeepScan
} }
// Wait and proceed if there are active requests
waitForLowHTTPReq(opts.IOCount, opts.Sleep)
h.mutex.Lock() h.mutex.Lock()
h.scannedItemsMap[healType]++ h.scannedItemsMap[healType]++
h.lastHealActivity = UTCNow() h.lastHealActivity = UTCNow()
@ -963,5 +960,9 @@ func (h *healSequence) healObject(bucket, object, versionID string) error {
object: object, object: object,
versionID: versionID, versionID: versionID,
}, madmin.HealItemObject) }, madmin.HealItemObject)
// Wait and proceed if there are active requests
waitForLowHTTPReq()
return err return err
} }

View File

@ -52,10 +52,17 @@ type healRoutine struct {
// Add a new task in the tasks queue // Add a new task in the tasks queue
func (h *healRoutine) queueHealTask(task healTask) { func (h *healRoutine) queueHealTask(task healTask) {
h.tasks <- task select {
case h.tasks <- task:
default:
}
} }
func waitForLowHTTPReq(maxIO int, maxWait time.Duration) { func waitForLowHTTPReq() {
globalHealConfigMu.Lock()
maxIO, maxWait := globalHealConfig.IOCount, globalHealConfig.Sleep
globalHealConfigMu.Unlock()
// No need to wait run at full speed. // No need to wait run at full speed.
if maxIO <= 0 { if maxIO <= 0 {
return return
@ -115,7 +122,11 @@ func (h *healRoutine) run(ctx context.Context, objAPI ObjectLayer) {
res, err = objAPI.HealObject(ctx, task.bucket, task.object, task.versionID, task.opts) res, err = objAPI.HealObject(ctx, task.bucket, task.object, task.versionID, task.opts)
} }
} }
task.responseCh <- healResult{result: res, err: err}
select {
case task.responseCh <- healResult{result: res, err: err}:
default:
}
case <-h.doneCh: case <-h.doneCh:
return return
@ -127,7 +138,7 @@ func (h *healRoutine) run(ctx context.Context, objAPI ObjectLayer) {
func newHealRoutine() *healRoutine { func newHealRoutine() *healRoutine {
return &healRoutine{ return &healRoutine{
tasks: make(chan healTask), tasks: make(chan healTask, 50000),
doneCh: make(chan struct{}), doneCh: make(chan struct{}),
} }

View File

@ -1672,7 +1672,7 @@ func (z *erasureServerPools) HealObjects(ctx context.Context, bucket, prefix str
cancel() cancel()
return return
} }
waitForLowHTTPReq(globalHealConfig.IOCount, globalHealConfig.Sleep)
for _, version := range fivs.Versions { for _, version := range fivs.Versions {
if err := healObject(bucket, version.Name, version.VersionID); err != nil { if err := healObject(bucket, version.Name, version.VersionID); err != nil {
errCh <- err errCh <- err

View File

@ -41,15 +41,24 @@ func newBgHealSequence() *healSequence {
reqInfo := &logger.ReqInfo{API: "BackgroundHeal"} reqInfo := &logger.ReqInfo{API: "BackgroundHeal"}
ctx, cancelCtx := context.WithCancel(logger.SetReqInfo(GlobalContext, reqInfo)) ctx, cancelCtx := context.WithCancel(logger.SetReqInfo(GlobalContext, reqInfo))
globalHealConfigMu.Lock()
opts := globalHealConfig
globalHealConfigMu.Unlock()
scanMode := madmin.HealNormalScan
if opts.Bitrot {
scanMode = madmin.HealDeepScan
}
hs := madmin.HealOpts{ hs := madmin.HealOpts{
// Remove objects that do not have read-quorum // Remove objects that do not have read-quorum
Remove: true, Remove: healDeleteDangling,
ScanMode: madmin.HealNormalScan, ScanMode: scanMode,
} }
return &healSequence{ return &healSequence{
sourceCh: make(chan healSource), sourceCh: make(chan healSource, 50000),
respCh: make(chan healResult), respCh: make(chan healResult, 50000),
startTime: UTCNow(), startTime: UTCNow(),
clientToken: bgHealingUUID, clientToken: bgHealingUUID,
// run-background heal with reserved bucket // run-background heal with reserved bucket
@ -170,6 +179,15 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
Name: pathJoin(minioMetaBucket, minioConfigPrefix), Name: pathJoin(minioMetaBucket, minioConfigPrefix),
}) })
globalHealConfigMu.Lock()
opts := globalHealConfig
globalHealConfigMu.Unlock()
scanMode := madmin.HealNormalScan
if opts.Bitrot {
scanMode = madmin.HealDeepScan
}
// Heal all buckets with all objects // Heal all buckets with all objects
for _, bucket := range buckets { for _, bucket := range buckets {
if tracker.isHealed(bucket.Name) { if tracker.isHealed(bucket.Name) {
@ -188,7 +206,9 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
tracker.Object = "" tracker.Object = ""
tracker.Bucket = bucket.Name tracker.Bucket = bucket.Name
// Heal current bucket // Heal current bucket
if _, err := er.HealBucket(ctx, bucket.Name, madmin.HealOpts{}); err != nil { if _, err := er.HealBucket(ctx, bucket.Name, madmin.HealOpts{
ScanMode: scanMode,
}); err != nil {
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) { if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
} }
@ -236,10 +256,12 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
return return
} }
waitForLowHTTPReq(globalHealConfig.IOCount, globalHealConfig.Sleep)
for _, version := range fivs.Versions { for _, version := range fivs.Versions {
if _, err := er.HealObject(ctx, bucket.Name, version.Name, version.VersionID, madmin.HealOpts{ if _, err := er.HealObject(ctx, bucket.Name, version.Name, version.VersionID, madmin.HealOpts{
ScanMode: madmin.HealNormalScan, Remove: healDeleteDangling}); err != nil { ScanMode: scanMode,
Remove: healDeleteDangling,
}); err != nil {
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) { if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
// If not deleted, assume they failed. // If not deleted, assume they failed.
tracker.ItemsFailed++ tracker.ItemsFailed++
@ -256,6 +278,9 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
if time.Since(tracker.LastUpdate) > time.Minute { if time.Since(tracker.LastUpdate) > time.Minute {
logger.LogIf(ctx, tracker.update(ctx)) logger.LogIf(ctx, tracker.update(ctx))
} }
// Wait and proceed if there are active requests
waitForLowHTTPReq()
} }
// How to resolve partial results. // How to resolve partial results.
@ -307,15 +332,19 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
func healObject(bucket, object, versionID string, scan madmin.HealScanMode) { func healObject(bucket, object, versionID string, scan madmin.HealScanMode) {
// Get background heal sequence to send elements to heal // Get background heal sequence to send elements to heal
bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID) bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
if ok { if !ok {
bgSeq.sourceCh <- healSource{ return
bucket: bucket, }
object: object, select {
versionID: versionID, case bgSeq.sourceCh <- healSource{
opts: &madmin.HealOpts{ bucket: bucket,
Remove: true, // if found dangling purge it. object: object,
ScanMode: scan, versionID: versionID,
}, opts: &madmin.HealOpts{
} Remove: healDeleteDangling, // if found dangling purge it.
ScanMode: scan,
},
}:
default:
} }
} }

View File

@ -27,8 +27,6 @@ import (
"github.com/minio/minio/internal/logger" "github.com/minio/minio/internal/logger"
) )
var mrfHealingOpts = madmin.HealOpts{ScanMode: madmin.HealNormalScan, Remove: healDeleteDangling}
const ( const (
mrfInfoResetInterval = 10 * time.Second mrfInfoResetInterval = 10 * time.Second
mrfOpsQueueSize = 10000 mrfOpsQueueSize = 10000
@ -185,6 +183,20 @@ func (m *mrfState) healRoutine() {
idler := time.NewTimer(mrfInfoResetInterval) idler := time.NewTimer(mrfInfoResetInterval)
defer idler.Stop() defer idler.Stop()
globalHealConfigMu.Lock()
opts := globalHealConfig
globalHealConfigMu.Unlock()
scanMode := madmin.HealNormalScan
if opts.Bitrot {
scanMode = madmin.HealDeepScan
}
var mrfHealingOpts = madmin.HealOpts{
ScanMode: scanMode,
Remove: healDeleteDangling,
}
for { for {
idler.Reset(mrfInfoResetInterval) idler.Reset(mrfInfoResetInterval)
select { select {
@ -214,7 +226,6 @@ func (m *mrfState) healRoutine() {
// Heal objects // Heal objects
for _, u := range mrfOperations { for _, u := range mrfOperations {
waitForLowHTTPReq(globalHealConfig.IOCount, globalHealConfig.Sleep)
if _, err := m.objectAPI.HealObject(m.ctx, u.bucket, u.object, u.versionID, mrfHealingOpts); err != nil { if _, err := m.objectAPI.HealObject(m.ctx, u.bucket, u.object, u.versionID, mrfHealingOpts); err != nil {
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) { if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
// If not deleted, assume they failed. // If not deleted, assume they failed.
@ -238,6 +249,8 @@ func (m *mrfState) healRoutine() {
delete(m.pendingOps, u) delete(m.pendingOps, u)
m.mu.Unlock() m.mu.Unlock()
} }
waitForLowHTTPReq()
} }
} }
} }

View File

@ -59,7 +59,7 @@ var (
}, },
config.KV{ config.KV{
Key: IOCount, Key: IOCount,
Value: "10", Value: "100",
}, },
} }