fix: make healObject() non-blocking (#13071)

healObject() should be non-blocking to ensure
that the scanner is not blocked for a long time.
Blocking the scanner adversely affects its
performance and also affects the way usage is
subsequently updated.

This PR makes healing non-blocking: operations
that can no longer be queued are dropped instead
of waiting.
This commit is contained in:
Harshavardhana 2021-08-25 17:46:20 -07:00 committed by GitHub
parent 6e5f83c45b
commit 0559f46bbb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 83 additions and 29 deletions

View File

@ -715,9 +715,6 @@ func (h *healSequence) queueHealTask(source healSource, healType madmin.HealItem
task.opts.ScanMode = madmin.HealDeepScan
}
// Wait and proceed if there are active requests
waitForLowHTTPReq(opts.IOCount, opts.Sleep)
h.mutex.Lock()
h.scannedItemsMap[healType]++
h.lastHealActivity = UTCNow()
@ -963,5 +960,9 @@ func (h *healSequence) healObject(bucket, object, versionID string) error {
object: object,
versionID: versionID,
}, madmin.HealItemObject)
// Wait and proceed if there are active requests
waitForLowHTTPReq()
return err
}

View File

@ -52,10 +52,17 @@ type healRoutine struct {
// Add a new task in the tasks queue
func (h *healRoutine) queueHealTask(task healTask) {
h.tasks <- task
select {
case h.tasks <- task:
default:
}
}
func waitForLowHTTPReq(maxIO int, maxWait time.Duration) {
func waitForLowHTTPReq() {
globalHealConfigMu.Lock()
maxIO, maxWait := globalHealConfig.IOCount, globalHealConfig.Sleep
globalHealConfigMu.Unlock()
// No need to wait run at full speed.
if maxIO <= 0 {
return
@ -115,7 +122,11 @@ func (h *healRoutine) run(ctx context.Context, objAPI ObjectLayer) {
res, err = objAPI.HealObject(ctx, task.bucket, task.object, task.versionID, task.opts)
}
}
task.responseCh <- healResult{result: res, err: err}
select {
case task.responseCh <- healResult{result: res, err: err}:
default:
}
case <-h.doneCh:
return
@ -127,7 +138,7 @@ func (h *healRoutine) run(ctx context.Context, objAPI ObjectLayer) {
func newHealRoutine() *healRoutine {
return &healRoutine{
tasks: make(chan healTask),
tasks: make(chan healTask, 50000),
doneCh: make(chan struct{}),
}

View File

@ -1672,7 +1672,7 @@ func (z *erasureServerPools) HealObjects(ctx context.Context, bucket, prefix str
cancel()
return
}
waitForLowHTTPReq(globalHealConfig.IOCount, globalHealConfig.Sleep)
for _, version := range fivs.Versions {
if err := healObject(bucket, version.Name, version.VersionID); err != nil {
errCh <- err

View File

@ -41,15 +41,24 @@ func newBgHealSequence() *healSequence {
reqInfo := &logger.ReqInfo{API: "BackgroundHeal"}
ctx, cancelCtx := context.WithCancel(logger.SetReqInfo(GlobalContext, reqInfo))
globalHealConfigMu.Lock()
opts := globalHealConfig
globalHealConfigMu.Unlock()
scanMode := madmin.HealNormalScan
if opts.Bitrot {
scanMode = madmin.HealDeepScan
}
hs := madmin.HealOpts{
// Remove objects that do not have read-quorum
Remove: true,
ScanMode: madmin.HealNormalScan,
Remove: healDeleteDangling,
ScanMode: scanMode,
}
return &healSequence{
sourceCh: make(chan healSource),
respCh: make(chan healResult),
sourceCh: make(chan healSource, 50000),
respCh: make(chan healResult, 50000),
startTime: UTCNow(),
clientToken: bgHealingUUID,
// run-background heal with reserved bucket
@ -170,6 +179,15 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
Name: pathJoin(minioMetaBucket, minioConfigPrefix),
})
globalHealConfigMu.Lock()
opts := globalHealConfig
globalHealConfigMu.Unlock()
scanMode := madmin.HealNormalScan
if opts.Bitrot {
scanMode = madmin.HealDeepScan
}
// Heal all buckets with all objects
for _, bucket := range buckets {
if tracker.isHealed(bucket.Name) {
@ -188,7 +206,9 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
tracker.Object = ""
tracker.Bucket = bucket.Name
// Heal current bucket
if _, err := er.HealBucket(ctx, bucket.Name, madmin.HealOpts{}); err != nil {
if _, err := er.HealBucket(ctx, bucket.Name, madmin.HealOpts{
ScanMode: scanMode,
}); err != nil {
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
logger.LogIf(ctx, err)
}
@ -236,10 +256,12 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
logger.LogIf(ctx, err)
return
}
waitForLowHTTPReq(globalHealConfig.IOCount, globalHealConfig.Sleep)
for _, version := range fivs.Versions {
if _, err := er.HealObject(ctx, bucket.Name, version.Name, version.VersionID, madmin.HealOpts{
ScanMode: madmin.HealNormalScan, Remove: healDeleteDangling}); err != nil {
ScanMode: scanMode,
Remove: healDeleteDangling,
}); err != nil {
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
// If not deleted, assume they failed.
tracker.ItemsFailed++
@ -256,6 +278,9 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
if time.Since(tracker.LastUpdate) > time.Minute {
logger.LogIf(ctx, tracker.update(ctx))
}
// Wait and proceed if there are active requests
waitForLowHTTPReq()
}
// How to resolve partial results.
@ -307,15 +332,19 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
func healObject(bucket, object, versionID string, scan madmin.HealScanMode) {
// Get background heal sequence to send elements to heal
bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
if ok {
bgSeq.sourceCh <- healSource{
if !ok {
return
}
select {
case bgSeq.sourceCh <- healSource{
bucket: bucket,
object: object,
versionID: versionID,
opts: &madmin.HealOpts{
Remove: true, // if found dangling purge it.
Remove: healDeleteDangling, // if found dangling purge it.
ScanMode: scan,
},
}
}:
default:
}
}

View File

@ -27,8 +27,6 @@ import (
"github.com/minio/minio/internal/logger"
)
var mrfHealingOpts = madmin.HealOpts{ScanMode: madmin.HealNormalScan, Remove: healDeleteDangling}
const (
mrfInfoResetInterval = 10 * time.Second
mrfOpsQueueSize = 10000
@ -185,6 +183,20 @@ func (m *mrfState) healRoutine() {
idler := time.NewTimer(mrfInfoResetInterval)
defer idler.Stop()
globalHealConfigMu.Lock()
opts := globalHealConfig
globalHealConfigMu.Unlock()
scanMode := madmin.HealNormalScan
if opts.Bitrot {
scanMode = madmin.HealDeepScan
}
var mrfHealingOpts = madmin.HealOpts{
ScanMode: scanMode,
Remove: healDeleteDangling,
}
for {
idler.Reset(mrfInfoResetInterval)
select {
@ -214,7 +226,6 @@ func (m *mrfState) healRoutine() {
// Heal objects
for _, u := range mrfOperations {
waitForLowHTTPReq(globalHealConfig.IOCount, globalHealConfig.Sleep)
if _, err := m.objectAPI.HealObject(m.ctx, u.bucket, u.object, u.versionID, mrfHealingOpts); err != nil {
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
// If not deleted, assume they failed.
@ -238,6 +249,8 @@ func (m *mrfState) healRoutine() {
delete(m.pendingOps, u)
m.mu.Unlock()
}
waitForLowHTTPReq()
}
}
}

View File

@ -59,7 +59,7 @@ var (
},
config.KV{
Key: IOCount,
Value: "10",
Value: "100",
},
}