mirror of
https://github.com/minio/minio.git
synced 2024-12-24 22:25:54 -05:00
fix: make healObject() make non-blocking (#13071)
healObject() should be non-blocking to ensure that scanner is not blocked for a long time, this adversely affects performance of the scanner and also affects the way usage is updated subsequently. This PR allows for a non-blocking behavior for healing, dropping operations that cannot be queued anymore.
This commit is contained in:
parent
6e5f83c45b
commit
0559f46bbb
@ -715,9 +715,6 @@ func (h *healSequence) queueHealTask(source healSource, healType madmin.HealItem
|
|||||||
task.opts.ScanMode = madmin.HealDeepScan
|
task.opts.ScanMode = madmin.HealDeepScan
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait and proceed if there are active requests
|
|
||||||
waitForLowHTTPReq(opts.IOCount, opts.Sleep)
|
|
||||||
|
|
||||||
h.mutex.Lock()
|
h.mutex.Lock()
|
||||||
h.scannedItemsMap[healType]++
|
h.scannedItemsMap[healType]++
|
||||||
h.lastHealActivity = UTCNow()
|
h.lastHealActivity = UTCNow()
|
||||||
@ -963,5 +960,9 @@ func (h *healSequence) healObject(bucket, object, versionID string) error {
|
|||||||
object: object,
|
object: object,
|
||||||
versionID: versionID,
|
versionID: versionID,
|
||||||
}, madmin.HealItemObject)
|
}, madmin.HealItemObject)
|
||||||
|
|
||||||
|
// Wait and proceed if there are active requests
|
||||||
|
waitForLowHTTPReq()
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -52,10 +52,17 @@ type healRoutine struct {
|
|||||||
|
|
||||||
// Add a new task in the tasks queue
|
// Add a new task in the tasks queue
|
||||||
func (h *healRoutine) queueHealTask(task healTask) {
|
func (h *healRoutine) queueHealTask(task healTask) {
|
||||||
h.tasks <- task
|
select {
|
||||||
|
case h.tasks <- task:
|
||||||
|
default:
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func waitForLowHTTPReq(maxIO int, maxWait time.Duration) {
|
func waitForLowHTTPReq() {
|
||||||
|
globalHealConfigMu.Lock()
|
||||||
|
maxIO, maxWait := globalHealConfig.IOCount, globalHealConfig.Sleep
|
||||||
|
globalHealConfigMu.Unlock()
|
||||||
|
|
||||||
// No need to wait run at full speed.
|
// No need to wait run at full speed.
|
||||||
if maxIO <= 0 {
|
if maxIO <= 0 {
|
||||||
return
|
return
|
||||||
@ -115,7 +122,11 @@ func (h *healRoutine) run(ctx context.Context, objAPI ObjectLayer) {
|
|||||||
res, err = objAPI.HealObject(ctx, task.bucket, task.object, task.versionID, task.opts)
|
res, err = objAPI.HealObject(ctx, task.bucket, task.object, task.versionID, task.opts)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
task.responseCh <- healResult{result: res, err: err}
|
|
||||||
|
select {
|
||||||
|
case task.responseCh <- healResult{result: res, err: err}:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
case <-h.doneCh:
|
case <-h.doneCh:
|
||||||
return
|
return
|
||||||
@ -127,7 +138,7 @@ func (h *healRoutine) run(ctx context.Context, objAPI ObjectLayer) {
|
|||||||
|
|
||||||
func newHealRoutine() *healRoutine {
|
func newHealRoutine() *healRoutine {
|
||||||
return &healRoutine{
|
return &healRoutine{
|
||||||
tasks: make(chan healTask),
|
tasks: make(chan healTask, 50000),
|
||||||
doneCh: make(chan struct{}),
|
doneCh: make(chan struct{}),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1672,7 +1672,7 @@ func (z *erasureServerPools) HealObjects(ctx context.Context, bucket, prefix str
|
|||||||
cancel()
|
cancel()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
waitForLowHTTPReq(globalHealConfig.IOCount, globalHealConfig.Sleep)
|
|
||||||
for _, version := range fivs.Versions {
|
for _, version := range fivs.Versions {
|
||||||
if err := healObject(bucket, version.Name, version.VersionID); err != nil {
|
if err := healObject(bucket, version.Name, version.VersionID); err != nil {
|
||||||
errCh <- err
|
errCh <- err
|
||||||
|
@ -41,15 +41,24 @@ func newBgHealSequence() *healSequence {
|
|||||||
reqInfo := &logger.ReqInfo{API: "BackgroundHeal"}
|
reqInfo := &logger.ReqInfo{API: "BackgroundHeal"}
|
||||||
ctx, cancelCtx := context.WithCancel(logger.SetReqInfo(GlobalContext, reqInfo))
|
ctx, cancelCtx := context.WithCancel(logger.SetReqInfo(GlobalContext, reqInfo))
|
||||||
|
|
||||||
|
globalHealConfigMu.Lock()
|
||||||
|
opts := globalHealConfig
|
||||||
|
globalHealConfigMu.Unlock()
|
||||||
|
|
||||||
|
scanMode := madmin.HealNormalScan
|
||||||
|
if opts.Bitrot {
|
||||||
|
scanMode = madmin.HealDeepScan
|
||||||
|
}
|
||||||
|
|
||||||
hs := madmin.HealOpts{
|
hs := madmin.HealOpts{
|
||||||
// Remove objects that do not have read-quorum
|
// Remove objects that do not have read-quorum
|
||||||
Remove: true,
|
Remove: healDeleteDangling,
|
||||||
ScanMode: madmin.HealNormalScan,
|
ScanMode: scanMode,
|
||||||
}
|
}
|
||||||
|
|
||||||
return &healSequence{
|
return &healSequence{
|
||||||
sourceCh: make(chan healSource),
|
sourceCh: make(chan healSource, 50000),
|
||||||
respCh: make(chan healResult),
|
respCh: make(chan healResult, 50000),
|
||||||
startTime: UTCNow(),
|
startTime: UTCNow(),
|
||||||
clientToken: bgHealingUUID,
|
clientToken: bgHealingUUID,
|
||||||
// run-background heal with reserved bucket
|
// run-background heal with reserved bucket
|
||||||
@ -170,6 +179,15 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
|||||||
Name: pathJoin(minioMetaBucket, minioConfigPrefix),
|
Name: pathJoin(minioMetaBucket, minioConfigPrefix),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
globalHealConfigMu.Lock()
|
||||||
|
opts := globalHealConfig
|
||||||
|
globalHealConfigMu.Unlock()
|
||||||
|
|
||||||
|
scanMode := madmin.HealNormalScan
|
||||||
|
if opts.Bitrot {
|
||||||
|
scanMode = madmin.HealDeepScan
|
||||||
|
}
|
||||||
|
|
||||||
// Heal all buckets with all objects
|
// Heal all buckets with all objects
|
||||||
for _, bucket := range buckets {
|
for _, bucket := range buckets {
|
||||||
if tracker.isHealed(bucket.Name) {
|
if tracker.isHealed(bucket.Name) {
|
||||||
@ -188,7 +206,9 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
|||||||
tracker.Object = ""
|
tracker.Object = ""
|
||||||
tracker.Bucket = bucket.Name
|
tracker.Bucket = bucket.Name
|
||||||
// Heal current bucket
|
// Heal current bucket
|
||||||
if _, err := er.HealBucket(ctx, bucket.Name, madmin.HealOpts{}); err != nil {
|
if _, err := er.HealBucket(ctx, bucket.Name, madmin.HealOpts{
|
||||||
|
ScanMode: scanMode,
|
||||||
|
}); err != nil {
|
||||||
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
|
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
|
||||||
logger.LogIf(ctx, err)
|
logger.LogIf(ctx, err)
|
||||||
}
|
}
|
||||||
@ -236,10 +256,12 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
|||||||
logger.LogIf(ctx, err)
|
logger.LogIf(ctx, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
waitForLowHTTPReq(globalHealConfig.IOCount, globalHealConfig.Sleep)
|
|
||||||
for _, version := range fivs.Versions {
|
for _, version := range fivs.Versions {
|
||||||
if _, err := er.HealObject(ctx, bucket.Name, version.Name, version.VersionID, madmin.HealOpts{
|
if _, err := er.HealObject(ctx, bucket.Name, version.Name, version.VersionID, madmin.HealOpts{
|
||||||
ScanMode: madmin.HealNormalScan, Remove: healDeleteDangling}); err != nil {
|
ScanMode: scanMode,
|
||||||
|
Remove: healDeleteDangling,
|
||||||
|
}); err != nil {
|
||||||
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
|
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
|
||||||
// If not deleted, assume they failed.
|
// If not deleted, assume they failed.
|
||||||
tracker.ItemsFailed++
|
tracker.ItemsFailed++
|
||||||
@ -256,6 +278,9 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
|||||||
if time.Since(tracker.LastUpdate) > time.Minute {
|
if time.Since(tracker.LastUpdate) > time.Minute {
|
||||||
logger.LogIf(ctx, tracker.update(ctx))
|
logger.LogIf(ctx, tracker.update(ctx))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Wait and proceed if there are active requests
|
||||||
|
waitForLowHTTPReq()
|
||||||
}
|
}
|
||||||
|
|
||||||
// How to resolve partial results.
|
// How to resolve partial results.
|
||||||
@ -307,15 +332,19 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []BucketIn
|
|||||||
func healObject(bucket, object, versionID string, scan madmin.HealScanMode) {
|
func healObject(bucket, object, versionID string, scan madmin.HealScanMode) {
|
||||||
// Get background heal sequence to send elements to heal
|
// Get background heal sequence to send elements to heal
|
||||||
bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
|
bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
|
||||||
if ok {
|
if !ok {
|
||||||
bgSeq.sourceCh <- healSource{
|
return
|
||||||
bucket: bucket,
|
}
|
||||||
object: object,
|
select {
|
||||||
versionID: versionID,
|
case bgSeq.sourceCh <- healSource{
|
||||||
opts: &madmin.HealOpts{
|
bucket: bucket,
|
||||||
Remove: true, // if found dangling purge it.
|
object: object,
|
||||||
ScanMode: scan,
|
versionID: versionID,
|
||||||
},
|
opts: &madmin.HealOpts{
|
||||||
}
|
Remove: healDeleteDangling, // if found dangling purge it.
|
||||||
|
ScanMode: scan,
|
||||||
|
},
|
||||||
|
}:
|
||||||
|
default:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
19
cmd/mrf.go
19
cmd/mrf.go
@ -27,8 +27,6 @@ import (
|
|||||||
"github.com/minio/minio/internal/logger"
|
"github.com/minio/minio/internal/logger"
|
||||||
)
|
)
|
||||||
|
|
||||||
var mrfHealingOpts = madmin.HealOpts{ScanMode: madmin.HealNormalScan, Remove: healDeleteDangling}
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
mrfInfoResetInterval = 10 * time.Second
|
mrfInfoResetInterval = 10 * time.Second
|
||||||
mrfOpsQueueSize = 10000
|
mrfOpsQueueSize = 10000
|
||||||
@ -185,6 +183,20 @@ func (m *mrfState) healRoutine() {
|
|||||||
idler := time.NewTimer(mrfInfoResetInterval)
|
idler := time.NewTimer(mrfInfoResetInterval)
|
||||||
defer idler.Stop()
|
defer idler.Stop()
|
||||||
|
|
||||||
|
globalHealConfigMu.Lock()
|
||||||
|
opts := globalHealConfig
|
||||||
|
globalHealConfigMu.Unlock()
|
||||||
|
|
||||||
|
scanMode := madmin.HealNormalScan
|
||||||
|
if opts.Bitrot {
|
||||||
|
scanMode = madmin.HealDeepScan
|
||||||
|
}
|
||||||
|
|
||||||
|
var mrfHealingOpts = madmin.HealOpts{
|
||||||
|
ScanMode: scanMode,
|
||||||
|
Remove: healDeleteDangling,
|
||||||
|
}
|
||||||
|
|
||||||
for {
|
for {
|
||||||
idler.Reset(mrfInfoResetInterval)
|
idler.Reset(mrfInfoResetInterval)
|
||||||
select {
|
select {
|
||||||
@ -214,7 +226,6 @@ func (m *mrfState) healRoutine() {
|
|||||||
|
|
||||||
// Heal objects
|
// Heal objects
|
||||||
for _, u := range mrfOperations {
|
for _, u := range mrfOperations {
|
||||||
waitForLowHTTPReq(globalHealConfig.IOCount, globalHealConfig.Sleep)
|
|
||||||
if _, err := m.objectAPI.HealObject(m.ctx, u.bucket, u.object, u.versionID, mrfHealingOpts); err != nil {
|
if _, err := m.objectAPI.HealObject(m.ctx, u.bucket, u.object, u.versionID, mrfHealingOpts); err != nil {
|
||||||
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
|
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
|
||||||
// If not deleted, assume they failed.
|
// If not deleted, assume they failed.
|
||||||
@ -238,6 +249,8 @@ func (m *mrfState) healRoutine() {
|
|||||||
delete(m.pendingOps, u)
|
delete(m.pendingOps, u)
|
||||||
m.mu.Unlock()
|
m.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
waitForLowHTTPReq()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -59,7 +59,7 @@ var (
|
|||||||
},
|
},
|
||||||
config.KV{
|
config.KV{
|
||||||
Key: IOCount,
|
Key: IOCount,
|
||||||
Value: "10",
|
Value: "100",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user