From 84bb7d05a998a096b96acd68ea94053d28a87eb3 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Fri, 17 Feb 2023 18:52:43 +0100 Subject: [PATCH] fix: healing deadlocks and ordering (#16643) --- cmd/admin-heal-ops.go | 32 +++++++++++++++----------------- cmd/global-heal.go | 1 - 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/cmd/admin-heal-ops.go b/cmd/admin-heal-ops.go index 6cae8170c..b9402a5aa 100644 --- a/cmd/admin-heal-ops.go +++ b/cmd/admin-heal-ops.go @@ -407,9 +407,6 @@ type healSequence struct { // bucket, and object on which heal seq. was initiated bucket, object string - // A channel of entities with heal result - respCh chan healResult - // Report healing progress reportProgress bool @@ -472,7 +469,6 @@ func newHealSequence(ctx context.Context, bucket, objPrefix, clientAddr string, clientToken := mustGetUUID() return &healSequence{ - respCh: make(chan healResult), bucket: bucket, object: objPrefix, reportProgress: true, @@ -719,28 +715,30 @@ func (h *healSequence) queueHealTask(source healSource, healType madmin.HealItem if serverDebugLog { logger.Info("Task in the queue: %#v", task) } - case <-h.ctx.Done(): - return nil default: // task queue is full, no more workers, we shall move on and heal later. return nil } - } else { - // respCh must be set for guaranteed result - task.respCh = h.respCh - select { - case globalBackgroundHealRoutine.tasks <- task: - if serverDebugLog { - logger.Info("Task in the queue: %#v", task) - } - case <-h.ctx.Done(): - return nil + // Don't wait for result + return nil + } + + // respCh must be set to wait for result. + // We make it size 1, so a result can always be written + // even if we aren't listening. + task.respCh = make(chan healResult, 1) + select { + case globalBackgroundHealRoutine.tasks <- task: + if serverDebugLog { + logger.Info("Task in the queue: %#v", task) } + case <-h.ctx.Done(): + return nil } // task queued, now wait for the response. select { - case res := <-h.respCh: + case res := <-task.respCh: if !h.reportProgress { if errors.Is(res.err, errSkipFile) { // this is only sent usually by nopHeal return nil diff --git a/cmd/global-heal.go b/cmd/global-heal.go index 7b2160be0..cac79b255 100644 --- a/cmd/global-heal.go +++ b/cmd/global-heal.go @@ -48,7 +48,6 @@ func newBgHealSequence() *healSequence { } return &healSequence{ - respCh: make(chan healResult), startTime: UTCNow(), clientToken: bgHealingUUID, // run-background heal with reserved bucket