From 442e1698cb813331641cba876aa4f38f573bf121 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Wed, 15 Jan 2020 01:08:39 -0800 Subject: [PATCH] heal: Avoid spinning up object healing during startup (#8819) Auto-heal disks, metadata and buckets in the background, but not objects; let the auto-heal kick in for objects after the cluster has been up for a while. --- buildscripts/verify-healing.sh | 6 +++--- cmd/admin-heal-ops.go | 22 ++++++++++++++-------- cmd/background-newdisks-heal-ops.go | 4 ++-- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/buildscripts/verify-healing.sh b/buildscripts/verify-healing.sh index 51ebe63d6..6e4108916 100755 --- a/buildscripts/verify-healing.sh +++ b/buildscripts/verify-healing.sh @@ -75,7 +75,7 @@ function __init__() } function perform_test_1() { - minio_pids=( $(start_minio_3_node 20) ) + minio_pids=( $(start_minio_3_node 30) ) for pid in "${minio_pids[@]}"; do kill "$pid" done @@ -111,7 +111,7 @@ function perform_test_1() { } function perform_test_2() { - minio_pids=( $(start_minio_3_node 20) ) + minio_pids=( $(start_minio_3_node 30) ) for pid in "${minio_pids[@]}"; do kill "$pid" done @@ -148,7 +148,7 @@ function perform_test_2() { } function perform_test_3() { - minio_pids=( $(start_minio_3_node 20) ) + minio_pids=( $(start_minio_3_node 30) ) for pid in "${minio_pids[@]}"; do kill "$pid" done diff --git a/cmd/admin-heal-ops.go b/cmd/admin-heal-ops.go index 62897f9c2..2020ccf59 100644 --- a/cmd/admin-heal-ops.go +++ b/cmd/admin-heal-ops.go @@ -577,7 +577,8 @@ func (h *healSequence) queueHealTask(path string, healType madmin.HealItemType) func (h *healSequence) healItemsFromSourceCh() error { h.lastHealActivity = UTCNow() - if err := h.healItems(); err != nil { + bucketsOnly := true // heal buckets only, not objects. 
+ if err := h.healItems(bucketsOnly); err != nil { logger.LogIf(h.ctx, err) } @@ -615,7 +616,7 @@ func (h *healSequence) healFromSourceCh() { close(h.traverseAndHealDoneCh) } -func (h *healSequence) healItems() error { +func (h *healSequence) healItems(bucketsOnly bool) error { // Start with format healing if err := h.healDiskFormat(); err != nil { return err @@ -637,7 +638,7 @@ func (h *healSequence) healItems() error { } // Heal buckets and objects - return h.healBuckets() + return h.healBuckets(bucketsOnly) } // traverseAndHeal - traverses on-disk data and performs healing @@ -648,7 +649,8 @@ func (h *healSequence) healItems() error { // has to wait until a safe point is reached, such as between scanning // two objects. func (h *healSequence) traverseAndHeal() { - if err := h.healItems(); err != nil { + bucketsOnly := false // Heals buckets and objects also. + if err := h.healItems(bucketsOnly); err != nil { if h.isQuitting() { err = errHealStopSignalled } @@ -704,14 +706,14 @@ func (h *healSequence) healDiskFormat() error { } // healBuckets - check for all buckets heal or just particular bucket. -func (h *healSequence) healBuckets() error { +func (h *healSequence) healBuckets(bucketsOnly bool) error { if h.isQuitting() { return errHealStopSignalled } // 1. If a bucket was specified, heal only the bucket. if h.bucket != "" { - return h.healBucket(h.bucket) + return h.healBucket(h.bucket, bucketsOnly) } // Get current object layer instance. @@ -726,7 +728,7 @@ func (h *healSequence) healBuckets() error { } for _, bucket := range buckets { - if err = h.healBucket(bucket.Name); err != nil { + if err = h.healBucket(bucket.Name, bucketsOnly); err != nil { return err } } @@ -735,7 +737,7 @@ func (h *healSequence) healBuckets() error { } // healBucket - traverses and heals given bucket -func (h *healSequence) healBucket(bucket string) error { +func (h *healSequence) healBucket(bucket string, bucketsOnly bool) error { // Get current object layer instance. 
objectAPI := newObjectLayerWithoutSafeModeFn() if objectAPI == nil { @@ -746,6 +748,10 @@ func (h *healSequence) healBucket(bucket string) error { return err } + if bucketsOnly { + return nil + } + if !h.settings.Recursive { if h.objPrefix != "" { // Check if an object named as the objPrefix exists, diff --git a/cmd/background-newdisks-heal-ops.go b/cmd/background-newdisks-heal-ops.go index f7038a331..91156c713 100644 --- a/cmd/background-newdisks-heal-ops.go +++ b/cmd/background-newdisks-heal-ops.go @@ -64,6 +64,8 @@ func monitorLocalDisksAndHeal() { // Perform automatic disk healing when a disk is replaced locally. for { + time.Sleep(defaultMonitorNewDiskInterval) + // Attempt a heal as the server starts-up first. localDisksInZoneHeal := make([]Endpoints, len(z.zones)) for i, ep := range globalEndpoints { @@ -91,8 +93,6 @@ func monitorLocalDisksAndHeal() { // Ensure that reformatting disks is finished bgSeq.sourceCh <- nopHeal - time.Sleep(defaultMonitorNewDiskInterval) - var erasureSetInZoneToHeal = make([][]int, len(localDisksInZoneHeal)) // Compute the list of erasure set to heal for i, localDisksToHeal := range localDisksInZoneHeal {