diff --git a/buildscripts/verify-healing-empty-erasure-set.sh b/buildscripts/verify-healing-empty-erasure-set.sh
index 2e632b71e..3a9217bae 100755
--- a/buildscripts/verify-healing-empty-erasure-set.sh
+++ b/buildscripts/verify-healing-empty-erasure-set.sh
@@ -38,8 +38,40 @@ function start_minio_3_node() {
 	disown $pid3
 
 	export MC_HOST_myminio="http://minio:minio123@127.0.0.1:$((start_port + 1))"
 
+	/tmp/mc ready myminio
+
+	# Wait for all drives to be online and formatted
+	while [ $(/tmp/mc admin info --json myminio | jq '.info.servers[].drives[].state | select(. != "ok")' | wc -l) -gt 0 ]; do sleep 1; done
+	# Wait for all drives to be healed
+	while [ $(/tmp/mc admin info --json myminio | jq '.info.servers[].drives[].healing | select(. != null) | select(. == true)' | wc -l) -gt 0 ]; do sleep 1; done
+
+	# Wait for Status: in MinIO output
+	while true; do
+		rv=$(check_online)
+		if [ "$rv" != "1" ]; then
+			# success
+			break
+		fi
+
+		# Check if we should retry
+		retry=$((retry + 1))
+		if [ $retry -le 20 ]; then
+			sleep 5
+			continue
+		fi
+
+		# Failure
+		for i in $(seq 1 3); do
+			echo "server$i log:"
+			cat "${WORK_DIR}/dist-minio-server$i.log"
+		done
+		pkill -9 minio
+		echo "FAILED"
+		purge "$WORK_DIR"
+		exit 1
+	done
+
 	if ! ps -p $pid1 1>&2 >/dev/null; then
 		echo "server1 log:"
 		cat "${WORK_DIR}/dist-minio-server1.log"
@@ -90,7 +122,7 @@ function check_online() {
 }
 
 function purge() {
-	rm -rf "$1"
+	echo rm -rf "$1"
 }
 
 function __init__() {
@@ -117,18 +149,6 @@ function perform_test() {
 	set -x
 
 	start_minio_3_node $2
-
-	rv=$(check_online)
-	if [ "$rv" == "1" ]; then
-		for i in $(seq 1 3); do
-			echo "server$i log:"
-			cat "${WORK_DIR}/dist-minio-server$i.log"
-		done
-		pkill -9 minio
-		echo "FAILED"
-		purge "$WORK_DIR"
-		exit 1
-	fi
 }
 
 function main() {
diff --git a/cmd/background-newdisks-heal-ops.go b/cmd/background-newdisks-heal-ops.go
index c68abf5b8..da7ecba14 100644
--- a/cmd/background-newdisks-heal-ops.go
+++ b/cmd/background-newdisks-heal-ops.go
@@ -453,10 +453,6 @@ func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint
 	healingLogEvent(ctx, "Healing of drive '%s' is finished (healed: %d, skipped: %d, failed: %d).", disk, tracker.ItemsHealed, tracker.ItemsSkipped, tracker.ItemsFailed)
 
-	if len(tracker.QueuedBuckets) > 0 {
-		return fmt.Errorf("not all buckets were healed: %v", tracker.QueuedBuckets)
-	}
-
 	if serverDebugLog {
 		tracker.printTo(os.Stdout)
 		fmt.Printf("\n")
diff --git a/cmd/global-heal.go b/cmd/global-heal.go
index 5cc7a0235..f499f5ed8 100644
--- a/cmd/global-heal.go
+++ b/cmd/global-heal.go
@@ -530,7 +530,16 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
 	tracker.setObject("")
 	tracker.setBucket("")
 
-	return retErr
+	if retErr != nil {
+		return retErr
+	}
+
+	// Last sanity check
+	if len(tracker.QueuedBuckets) > 0 {
+		return fmt.Errorf("not all buckets were healed: %v", tracker.QueuedBuckets)
+	}
+
+	return nil
 }
 
 func healBucket(bucket string, scan madmin.HealScanMode) error {