mirror of
https://github.com/minio/minio.git
synced 2025-04-21 19:14:39 -04:00
heal: Remove .healing.bin when all ES drives are healing (#19846)
In the very rare case when all drives in an erasure set need to be healed, remove .healing.bin from all drives; otherwise healing will be stuck in a loop. Also, fix a unit test that sometimes fails because the test itself was incorrect.
This commit is contained in:
parent
8f93e81afb
commit
1277ad69a6
@ -38,8 +38,40 @@ function start_minio_3_node() {
|
|||||||
disown $pid3
|
disown $pid3
|
||||||
|
|
||||||
export MC_HOST_myminio="http://minio:minio123@127.0.0.1:$((start_port + 1))"
|
export MC_HOST_myminio="http://minio:minio123@127.0.0.1:$((start_port + 1))"
|
||||||
|
|
||||||
/tmp/mc ready myminio
|
/tmp/mc ready myminio
|
||||||
|
|
||||||
|
# Wait for all drives to be online and formatted
|
||||||
|
while [ $(/tmp/mc admin info --json myminio | jq '.info.servers[].drives[].state | select(. != "ok")' | wc -l) -gt 0 ]; do sleep 1; done
|
||||||
|
# Wait for all drives to be healed
|
||||||
|
while [ $(/tmp/mc admin info --json myminio | jq '.info.servers[].drives[].healing | select(. != null) | select(. == true)' | wc -l) -gt 0 ]; do sleep 1; done
|
||||||
|
|
||||||
|
# Wait for Status: in MinIO output
|
||||||
|
while true; do
|
||||||
|
rv=$(check_online)
|
||||||
|
if [ "$rv" != "1" ]; then
|
||||||
|
# success
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if we should retry
|
||||||
|
retry=$((retry + 1))
|
||||||
|
if [ $retry -le 20 ]; then
|
||||||
|
sleep 5
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Failure
|
||||||
|
for i in $(seq 1 3); do
|
||||||
|
echo "server$i log:"
|
||||||
|
cat "${WORK_DIR}/dist-minio-server$i.log"
|
||||||
|
done
|
||||||
|
pkill -9 minio
|
||||||
|
echo "FAILED"
|
||||||
|
purge "$WORK_DIR"
|
||||||
|
exit 1
|
||||||
|
done
|
||||||
|
|
||||||
if ! ps -p $pid1 1>&2 >/dev/null; then
|
if ! ps -p $pid1 1>&2 >/dev/null; then
|
||||||
echo "server1 log:"
|
echo "server1 log:"
|
||||||
cat "${WORK_DIR}/dist-minio-server1.log"
|
cat "${WORK_DIR}/dist-minio-server1.log"
|
||||||
@ -90,7 +122,7 @@ function check_online() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function purge() {
|
function purge() {
|
||||||
rm -rf "$1"
|
echo rm -rf "$1"
|
||||||
}
|
}
|
||||||
|
|
||||||
function __init__() {
|
function __init__() {
|
||||||
@ -117,18 +149,6 @@ function perform_test() {
|
|||||||
|
|
||||||
set -x
|
set -x
|
||||||
start_minio_3_node $2
|
start_minio_3_node $2
|
||||||
|
|
||||||
rv=$(check_online)
|
|
||||||
if [ "$rv" == "1" ]; then
|
|
||||||
for i in $(seq 1 3); do
|
|
||||||
echo "server$i log:"
|
|
||||||
cat "${WORK_DIR}/dist-minio-server$i.log"
|
|
||||||
done
|
|
||||||
pkill -9 minio
|
|
||||||
echo "FAILED"
|
|
||||||
purge "$WORK_DIR"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function main() {
|
function main() {
|
||||||
|
@ -453,10 +453,6 @@ func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint
|
|||||||
|
|
||||||
healingLogEvent(ctx, "Healing of drive '%s' is finished (healed: %d, skipped: %d, failed: %d).", disk, tracker.ItemsHealed, tracker.ItemsSkipped, tracker.ItemsFailed)
|
healingLogEvent(ctx, "Healing of drive '%s' is finished (healed: %d, skipped: %d, failed: %d).", disk, tracker.ItemsHealed, tracker.ItemsSkipped, tracker.ItemsFailed)
|
||||||
|
|
||||||
if len(tracker.QueuedBuckets) > 0 {
|
|
||||||
return fmt.Errorf("not all buckets were healed: %v", tracker.QueuedBuckets)
|
|
||||||
}
|
|
||||||
|
|
||||||
if serverDebugLog {
|
if serverDebugLog {
|
||||||
tracker.printTo(os.Stdout)
|
tracker.printTo(os.Stdout)
|
||||||
fmt.Printf("\n")
|
fmt.Printf("\n")
|
||||||
|
@ -530,7 +530,16 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
|||||||
tracker.setObject("")
|
tracker.setObject("")
|
||||||
tracker.setBucket("")
|
tracker.setBucket("")
|
||||||
|
|
||||||
return retErr
|
if retErr != nil {
|
||||||
|
return retErr
|
||||||
|
}
|
||||||
|
|
||||||
|
// Last sanity check
|
||||||
|
if len(tracker.QueuedBuckets) > 0 {
|
||||||
|
return fmt.Errorf("not all buckets were healed: %v", tracker.QueuedBuckets)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func healBucket(bucket string, scan madmin.HealScanMode) error {
|
func healBucket(bucket string, scan madmin.HealScanMode) error {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user