mirror of
https://github.com/minio/minio.git
synced 2025-11-07 12:52:58 -05:00
skip inconsistent shards if possible (#13945)
data shards were wrong due to a healing bug reported in #13803 mainly with unaligned object sizes. This PR is an attempt to automatically avoid these shards, with available information about the `xl.meta` and actually disk mtime.
This commit is contained in:
152
buildscripts/unaligned-healing.sh
Executable file
152
buildscripts/unaligned-healing.sh
Executable file
@@ -0,0 +1,152 @@
|
||||
#!/bin/bash -e
|
||||
#
|
||||
|
||||
set -E
|
||||
set -o pipefail
|
||||
set -x
|
||||
|
||||
if [ ! -x "$PWD/minio" ]; then
|
||||
echo "minio executable binary not found in current directory"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
WORK_DIR="$PWD/.verify-$RANDOM"
|
||||
MINIO_CONFIG_DIR="$WORK_DIR/.minio"
|
||||
MINIO_OLD=( "$PWD/minio.RELEASE.2021-11-24T23-19-33Z" --config-dir "$MINIO_CONFIG_DIR" server )
|
||||
MINIO=( "$PWD/minio" --config-dir "$MINIO_CONFIG_DIR" server )
|
||||
|
||||
function download_old_release() {
|
||||
if [ ! -f minio.RELEASE.2021-11-24T23-19-33Z ]; then
|
||||
curl --silent -O https://dl.minio.io/server/minio/release/linux-amd64/archive/minio.RELEASE.2021-11-24T23-19-33Z
|
||||
chmod a+x minio.RELEASE.2021-11-24T23-19-33Z
|
||||
fi
|
||||
}
|
||||
|
||||
function start_minio_16drive() {
|
||||
start_port=$1
|
||||
|
||||
export MINIO_ROOT_USER=minio
|
||||
export MINIO_ROOT_PASSWORD=minio123
|
||||
export MC_HOST_minio="http://minio:minio123@127.0.0.1:${start_port}/"
|
||||
unset MINIO_KMS_AUTO_ENCRYPTION # do not auto-encrypt objects
|
||||
|
||||
MC_BUILD_DIR="mc-$RANDOM"
|
||||
if ! git clone --quiet https://github.com/minio/mc "$MC_BUILD_DIR"; then
|
||||
echo "failed to download https://github.com/minio/mc"
|
||||
purge "${MC_BUILD_DIR}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
(cd "${MC_BUILD_DIR}" && go build -o "$WORK_DIR/mc")
|
||||
|
||||
# remove mc source.
|
||||
purge "${MC_BUILD_DIR}"
|
||||
|
||||
"${MINIO_OLD[@]}" --address ":$start_port" "${WORK_DIR}/xl{1...16}" > "${WORK_DIR}/server1.log" 2>&1 &
|
||||
pid=$!
|
||||
disown $pid
|
||||
sleep 30
|
||||
|
||||
if ! ps -p ${pid} 1>&2 >/dev/null; then
|
||||
echo "server1 log:"
|
||||
cat "${WORK_DIR}/server1.log"
|
||||
echo "FAILED"
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
shred --iterations=1 --size=5241856 - 1>"${WORK_DIR}/unaligned" 2>/dev/null
|
||||
"${WORK_DIR}/mc" mb minio/healing-shard-bucket --quiet
|
||||
"${WORK_DIR}/mc" cp \
|
||||
"${WORK_DIR}/unaligned" \
|
||||
minio/healing-shard-bucket/unaligned \
|
||||
--disable-multipart --quiet
|
||||
|
||||
## "unaligned" object name gets consistently distributed
|
||||
## to disks in following distribution order
|
||||
##
|
||||
## NOTE: if you change the name make sure to change the
|
||||
## distribution order present here
|
||||
##
|
||||
## [15, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
|
||||
|
||||
## make sure to remove the "last" data shard
|
||||
rm -rf "${WORK_DIR}/xl14/healing-shard-bucket/unaligned"
|
||||
sleep 10
|
||||
## Heal the shard
|
||||
"${WORK_DIR}/mc" admin heal --quiet --recursive minio/healing-shard-bucket
|
||||
## then remove any other data shard let's pick first disk
|
||||
## - 1st data shard.
|
||||
rm -rf "${WORK_DIR}/xl3/healing-shard-bucket/unaligned"
|
||||
sleep 10
|
||||
## Heal the shard
|
||||
"${WORK_DIR}/mc" admin heal --quiet --recursive minio/healing-shard-bucket
|
||||
|
||||
go build ./docs/debugging/s3-check-md5/
|
||||
if ! ./s3-check-md5 \
|
||||
-debug \
|
||||
-access-key minio \
|
||||
-secret-key minio123 \
|
||||
-endpoint http://127.0.0.1:${start_port}/ 2>&1 | grep CORRUPTED; then
|
||||
echo "server1 log:"
|
||||
cat "${WORK_DIR}/server1.log"
|
||||
echo "FAILED"
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
pkill minio
|
||||
sleep 3
|
||||
|
||||
"${MINIO[@]}" --address ":$start_port" "${WORK_DIR}/xl{1...16}" > "${WORK_DIR}/server1.log" 2>&1 &
|
||||
pid=$!
|
||||
disown $pid
|
||||
sleep 30
|
||||
|
||||
if ! ps -p ${pid} 1>&2 >/dev/null; then
|
||||
echo "server1 log:"
|
||||
cat "${WORK_DIR}/server1.log"
|
||||
echo "FAILED"
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! ./s3-check-md5 \
|
||||
-debug \
|
||||
-access-key minio \
|
||||
-secret-key minio123 \
|
||||
-endpoint http://127.0.0.1:${start_port}/ 2>&1 | grep INTACT; then
|
||||
echo "server1 log:"
|
||||
cat "${WORK_DIR}/server1.log"
|
||||
echo "FAILED"
|
||||
mkdir inspects
|
||||
(cd inspects; "${WORK_DIR}/mc" admin inspect minio/healing-shard-bucket/unaligned/**)
|
||||
|
||||
"${WORK_DIR}/mc" mb play/inspects
|
||||
"${WORK_DIR}/mc" mirror inspects play/inspects
|
||||
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
pkill minio
|
||||
sleep 3
|
||||
}
|
||||
|
||||
function main() {
|
||||
download_old_release
|
||||
|
||||
start_port=$(shuf -i 10000-65000 -n 1)
|
||||
|
||||
start_minio_16drive ${start_port}
|
||||
}
|
||||
|
||||
function purge()
|
||||
{
|
||||
rm -rf "$1"
|
||||
}
|
||||
|
||||
( main "$@" )
|
||||
rv=$?
|
||||
purge "$WORK_DIR"
|
||||
exit "$rv"
|
||||
@@ -21,51 +21,70 @@ function start_minio_3_node() {
|
||||
start_port=$2
|
||||
args=""
|
||||
for i in $(seq 1 3); do
|
||||
args="$args http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/1/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/2/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/3/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/4/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/5/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/6/"
|
||||
args="$args http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/1/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/2/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/3/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/4/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/5/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/6/"
|
||||
done
|
||||
|
||||
"${MINIO[@]}" --address ":$[$start_port+1]" $args > "${WORK_DIR}/dist-minio-server1.log" 2>&1 &
|
||||
disown $!
|
||||
pid1=$!
|
||||
disown ${pid1}
|
||||
|
||||
"${MINIO[@]}" --address ":$[$start_port+2]" $args > "${WORK_DIR}/dist-minio-server2.log" 2>&1 &
|
||||
disown $!
|
||||
pid2=$!
|
||||
disown $pid2
|
||||
|
||||
"${MINIO[@]}" --address ":$[$start_port+3]" $args > "${WORK_DIR}/dist-minio-server3.log" 2>&1 &
|
||||
disown $!
|
||||
pid3=$!
|
||||
disown $pid3
|
||||
|
||||
sleep "$1"
|
||||
if [ "$(pgrep -c minio)" -ne 3 ]; then
|
||||
for i in $(seq 1 3); do
|
||||
echo "server$i log:"
|
||||
cat "${WORK_DIR}/dist-minio-server$i.log"
|
||||
done
|
||||
echo "FAILED"
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
|
||||
if ! ps -p $pid1 1>&2 > /dev/null; then
|
||||
echo "server1 log:"
|
||||
cat "${WORK_DIR}/dist-minio-server1.log"
|
||||
echo "FAILED"
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! ps -p $pid2 1>&2 > /dev/null; then
|
||||
echo "server2 log:"
|
||||
cat "${WORK_DIR}/dist-minio-server2.log"
|
||||
echo "FAILED"
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! ps -p $pid3 1>&2 > /dev/null; then
|
||||
echo "server3 log:"
|
||||
cat "${WORK_DIR}/dist-minio-server3.log"
|
||||
echo "FAILED"
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! pkill minio; then
|
||||
for i in $(seq 1 3); do
|
||||
echo "server$i log:"
|
||||
cat "${WORK_DIR}/dist-minio-server$i.log"
|
||||
done
|
||||
echo "FAILED"
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
for i in $(seq 1 3); do
|
||||
echo "server$i log:"
|
||||
cat "${WORK_DIR}/dist-minio-server$i.log"
|
||||
done
|
||||
echo "FAILED"
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
sleep 1;
|
||||
if pgrep minio; then
|
||||
# forcibly killing, to proceed further properly.
|
||||
if ! pkill -9 minio; then
|
||||
echo "no minio process running anymore, proceed."
|
||||
fi
|
||||
# forcibly killing, to proceed further properly.
|
||||
if ! pkill -9 minio; then
|
||||
echo "no minio process running anymore, proceed."
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
function check_online() {
|
||||
if grep -q 'Unable to initialize sub-systems' ${WORK_DIR}/dist-minio-*.log; then
|
||||
echo "1"
|
||||
echo "1"
|
||||
fi
|
||||
}
|
||||
|
||||
@@ -96,14 +115,14 @@ function perform_test() {
|
||||
|
||||
rv=$(check_online)
|
||||
if [ "$rv" == "1" ]; then
|
||||
for i in $(seq 1 3); do
|
||||
echo "server$i log:"
|
||||
cat "${WORK_DIR}/dist-minio-server$i.log"
|
||||
done
|
||||
pkill -9 minio
|
||||
echo "FAILED"
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
for i in $(seq 1 3); do
|
||||
echo "server$i log:"
|
||||
cat "${WORK_DIR}/dist-minio-server$i.log"
|
||||
done
|
||||
pkill -9 minio
|
||||
echo "FAILED"
|
||||
purge "$WORK_DIR"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user