Add resiliency tests (#20786)

This commit is contained in:
Allan Roger Reid 2024-12-20 20:24:45 -08:00 committed by GitHub
parent ddd137d317
commit 330dca9a35
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 847 additions and 0 deletions

39
.github/workflows/go-resiliency.yml vendored Normal file
View File

@ -0,0 +1,39 @@
# CI workflow: build MinIO and run the docker-compose based resiliency
# functional test suite for every pull request targeting master.
name: Resiliency Functional Tests

on:
  pull_request:
    branches:
      - master

# This ensures that previous jobs for the PR are canceled when the PR is
# updated.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref }}
  cancel-in-progress: true

# Read-only token: the workflow only checks out sources and builds.
permissions:
  contents: read

jobs:
  build:
    name: Go ${{ matrix.go-version }} on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        go-version: [1.23.x]
        os: [ubuntu-latest]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version: ${{ matrix.go-version }}
          check-latest: true
      - name: Build on ${{ matrix.os }}
        if: matrix.os == 'ubuntu-latest'
        env:
          CGO_ENABLED: 0
          GO111MODULE: on
        # IPv6 is re-enabled on the runner before the tests start; the
        # deployment's nginx listens on [::] as well -- TODO confirm that
        # is why these sysctls are required here.
        run: |
          sudo sysctl net.ipv6.conf.all.disable_ipv6=0
          sudo sysctl net.ipv6.conf.default.disable_ipv6=0
          make test-resiliency

View File

@ -217,6 +217,10 @@ docker: build ## builds minio docker container
@echo "Building minio docker image '$(TAG)'"
@docker build -q --no-cache -t $(TAG) . -f Dockerfile
test-resiliency: build ## builds minio and runs the resiliency tests via docker compose
	@echo "Running resiliency tests"
	@(DOCKER_COMPOSE_FILE=$(PWD)/docs/resiliency/docker-compose.yaml env bash $(PWD)/docs/resiliency/resiliency-tests.sh)
install-race: checks build-debugging ## builds minio to $(PWD)
@echo "Building minio binary with -race to './minio'"
@GORACE=history_size=7 CGO_ENABLED=1 go build -tags kqueue,dev -race -trimpath --ldflags "$(LDFLAGS)" -o $(PWD)/minio 1>/dev/null

View File

@ -0,0 +1,125 @@
# Compose deployment used by the resiliency tests: four MinIO servers with
# 8 drives each (two erasure sets -- see resiliency-tests.sh) fronted by an
# nginx load balancer on ports 9000 (S3) and 9001 (console).

# Settings and configurations that are common for all containers
x-minio-common: &minio-common
  build:
    # Build the server image from the repository's own Dockerfile
    # (the compose file lives in docs/resiliency/).
    context: ../../.
    dockerfile: Dockerfile
  command: server --console-address ":9001" http://minio{1...4}/data{1...8}
  expose:
    - "9000"
    - "9001"
  environment:
    MINIO_CI_CD: 1
  healthcheck:
    test: ["CMD", "mc", "ready", "local"]
    interval: 5s
    timeout: 5s
    retries: 5

# starts 4 docker containers running minio server instances.
# using nginx reverse proxy, load balancing, you can access
# it through port 9000.
services:
  minio1:
    <<: *minio-common
    hostname: minio1
    volumes:
      - data1-1:/data1
      - data1-2:/data2
      - data1-3:/data3
      - data1-4:/data4
      - data1-5:/data5
      - data1-6:/data6
      - data1-7:/data7
      - data1-8:/data8
  minio2:
    <<: *minio-common
    hostname: minio2
    volumes:
      - data2-1:/data1
      - data2-2:/data2
      - data2-3:/data3
      - data2-4:/data4
      - data2-5:/data5
      - data2-6:/data6
      - data2-7:/data7
      - data2-8:/data8
  minio3:
    <<: *minio-common
    hostname: minio3
    volumes:
      - data3-1:/data1
      - data3-2:/data2
      - data3-3:/data3
      - data3-4:/data4
      - data3-5:/data5
      - data3-6:/data6
      - data3-7:/data7
      - data3-8:/data8
  minio4:
    <<: *minio-common
    hostname: minio4
    volumes:
      - data4-1:/data1
      - data4-2:/data2
      - data4-3:/data3
      - data4-4:/data4
      - data4-5:/data5
      - data4-6:/data6
      - data4-7:/data7
      - data4-8:/data8
  # Load balancer in front of the four servers; config is mounted read-only.
  nginx:
    image: nginx:1.19.2-alpine
    hostname: nginx
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
    ports:
      - "9000:9000"
      - "9001:9001"
    depends_on:
      - minio1
      - minio2
      - minio3
      - minio4

## By default this config uses default local driver,
## For custom volumes replace with volume driver configuration.
volumes:
  data1-1:
  data1-2:
  data1-3:
  data1-4:
  data1-5:
  data1-6:
  data1-7:
  data1-8:
  data2-1:
  data2-2:
  data2-3:
  data2-4:
  data2-5:
  data2-6:
  data2-7:
  data2-8:
  data3-1:
  data3-2:
  data3-3:
  data3-4:
  data3-5:
  data3-6:
  data3-7:
  data3-8:
  data4-1:
  data4-2:
  data4-3:
  data4-4:
  data4-5:
  data4-6:
  data4-7:
  data4-8:

106
docs/resiliency/nginx.conf Normal file
View File

@ -0,0 +1,106 @@
# nginx reverse proxy / load balancer for the 4-node MinIO deployment:
# port 9000 balances the S3 API, port 9001 the web console (sticky).
user nginx;
worker_processes auto;

error_log /var/log/nginx/error.log warn;
pid /var/run/nginx.pid;

events {
    worker_connections 4096;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for"';

    access_log /var/log/nginx/access.log main;
    sendfile on;
    keepalive_timeout 65;

    # include /etc/nginx/conf.d/*.conf;

    # S3 API backends: round-robin across all four servers.
    upstream minio {
        server minio1:9000;
        server minio2:9000;
        server minio3:9000;
        server minio4:9000;
    }

    # Console backends: ip_hash keeps a client pinned to one server
    # (session stickiness for the web UI).
    upstream console {
        ip_hash;
        server minio1:9001;
        server minio2:9001;
        server minio3:9001;
        server minio4:9001;
    }

    server {
        listen 9000;
        listen [::]:9000;
        server_name localhost;

        # To allow special characters in headers
        ignore_invalid_headers off;
        # Allow any size file to be uploaded.
        # Set to a value such as 1000m; to restrict file size to a specific value
        client_max_body_size 0;
        # To disable buffering
        proxy_buffering off;
        proxy_request_buffering off;

        location / {
            proxy_set_header Host $http_host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_connect_timeout 300;
            # Default is HTTP/1, keepalive is only enabled in HTTP/1.1
            proxy_http_version 1.1;
            proxy_set_header Connection "";
            chunked_transfer_encoding off;
            proxy_pass http://minio;
        }
    }

    server {
        listen 9001;
        listen [::]:9001;
        server_name localhost;

        # To allow special characters in headers
        ignore_invalid_headers off;
        # Allow any size file to be uploaded.
        # Set to a value such as 1000m; to restrict file size to a specific value
        client_max_body_size 0;
        # To disable buffering
        proxy_buffering off;
        proxy_request_buffering off;

        location / {
            proxy_set_header Host $http_host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_set_header X-NginX-Proxy true;
            # This is necessary to pass the correct IP to be hashed
            real_ip_header X-Real-IP;
            proxy_connect_timeout 300;
            # To support websocket
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "upgrade";
            chunked_transfer_encoding off;
            proxy_pass http://console;
        }
    }
}

View File

@ -0,0 +1,46 @@
#!/usr/bin/env bash

# Seeds the freshly started MinIO deployment with test data. The outcome is
# reported through resiliency-initial.log ("script passed"/"script failed"),
# which the test driver greps instead of checking this script's exit status.
# (NOTE(review): an earlier header claimed this runs in a Kubernetes
# "ubuntu-pod"; it is actually invoked by resiliency-tests.sh against the
# docker-compose deployment.)

echo "script failed" >resiliency-initial.log # assume initial state
echo "sleep to wait for MinIO Server to be ready prior mc commands"
# https://github.com/minio/mc/issues/3599

MINIO_SERVER_URL="http://127.0.0.1:9000"
ALIAS_NAME=myminio
BUCKET="test-bucket"
SRC_DIR="/tmp/data"
INLINED_DIR="/tmp/inlined"
DEST_DIR="/tmp/dest"

# Retry alias registration until the server accepts it or we give up.
TIMEOUT=10
while true; do
	if [[ ${TIMEOUT} -le 0 ]]; then
		echo retry: timeout while running: mc alias set
		exit 1
	fi
	# Invoke mc directly -- the previous `eval` re-parsed the arguments for
	# no benefit and would have been an injection hazard for odd values.
	./mc alias set "${ALIAS_NAME}" "${MINIO_SERVER_URL}" minioadmin minioadmin && break
	TIMEOUT=$((TIMEOUT - 1))
	sleep 1
done

./mc ready "${ALIAS_NAME}"
./mc mb "${ALIAS_NAME}"/"${BUCKET}"

rm -rf "${SRC_DIR}" "${INLINED_DIR}" "${DEST_DIR}" && mkdir -p "${SRC_DIR}" "${INLINED_DIR}" "${DEST_DIR}"

# Create ten objects of random content and random size (100-199 blocks
# of 50K each).
for idx in {1..10}; do
	# generate random nr of blocks
	COUNT=$((RANDOM % 100 + 100))
	# generate random content
	dd if=/dev/urandom bs=50K count="${COUNT}" of="${SRC_DIR}"/file"$idx"
done

# create small object that will be inlined into xl.meta
dd if=/dev/urandom bs=50K count=1 of="${INLINED_DIR}"/inlined

# Only mark success when both uploads complete.
if ./mc cp --quiet --recursive "${SRC_DIR}/" "${ALIAS_NAME}"/"${BUCKET}"/initial-data/; then
	if ./mc cp --quiet --recursive "${INLINED_DIR}/" "${ALIAS_NAME}"/"${BUCKET}"/inlined-data/; then
		echo "script passed" >resiliency-initial.log
	fi
fi

View File

@ -0,0 +1,433 @@
#!/usr/bin/env bash

# ANSI colors used by the per-test reporters; NC resets the terminal.
# (These were referenced throughout this script via ${GREEN}/${RED}/${NC}
# but never defined, so all "Passed"/"Failed" output rendered uncolored.)
GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m'

TESTS_RUN_STATUS=1 # 1 = all tests passed so far; cleared to 0 on any failure
function cleanup() {
	# Bring the compose deployment down (dropping its volumes), then
	# force-remove any containers that are still running and wait for
	# each one to terminate.
	echo "Cleaning up MinIO deployment"
	docker compose -f "${DOCKER_COMPOSE_FILE}" down --volumes
	local cid
	for cid in $(docker ps -q); do
		echo Removing docker "${cid}"
		docker rm -f "${cid}" >/dev/null 2>&1
		docker wait "${cid}"
	done
}
function cleanup_and_prune() {
	# Full teardown: stop the deployment, then reclaim all unused docker
	# data (stopped containers, volumes, networks, images) so each run
	# starts from a clean slate.
	cleanup
	docker system prune --volumes --force
	docker image prune --all --force
}
function verify_resiliency() {
	# Run the read/write verification script and report $1 (the calling
	# test's name) as Passed or Failed based on the log it leaves behind.
	docs/resiliency/resiliency-verify-script.sh
	VERDICT=$(grep "script passed" <resiliency-verify.log)
	if [ "${VERDICT}" = "script passed" ]; then
		echo -e "${GREEN}${1} Passed${NC}"
	else
		echo -e "${RED}${1} Failed${NC}"
		TESTS_RUN_STATUS=0
	fi
}
function verify_resiliency_failure() {
	# Run the expected-failure verification script and report $1 (the
	# calling test's name) as Passed or Failed based on its log file.
	docs/resiliency/resiliency-verify-failure-script.sh
	VERDICT=$(grep "script passed" <resiliency-verify-failure.log)
	if [ "${VERDICT}" = "script passed" ]; then
		echo -e "${GREEN}${1} Passed${NC}"
	else
		echo -e "${RED}${1} Failed${NC}"
		TESTS_RUN_STATUS=0
	fi
}
function verify_resiliency_healing() {
	# $1: calling test's name; $2: expected heal result (JSON) forwarded
	# verbatim to the healing verification script. Pass/fail is read from
	# the log file the script writes.
	local expected=$2
	docs/resiliency/resiliency-verify-healing-script.sh "$expected"
	VERDICT=$(grep "script passed" <resiliency-verify-healing.log)
	if [ "${VERDICT}" = "script passed" ]; then
		echo -e "${GREEN}${1} Passed${NC}"
	else
		echo -e "${RED}${1} Failed${NC}"
		TESTS_RUN_STATUS=0
	fi
}
function test_resiliency_success_with_server_down() {
	# With one of the four servers stopped, reads and writes should still
	# succeed (the verify script expects success).
	echo
	echo -e "${GREEN}Running test_resiliency_success_with_server_down ...${NC}"
	# Stop one node
	docker stop resiliency-minio1-1
	sleep 10
	verify_resiliency "${FUNCNAME[0]}"
	# Finally restart the node
	docker start resiliency-minio1-1
	./mc ready myminio
}
function test_resiliency_failure_with_server_down_and_single_disk_offline() {
	# One server down plus one extra suspended disk per erasure set: reads
	# are now expected to FAIL (verified via verify_resiliency_failure).
	# Disks are "suspended" by replacing /dataN/.minio.sys with an empty
	# file, which makes the drive unusable without destroying its data.
	echo
	echo -e "${GREEN}Running test_resiliency_failure_with_server_down_and_single_disk_offline ...${NC}"
	# Stop one node
	docker stop resiliency-minio1-1
	# In additional, suspend one more disk per set in order to induce a failure
	docker exec resiliency-minio2-1 /bin/sh -c "mv /data2/.minio.sys /data2/.minio.bkp && touch /data2/.minio.sys"
	docker exec resiliency-minio3-1 /bin/sh -c "mv /data6/.minio.sys /data6/.minio.bkp && touch /data6/.minio.sys"
	sleep 10
	verify_resiliency_failure "${FUNCNAME[0]}"
	# Enable the disks back on nodes
	docker exec resiliency-minio2-1 /bin/sh -c "rm -rf /data2/.minio.sys && mv /data2/.minio.bkp /data2/.minio.sys"
	docker exec resiliency-minio3-1 /bin/sh -c "rm -rf /data6/.minio.sys && mv /data6/.minio.bkp /data6/.minio.sys"
	# Finally restart the node
	docker start resiliency-minio1-1
	./mc ready myminio
}
function test_resiliency_failure_with_servers_down() {
	# With two of the four servers stopped, reads are expected to FAIL
	# (verified via verify_resiliency_failure).
	echo
	echo -e "${GREEN}Running test_resiliency_failure_with_servers_down ...${NC}"
	# Stop two nodes
	docker stop resiliency-minio1-1
	docker stop resiliency-minio2-1
	sleep 10
	verify_resiliency_failure "${FUNCNAME[0]}"
	# Restart the nodes
	docker start resiliency-minio1-1
	docker start resiliency-minio2-1
	./mc ready myminio
}
function test_resiliency_success_with_disks_offline() {
	# There are 8 disks on each node with EC:4 and two erasure sets.
	# Suspending one disk per set on every server (/data1 and /data5)
	# must still leave the cluster readable and writable.
	echo
	echo -e "${GREEN}Running test_resiliency_success_with_disks_offline ...${NC}"
	local node drive
	# Suspend /data1 (set 1) and /data5 (set 2) on every node, in the
	# same node-major order as before.
	for node in 1 2 3 4; do
		for drive in 1 5; do
			docker exec resiliency-minio${node}-1 /bin/sh -c "mv /data${drive}/.minio.sys /data${drive}/.minio.bkp && touch /data${drive}/.minio.sys"
		done
	done
	sleep 10
	verify_resiliency "${FUNCNAME[0]}"
	# Finally restore every suspended disk.
	for node in 1 2 3 4; do
		for drive in 1 5; do
			docker exec resiliency-minio${node}-1 /bin/sh -c "rm -rf /data${drive}/.minio.sys && mv /data${drive}/.minio.bkp /data${drive}/.minio.sys"
		done
	done
	./mc ready myminio
}
function test_resiliency_failure_with_too_many_disks_offline() {
	# There are 8 disks on each node with EC:4 and two erasure sets.
	# One suspended disk per set on every server is tolerated; suspending
	# one additional disk in each set must cause read failures.
	echo
	echo -e "${GREEN}Running test_resiliency_failure_with_too_many_disks_offline ...${NC}"
	local node drive
	# Suspend /data1 across all nodes, then /data5 (drive-major order,
	# matching the original sequence).
	for drive in 1 5; do
		for node in 1 2 3 4; do
			docker exec resiliency-minio${node}-1 /bin/sh -c "mv /data${drive}/.minio.sys /data${drive}/.minio.bkp && touch /data${drive}/.minio.sys"
		done
	done
	# One extra disk per set tips each set past the tolerated limit.
	docker exec resiliency-minio2-1 /bin/sh -c "mv /data2/.minio.sys /data2/.minio.bkp && touch /data2/.minio.sys"
	docker exec resiliency-minio3-1 /bin/sh -c "mv /data6/.minio.sys /data6/.minio.bkp && touch /data6/.minio.sys"
	sleep 10
	verify_resiliency_failure "${FUNCNAME[0]}"
	# Finally restore every suspended disk.
	for drive in 1 5; do
		for node in 1 2 3 4; do
			docker exec resiliency-minio${node}-1 /bin/sh -c "rm -rf /data${drive}/.minio.sys && mv /data${drive}/.minio.bkp /data${drive}/.minio.sys"
		done
	done
	docker exec resiliency-minio2-1 /bin/sh -c "rm -rf /data2/.minio.sys && mv /data2/.minio.bkp /data2/.minio.sys"
	docker exec resiliency-minio3-1 /bin/sh -c "rm -rf /data6/.minio.sys && mv /data6/.minio.bkp /data6/.minio.sys"
	./mc ready myminio
}
function find_erasure_set_for_file() {
	# Echo the first data drive (1 or 5) of the erasure set holding
	# $1 (FILE) under $2 (DIR), or -1 when the object is in neither set.
	# Exit status: 0 when found, 1 otherwise. Callers capture the output
	# with $(...) and then test $? -- previously that status was always 0
	# because the function's last command (echo) succeeded even in the
	# not-found case, making the callers' error handling dead code.
	local FILE=$1
	local DIR=$2
	local DATA_DRIVE=-1
	local drive
	for drive in 1 5; do
		# Probing one drive of the first node is enough to tell which of
		# the two sets holds the object's xl.meta.
		if docker exec resiliency-minio1-1 /bin/sh -c "stat /data${drive}/test-bucket/$DIR/$FILE/xl.meta" >/dev/null 2>&1; then
			DATA_DRIVE=${drive}
			break
		fi
	done
	echo $DATA_DRIVE
	[ "$DATA_DRIVE" -ne -1 ]
}
function test_resiliency_healing_missing_xl_metas() {
	# Deletes 1, 2, 3 and finally 4 copies of an object's xl.meta and
	# checks that `mc admin heal` reports the expected before/after state
	# (green -> yellow -> red) and repairs the object back to green.
	echo
	echo -e "${GREEN}Running test_resiliency_healing_missing_xl_metas ...${NC}"
	DIR="initial-data"
	FILE="file1"
	DATA_DRIVE=$(find_erasure_set_for_file $FILE $DIR)
	# NOTE(review): $? here is the exit status of the command substitution
	# (i.e. of the function itself), not of anything inside it.
	STATUS=$?
	if [ $STATUS -ne 0 ]; then
		echo -e "${RED}Could not find erasure set for file: ${FILE}${NC}"
		echo -e "${RED}"${FUNCNAME[0]}" Failed${NC}"
		TESTS_RUN_STATUS=$((TESTS_RUN_STATUS & 0))
		return 1
	fi
	# Remove single xl.meta -- status still green
	OUTPUT=$(docker exec resiliency-minio1-1 /bin/sh -c "rm /data$((DATA_DRIVE))/test-bucket/initial-data/$FILE/xl.meta")
	WANT='{ "before": { "color": "green", "missing": 1, "corrupted": 0 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'"} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
	# Remove two xl.meta's -- status becomes yellow
	OUTPUT=$(docker exec resiliency-minio1-1 /bin/sh -c "rm /data$((DATA_DRIVE))/test-bucket/initial-data/$FILE/xl.meta")
	OUTPUT=$(docker exec resiliency-minio2-1 /bin/sh -c "rm /data$((DATA_DRIVE + 1))/test-bucket/initial-data/$FILE/xl.meta")
	WANT='{ "before": { "color": "yellow", "missing": 2, "corrupted": 0 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'"} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
	# Remove three xl.meta's -- status becomes red (3 missing)
	OUTPUT=$(docker exec resiliency-minio1-1 /bin/sh -c "rm /data$((DATA_DRIVE))/test-bucket/initial-data/$FILE/xl.meta")
	OUTPUT=$(docker exec resiliency-minio2-1 /bin/sh -c "rm /data$((DATA_DRIVE + 1))/test-bucket/initial-data/$FILE/xl.meta")
	OUTPUT=$(docker exec resiliency-minio3-1 /bin/sh -c "rm /data$((DATA_DRIVE + 2))/test-bucket/initial-data/$FILE/xl.meta")
	WANT='{ "before": { "color": "red", "missing": 3, "corrupted": 0 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'"} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
	# Remove four xl.meta's -- status becomes red (4 missing)
	OUTPUT=$(docker exec resiliency-minio1-1 /bin/sh -c "rm /data$((DATA_DRIVE))/test-bucket/initial-data/$FILE/xl.meta")
	OUTPUT=$(docker exec resiliency-minio2-1 /bin/sh -c "rm /data$((DATA_DRIVE + 1))/test-bucket/initial-data/$FILE/xl.meta")
	OUTPUT=$(docker exec resiliency-minio3-1 /bin/sh -c "rm /data$((DATA_DRIVE + 2))/test-bucket/initial-data/$FILE/xl.meta")
	OUTPUT=$(docker exec resiliency-minio4-1 /bin/sh -c "rm /data$((DATA_DRIVE + 3))/test-bucket/initial-data/$FILE/xl.meta")
	WANT='{ "before": { "color": "red", "missing": 4, "corrupted": 0 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'"} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
}
function test_resiliency_healing_truncated_parts() {
	# Truncates 1..4 copies of an object's part.1 to 10K and checks that
	# `mc admin heal` reports the expected corrupted counts (green ->
	# yellow -> red) and repairs the object back to green each time.
	echo
	echo -e "${GREEN}Running test_resiliency_healing_truncated_parts ...${NC}"
	DIR="initial-data"
	FILE="file2"
	DATA_DRIVE=$(find_erasure_set_for_file $FILE $DIR)
	# NOTE(review): $? here is the exit status of the command substitution
	# (i.e. of the function itself), not of anything inside it.
	STATUS=$?
	if [ $STATUS -ne 0 ]; then
		echo -e "${RED}Could not find erasure set for file: ${FILE}${NC}"
		echo -e "${RED}"${FUNCNAME[0]}" Failed${NC}"
		TESTS_RUN_STATUS=$((TESTS_RUN_STATUS & 0))
		return 1
	fi
	# Truncate single part -- status still green
	OUTPUT=$(docker exec resiliency-minio1-1 /bin/sh -c "truncate --size=10K /data$((DATA_DRIVE))/test-bucket/initial-data/$FILE/*/part.1")
	WANT='{ "before": { "color": "green", "missing": 0, "corrupted": 1 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'"} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
	# Truncate two parts -- status becomes yellow (2 missing)
	# NOTE(review): the /data{A..B} brace range is expanded by the
	# container's /bin/sh -- confirm that shell supports brace expansion.
	OUTPUT=$(docker exec resiliency-minio2-1 /bin/sh -c "truncate --size=10K /data{$((DATA_DRIVE))..$((DATA_DRIVE + 1))}/test-bucket/initial-data/$FILE/*/part.1")
	WANT='{ "before": { "color": "yellow", "missing": 0, "corrupted": 2 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'"} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
	# Truncate three parts -- status becomes red (3 missing)
	OUTPUT=$(docker exec resiliency-minio3-1 /bin/sh -c "truncate --size=10K /data{$((DATA_DRIVE))..$((DATA_DRIVE + 2))}/test-bucket/initial-data/$FILE/*/part.1")
	WANT='{ "before": { "color": "red", "missing": 0, "corrupted": 3 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'"} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
	# Truncate four parts -- status becomes red (4 missing)
	OUTPUT=$(docker exec resiliency-minio4-1 /bin/sh -c "truncate --size=10K /data{$((DATA_DRIVE))..$((DATA_DRIVE + 3))}/test-bucket/initial-data/$FILE/*/part.1")
	WANT='{ "before": { "color": "red", "missing": 0, "corrupted": 4 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'"} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
}
function induce_bitrot() {
	# Corrupt part.1 of FILE on node $1 under drive path $2 ($3 = FILE):
	# the file is rewritten as head+head+tail, which preserves the total
	# size (32 + 32 + (size-64)) while scrambling bytes 32..63 in place --
	# presumably enough for deep-scan bitrot detection to flag it.
	local NODE=$1
	local DIR=$2
	local FILE=$3
	# Figure out the UUID of the directory where the `part.*` files are stored
	UUID=$(docker exec resiliency-minio$NODE-1 /bin/sh -c "ls -l $DIR/test-bucket/initial-data/$FILE/*/part.1")
	# NOTE(review): field 9 of `ls -l` / path component 6 -- fragile if the
	# ls output format or mount layout changes.
	UUID=$(echo $UUID | cut -d " " -f 9 | cut -d "/" -f 6)
	# Determine head and tail size of file where we will introduce bitrot
	FILE_SIZE=$(docker exec resiliency-minio$NODE-1 /bin/sh -c "stat --printf="%s" $DIR/test-bucket/initial-data/$FILE/$UUID/part.1")
	TAIL_SIZE=$((FILE_SIZE - 32 * 2))
	# Extract head and tail of file
	$(docker exec resiliency-minio$NODE-1 /bin/sh -c "cat $DIR/test-bucket/initial-data/$FILE/$UUID/part.1 | head --bytes 32 > /tmp/head")
	$(docker exec resiliency-minio$NODE-1 /bin/sh -c "cat $DIR/test-bucket/initial-data/$FILE/$UUID/part.1 | tail --bytes $TAIL_SIZE > /tmp/tail")
	# Corrupt the part by writing head twice followed by tail
	$(docker exec resiliency-minio$NODE-1 /bin/sh -c "cat /tmp/head /tmp/head /tmp/tail > $DIR/test-bucket/initial-data/$FILE/$UUID/part.1")
}
function test_resiliency_healing_induced_bitrot() {
	# Induces bitrot in 1..4 copies of an object's part.1 (via
	# induce_bitrot) and checks that a deep-scan heal ("deep": true in the
	# args) reports the expected corrupted counts and repairs the object.
	echo
	echo -e "${GREEN}Running test_resiliency_healing_induced_bitrot ...${NC}"
	DIR="initial-data"
	FILE="file3"
	DATA_DRIVE=$(find_erasure_set_for_file $FILE $DIR)
	# NOTE(review): $? here is the exit status of the command substitution
	# (i.e. of the function itself), not of anything inside it.
	STATUS=$?
	if [ $STATUS -ne 0 ]; then
		echo -e "${RED}Could not find erasure set for file: ${FILE}${NC}"
		echo -e "${RED}"${FUNCNAME[0]}" Failed${NC}"
		TESTS_RUN_STATUS=$((TESTS_RUN_STATUS & 0))
		return 1
	fi
	# Induce bitrot in single part -- status still green
	induce_bitrot "2" "/data"$((DATA_DRIVE + 1)) $FILE
	WANT='{ "before": { "color": "green", "missing": 0, "corrupted": 1 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'", "deep": true} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
	# Induce bitrot in two parts -- status becomes yellow (2 corrupted)
	induce_bitrot "2" "/data"$((DATA_DRIVE)) $FILE
	induce_bitrot "1" "/data"$((DATA_DRIVE + 1)) $FILE
	WANT='{ "before": { "color": "yellow", "missing": 0, "corrupted": 2 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'", "deep": true} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
	# Induce bitrot in three parts -- status becomes red (3 corrupted)
	induce_bitrot "3" "/data"$((DATA_DRIVE)) $FILE
	induce_bitrot "2" "/data"$((DATA_DRIVE + 1)) $FILE
	induce_bitrot "1" "/data"$((DATA_DRIVE + 2)) $FILE
	WANT='{ "before": { "color": "red", "missing": 0, "corrupted": 3 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'", "deep": true} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
	# Induce bitrot in four parts -- status becomes red (4 corrupted)
	induce_bitrot "4" "/data"$((DATA_DRIVE)) $FILE
	induce_bitrot "3" "/data"$((DATA_DRIVE + 1)) $FILE
	induce_bitrot "2" "/data"$((DATA_DRIVE + 2)) $FILE
	induce_bitrot "1" "/data"$((DATA_DRIVE + 3)) $FILE
	WANT='{ "before": { "color": "red", "missing": 0, "corrupted": 4 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'", "deep": true} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
}
function induce_bitrot_for_xlmeta() {
	# Corrupt the inlined xl.meta of FILE on node $1 under drive path $2
	# ($3 = FILE). The file is rewritten as head + tail + tail so the
	# total size is preserved ((size-64) + 32 + 32) while the last 64
	# bytes are scrambled.
	local NODE=$1
	local DIR=$2
	local FILE=$3
	# Determine head and tail size of file where we will introduce bitrot
	FILE_SIZE=$(docker exec resiliency-minio$NODE-1 /bin/sh -c "stat --printf="%s" $DIR/test-bucket/inlined-data/$FILE/xl.meta")
	HEAD_SIZE=$((FILE_SIZE - 32 * 2))
	# Extract head and tail of file
	$(docker exec resiliency-minio$NODE-1 /bin/sh -c "cat $DIR/test-bucket/inlined-data/$FILE/xl.meta | head --bytes $HEAD_SIZE > /head")
	$(docker exec resiliency-minio$NODE-1 /bin/sh -c "cat $DIR/test-bucket/inlined-data/$FILE/xl.meta | tail --bytes 32 > /tail")
	# Corrupt xl.meta by writing head followed by tail twice.
	# (Fix: this previously concatenated "/head /tail tmp/tail"; the
	# nonexistent relative path "tmp/tail" truncated the file by 32 bytes
	# instead of producing a same-size corrupted copy.)
	$(docker exec resiliency-minio$NODE-1 /bin/sh -c "cat /head /tail /tail > $DIR/test-bucket/inlined-data/$FILE/xl.meta")
}
function test_resiliency_healing_inlined_metadata() {
	# Induces bitrot in 1..4 copies of a small object whose data is
	# inlined in xl.meta (via induce_bitrot_for_xlmeta) and checks that
	# `mc admin heal` reports the expected corrupted counts and repairs
	# the object back to green each time.
	echo
	echo -e "${GREEN}Running test_resiliency_healing_inlined_metadata ...${NC}"
	DIR="inlined-data"
	FILE="inlined"
	DATA_DRIVE=$(find_erasure_set_for_file $FILE $DIR)
	# NOTE(review): $? here is the exit status of the command substitution
	# (i.e. of the function itself), not of anything inside it.
	STATUS=$?
	if [ $STATUS -ne 0 ]; then
		echo -e "${RED}Could not find erasure set for file: ${FILE}${NC}"
		echo -e "${RED}"${FUNCNAME[0]}" Failed${NC}"
		TESTS_RUN_STATUS=$((TESTS_RUN_STATUS & 0))
		return 1
	fi
	# Induce bitrot in single inlined xl.meta -- status still green
	induce_bitrot_for_xlmeta "2" "/data"$((DATA_DRIVE + 1)) $FILE
	WANT='{ "before": { "color": "green", "missing": 0, "corrupted": 1 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'"} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
	# Induce bitrot in two inlined xl.meta's -- status becomes yellow (2 corrupted)
	induce_bitrot_for_xlmeta "3" "/data"$((DATA_DRIVE + 1)) $FILE
	induce_bitrot_for_xlmeta "3" "/data"$((DATA_DRIVE + 2)) $FILE
	WANT='{ "before": { "color": "yellow", "missing": 0, "corrupted": 2 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'"} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
	# Induce bitrot in three inlined xl.meta's -- status becomes red (3 corrupted)
	induce_bitrot_for_xlmeta "4" "/data"$((DATA_DRIVE + 1)) $FILE
	induce_bitrot_for_xlmeta "4" "/data"$((DATA_DRIVE + 2)) $FILE
	induce_bitrot_for_xlmeta "4" "/data"$((DATA_DRIVE + 3)) $FILE
	WANT='{ "before": { "color": "red", "missing": 0, "corrupted": 3 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'"} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
	# Induce bitrot in four inlined xl.meta's -- status becomes red (4 corrupted)
	induce_bitrot_for_xlmeta "1" "/data"$((DATA_DRIVE)) $FILE
	induce_bitrot_for_xlmeta "1" "/data"$((DATA_DRIVE + 1)) $FILE
	induce_bitrot_for_xlmeta "1" "/data"$((DATA_DRIVE + 2)) $FILE
	induce_bitrot_for_xlmeta "1" "/data"$((DATA_DRIVE + 3)) $FILE
	WANT='{ "before": { "color": "red", "missing": 0, "corrupted": 4 }, "after": { "color": "green", "missing": 0, "corrupted": 0 }, "args": {"file": "'${FILE}'", "dir": "'${DIR}'"} }'
	verify_resiliency_healing "${FUNCNAME[0]}" "${WANT}"
}
function main() {
	# Entry point: fetch the mc client if missing, (re)create the compose
	# deployment, seed it with data, run every resiliency test, and tear
	# everything down. Exits 0 only when all tests passed
	# (TESTS_RUN_STATUS stayed 1). Relies on DOCKER_COMPOSE_FILE being
	# exported by the caller (the Makefile target).
	if [ ! -f ./mc ]; then
		wget -q https://dl.minio.io/client/mc/release/linux-amd64/mc && chmod +x ./mc
	fi
	export MC_HOST_myminio=http://minioadmin:minioadmin@localhost:9000
	cleanup_and_prune
	# Run resiliency tests against MinIO
	docker compose -f "${DOCKER_COMPOSE_FILE}" up -d
	# Initial setup
	docs/resiliency/resiliency-initial-script.sh
	# Abort early if seeding failed; the seed script reports through its log.
	RESULT=$(grep "script passed" <resiliency-initial.log)
	if [ "$RESULT" != "script passed" ]; then
		cleanup_and_prune
		exit 1
	fi
	test_resiliency_healing_missing_xl_metas
	test_resiliency_healing_truncated_parts
	test_resiliency_healing_induced_bitrot
	test_resiliency_healing_inlined_metadata
	test_resiliency_success_with_disks_offline
	test_resiliency_failure_with_too_many_disks_offline
	test_resiliency_success_with_server_down
	test_resiliency_failure_with_server_down_and_single_disk_offline
	test_resiliency_failure_with_servers_down
	local rv=0
	if [ ${TESTS_RUN_STATUS} -ne 1 ]; then
		rv=1
	fi
	cleanup_and_prune
	exit $rv
}

main "$@"

View File

@ -0,0 +1,18 @@
#!/usr/bin/env bash

# Verifies that reads FAIL while the cluster is degraded below quorum: a
# recursive GET of the seeded objects must not succeed. The result is
# reported via resiliency-verify-failure.log ("script passed" only when the
# GET failed as expected); the test driver greps that log.

echo "script failed" >resiliency-verify-failure.log # assume initial state

ALIAS_NAME=myminio
BUCKET="test-bucket"
DEST_DIR="/tmp/dest"

OUT=$(./mc cp --quiet --recursive "${ALIAS_NAME}"/"${BUCKET}"/initial-data/ "${DEST_DIR}"/)
RET=${?}
if [ ${RET} -ne 0 ]; then
	# It is a success scenario as get objects should fail
	echo "GET objects failed as expected"
	echo "script passed" >resiliency-verify-failure.log
	exit 0
else
	echo "GET objects expected to fail, but succeeded: ${OUT}"
	# Fix: previously this branch fell through and the script exited 0
	# despite the verification failing; exit non-zero to reflect it.
	exit 1
fi

View File

@ -0,0 +1,27 @@
#!/usr/bin/env bash

# Heals one object and compares the heal result against an expectation.
# $1 is a JSON document whose .args carry the object (file/dir) and an
# optional deep-scan flag; the rest of the document is the expected
# before/after heal state. Outcome is reported through
# resiliency-verify-healing.log, which the test driver greps.

echo "script failed" >resiliency-verify-healing.log # assume initial state

# Extract arguments from json object ...
FILE=$(echo $1 | jq -r '.args.file')
DIR=$(echo $1 | jq -r '.args.dir')
DEEP=$(echo $1 | jq -r '.args.deep')
WANT=$(echo $1 | jq 'del(.args)') # ... and remove args from wanted result

ALIAS_NAME=myminio
BUCKET="test-bucket"

# jq filter that reduces `mc admin heal --json` output to the before/after
# color and missing/corrupted counters for the one object under test.
JQUERY='select(.name=="'"${BUCKET}"'/'"${DIR}"'/'"${FILE}"'") | {"before":{"color": .before.color, "missing": .before.missing, "corrupted": .before.corrupted},"after":{"color": .after.color, "missing": .after.missing, "corrupted": .after.corrupted}}'

if [ "$DEEP" = "true" ]; then
	SCAN_DEEP="--scan=deep"
fi

# SCAN_DEEP is intentionally unquoted so an empty value expands to nothing.
GOT=$(./mc admin heal --json ${SCAN_DEEP} ${ALIAS_NAME}/${BUCKET}/${DIR}/${FILE})
GOT=$(echo $GOT | jq "${JQUERY}")

# Compare canonicalized JSON (jq -S sorts keys) so key order is irrelevant.
if [ "$(echo "$GOT" | jq -S .)" = "$(echo "$WANT" | jq -S .)" ]; then
	echo "script passed" >resiliency-verify-healing.log
else
	echo "Error during healing:"
	echo "----GOT: "$GOT
	echo "---WANT: "$WANT
fi

View File

@ -0,0 +1,49 @@
#!/usr/bin/env bash
echo "script failed" >resiliency-verify.log # assume initial state
ALIAS_NAME=myminio
BUCKET="test-bucket"
SRC_DIR="/tmp/data"
DEST_DIR="/tmp/dest"
./mc admin config set "$ALIAS_NAME" api requests_max=400
OBJ_COUNT_AFTER_STOP=$(./mc ls "${ALIAS_NAME}"/"${BUCKET}"/initial-data/ | wc -l)
# Count should match the initial count of 10
if [ "${OBJ_COUNT_AFTER_STOP}" -ne 10 ]; then
echo "Expected 10 objects; received ${OBJ_COUNT_AFTER_STOP}"
exit 1
fi
./mc ready "${ALIAS_NAME}" --json
OUT=$(./mc cp --quiet "${SRC_DIR}"/* "${ALIAS_NAME}"/"${BUCKET}"/new-data/)
RET=${?}
if [ ${RET} -ne 0 ]; then
echo "Error copying objects to new prefix: ${OUT}"
exit 1
fi
OBJ_COUNT_AFTER_COPY=$(./mc ls "${ALIAS_NAME}"/"${BUCKET}"/new-data/ | wc -l)
if [ "${OBJ_COUNT_AFTER_COPY}" -ne "${OBJ_COUNT_AFTER_STOP}" ]; then
echo "Expected ${OBJ_COUNT_AFTER_STOP} objects; received ${OBJ_COUNT_AFTER_COPY}"
exit 1
fi
OUT=$(./mc cp --quiet --recursive "${ALIAS_NAME}"/"${BUCKET}"/new-data/ "${DEST_DIR}"/)
RET=${?}
if [ ${RET} -ne 0 ]; then
echo "Get objects failed: ${OUT}"
exit 1
fi
# Check if check sums match for source and destination directories
CHECK_SUM_SRC=$(sha384sum <(sha384sum "${SRC_DIR}"/* | cut -d " " -f 1 | sort) | cut -d " " -f 1)
CHECK_SUM_DEST=$(sha384sum <(sha384sum "${DEST_DIR}"/* | cut -d " " -f 1 | sort) | cut -d " " -f 1)
if [ "${CHECK_SUM_SRC}" != "${CHECK_SUM_DEST}" ]; then
echo "Checksum verification of source files and destination files failed"
exit 1
fi
echo "script passed" >resiliency-verify.log