reference format obtained doesn't need further validation (#8964)

We don't need to run validateFormats again once we have obtained
the reference format. At this stage another server may be doing
a disk heal during startup, and once in a while, due to delays,
we get false positives and our server doesn't start.

The format in quorum, taken as the reference format, can be
assumed to be valid, so we proceed further unless and until
HealFormat re-inits the disks after a successful heal.

Also use a separate port for healing tests to avoid any
conflicts with regular build testing.

Fixes #8884
This commit is contained in:
Harshavardhana 2020-02-14 03:31:41 +05:30 committed by GitHub
parent 78125ee853
commit d1144c2c7e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 24 additions and 28 deletions

View File

@ -34,16 +34,16 @@ function start_minio_3_node() {
export MINIO_SECRET_KEY=minio123 export MINIO_SECRET_KEY=minio123
for i in $(seq 1 3); do for i in $(seq 1 3); do
ARGS+=("http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/1/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/2/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/3/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/4/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/5/ http://127.0.0.1:$[9000+$i]${WORK_DIR}/$i/6/") ARGS+=("http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/1/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/2/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/3/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/4/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/5/ http://127.0.0.1:$[8000+$i]${WORK_DIR}/$i/6/")
done done
"${MINIO[@]}" --address ":9001" ${ARGS[@]} > "${WORK_DIR}/dist-minio-9001.log" 2>&1 & "${MINIO[@]}" --address ":8001" ${ARGS[@]} > "${WORK_DIR}/dist-minio-8001.log" 2>&1 &
minio_pids[0]=$! minio_pids[0]=$!
"${MINIO[@]}" --address ":9002" ${ARGS[@]} > "${WORK_DIR}/dist-minio-9002.log" 2>&1 & "${MINIO[@]}" --address ":8002" ${ARGS[@]} > "${WORK_DIR}/dist-minio-8002.log" 2>&1 &
minio_pids[1]=$! minio_pids[1]=$!
"${MINIO[@]}" --address ":9003" ${ARGS[@]} > "${WORK_DIR}/dist-minio-9003.log" 2>&1 & "${MINIO[@]}" --address ":8003" ${ARGS[@]} > "${WORK_DIR}/dist-minio-8003.log" 2>&1 &
minio_pids[2]=$! minio_pids[2]=$!
sleep "$1" sleep "$1"
@ -53,7 +53,7 @@ function start_minio_3_node() {
function check_online() { function check_online() {
for i in $(seq 1 3); do for i in $(seq 1 3); do
if grep -q 'Server switching to safe mode' ${WORK_DIR}/dist-minio-$[9000+$i].log; then if grep -q 'Server switching to safe mode' ${WORK_DIR}/dist-minio-$[8000+$i].log; then
echo "1" echo "1"
fi fi
done done
@ -80,7 +80,7 @@ function perform_test_1() {
if ! kill "$pid"; then if ! kill "$pid"; then
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -98,7 +98,7 @@ function perform_test_1() {
if ! kill "$pid"; then if ! kill "$pid"; then
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -113,7 +113,7 @@ function perform_test_1() {
done done
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -127,7 +127,7 @@ function perform_test_2() {
if ! kill "$pid"; then if ! kill "$pid"; then
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -145,7 +145,7 @@ function perform_test_2() {
if ! kill "$pid"; then if ! kill "$pid"; then
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -160,7 +160,7 @@ function perform_test_2() {
done done
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -174,7 +174,7 @@ function perform_test_3() {
if ! kill "$pid"; then if ! kill "$pid"; then
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -192,7 +192,7 @@ function perform_test_3() {
if ! kill "$pid"; then if ! kill "$pid"; then
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"
@ -207,7 +207,7 @@ function perform_test_3() {
done done
for i in $(seq 1 3); do for i in $(seq 1 3); do
echo "server$i log:" echo "server$i log:"
cat "${WORK_DIR}/dist-minio-$[9000+$i].log" cat "${WORK_DIR}/dist-minio-$[8000+$i].log"
done done
echo "FAILED" echo "FAILED"
purge "$WORK_DIR" purge "$WORK_DIR"

View File

@ -75,6 +75,7 @@ func (h *healRoutine) run() {
break break
} }
// Wait and proceed if there are active requests
waitForLowHTTPReq(int32(globalEndpoints.Nodes())) waitForLowHTTPReq(int32(globalEndpoints.Nodes()))
var res madmin.HealResultItem var res madmin.HealResultItem

View File

@ -593,9 +593,13 @@ func listIAMConfigItems(objectAPI ObjectLayer, pathPrefix string, dirs bool,
return return
} }
// Attempt a slow down load only when server is
// active and initialized.
if !globalSafeMode {
// Slow down listing and loading for config items to // Slow down listing and loading for config items to
// reduce load on the server // reduce load on the server
waitForLowHTTPReq(int32(globalEndpoints.Nodes())) waitForLowHTTPReq(int32(globalEndpoints.Nodes()))
}
marker = lo.NextMarker marker = lo.NextMarker
lister := dirList(lo) lister := dirList(lo)

View File

@ -302,17 +302,6 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
return nil, err return nil, err
} }
// Validate all format configs with reference format.
if err = validateXLFormats(format, formatConfigs, endpoints, setCount, drivesPerSet); err != nil {
return nil, err
}
// Get the deploymentID if set.
format.ID, err = formatXLGetDeploymentID(format, formatConfigs)
if err != nil {
return nil, err
}
if format.ID == "" { if format.ID == "" {
// Not a first disk, wait until first disk fixes deploymentID // Not a first disk, wait until first disk fixes deploymentID
if !firstDisk { if !firstDisk {

View File

@ -1664,6 +1664,7 @@ func (s *xlSets) HealObjects(ctx context.Context, bucket, prefix string, healObj
continue continue
} }
// Wait and proceed if there are active requests
waitForLowHTTPReq(int32(s.drivesPerSet)) waitForLowHTTPReq(int32(s.drivesPerSet))
if err := healObject(bucket, entry.Name); err != nil { if err := healObject(bucket, entry.Name); err != nil {

View File

@ -1363,6 +1363,7 @@ func (z *xlZones) HealObjects(ctx context.Context, bucket, prefix string, healOb
continue continue
} }
// Wait and proceed if there are active requests
waitForLowHTTPReq(int32(zoneDrivesPerSet[zoneIndex])) waitForLowHTTPReq(int32(zoneDrivesPerSet[zoneIndex]))
if err := healObject(bucket, entry.Name); err != nil { if err := healObject(bucket, entry.Name); err != nil {