From a0f9e9f6612cf65c9c7712d4cd4f907fcb6e5ea4 Mon Sep 17 00:00:00 2001 From: Krishnan Parthasarathi Date: Fri, 6 Sep 2024 03:51:23 -0700 Subject: [PATCH] readParts: Return error when quorum unavailable (#20389) readParts requires that both part.N and part.N.meta files be present. This change addresses an issue with how an error to return to the upper layers was picked from most drives where a UploadPart operation had failed. --- .github/workflows/replication.yaml | 6 ++ Makefile | 4 + buildscripts/multipart-quorum-test.sh | 126 ++++++++++++++++++++++++++ cmd/erasure-multipart.go | 72 +++++++-------- 4 files changed, 170 insertions(+), 38 deletions(-) create mode 100644 buildscripts/multipart-quorum-test.sh diff --git a/.github/workflows/replication.yaml b/.github/workflows/replication.yaml index b4917348f..753ecf8b9 100644 --- a/.github/workflows/replication.yaml +++ b/.github/workflows/replication.yaml @@ -70,3 +70,9 @@ jobs: sudo sysctl net.ipv6.conf.all.disable_ipv6=0 sudo sysctl net.ipv6.conf.default.disable_ipv6=0 make test-versioning + + - name: Test Multipart upload with failures + run: | + sudo sysctl net.ipv6.conf.all.disable_ipv6=0 + sudo sysctl net.ipv6.conf.default.disable_ipv6=0 + make test-multipart diff --git a/Makefile b/Makefile index 27745ea5c..7594e5181 100644 --- a/Makefile +++ b/Makefile @@ -133,6 +133,10 @@ test-site-replication-minio: install-race ## verify automatic site replication @echo "Running tests for automatic site replication of SSE-C objects with compression enabled for site" @(env bash $(PWD)/docs/site-replication/run-ssec-object-replication-with-compression.sh) +test-multipart: install-race ## test multipart + @echo "Test multipart behavior when part files are missing" + @(env bash $(PWD)/buildscripts/multipart-quorum-test.sh) + verify: install-race ## verify minio various setups @echo "Verifying build with race" @(env bash $(PWD)/buildscripts/verify-build.sh) diff --git a/buildscripts/multipart-quorum-test.sh b/buildscripts/multipart-quorum-test.sh new file mode 100644 index 000000000..f226b0e5a --- /dev/null +++ b/buildscripts/multipart-quorum-test.sh @@ -0,0 +1,126 @@ +#!/bin/bash + +if [ -n "$TEST_DEBUG" ]; then + set -x +fi + +WORK_DIR="$PWD/.verify-$RANDOM" +MINIO_CONFIG_DIR="$WORK_DIR/.minio" +MINIO=("$PWD/minio" --config-dir "$MINIO_CONFIG_DIR" server) + +if [ ! -x "$PWD/minio" ]; then + echo "minio executable binary not found in current directory" + exit 1 +fi + +if [ ! -x "$PWD/minio" ]; then + echo "minio executable binary not found in current directory" + exit 1 +fi + +trap 'catch $LINENO' ERR + +function purge() { + rm -rf "$1" +} + +# shellcheck disable=SC2120 +catch() { + if [ $# -ne 0 ]; then + echo "error on line $1" + fi + + echo "Cleaning up instances of MinIO" + pkill minio || true + pkill -9 minio || true + purge "$WORK_DIR" + if [ $# -ne 0 ]; then + exit $# + fi +} + +catch + +function start_minio_10drive() { + start_port=$1 + + export MINIO_ROOT_USER=minio + export MINIO_ROOT_PASSWORD=minio123 + export MC_HOST_minio="http://minio:minio123@127.0.0.1:${start_port}/" + unset MINIO_KMS_AUTO_ENCRYPTION # do not auto-encrypt objects + export MINIO_CI_CD=1 + + mkdir ${WORK_DIR} + C_PWD=${PWD} + if [ ! -x "$PWD/mc" ]; then + MC_BUILD_DIR="mc-$RANDOM" + if ! git clone --quiet https://github.com/minio/mc "$MC_BUILD_DIR"; then + echo "failed to download https://github.com/minio/mc" + purge "${MC_BUILD_DIR}" + exit 1 + fi + + (cd "${MC_BUILD_DIR}" && go build -o "$C_PWD/mc") + + # remove mc source. + purge "${MC_BUILD_DIR}" + fi + + "${MINIO[@]}" --address ":$start_port" "${WORK_DIR}/disk{1...10}" >"${WORK_DIR}/server1.log" 2>&1 & + pid=$! + disown $pid + sleep 5 + + if ! ps -p ${pid} 1>&2 >/dev/null; then + echo "server1 log:" + cat "${WORK_DIR}/server1.log" + echo "FAILED" + purge "$WORK_DIR" + exit 1 + fi + + "${PWD}/mc" mb --with-versioning minio/bucket + + export AWS_ACCESS_KEY_ID=minio + export AWS_SECRET_ACCESS_KEY=minio123 + aws --endpoint-url http://localhost:"$start_port" s3api create-multipart-upload --bucket bucket --key obj-1 >upload-id.json + uploadId=$(jq -r '.UploadId' upload-id.json) + + truncate -s 5MiB file-5mib + for i in {1..2}; do + aws --endpoint-url http://localhost:"$start_port" s3api upload-part \ + --upload-id "$uploadId" --bucket bucket --key obj-1 \ + --part-number "$i" --body ./file-5mib + done + for i in {1..6}; do + find ${WORK_DIR}/disk${i}/.minio.sys/multipart/ -type f -name "part.1" -delete + done + cat <parts.json +{ + "Parts": [ + { + "PartNumber": 1, + "ETag": "5f363e0e58a95f06cbe9bbc662c5dfb6" + }, + { + "PartNumber": 2, + "ETag": "5f363e0e58a95f06cbe9bbc662c5dfb6" + } + ] +} +EOF + err=$(aws --endpoint-url http://localhost:"$start_port" s3api complete-multipart-upload --upload-id "$uploadId" --bucket bucket --key obj-1 --multipart-upload file://./parts.json 2>&1) + rv=$? + if [ $rv -eq 0 ]; then + echo "Failed to receive an error" + exit 1 + fi + echo "Received an error during complete-multipart as expected: $err" +} + +function main() { + start_port=$(shuf -i 10000-65000 -n 1) + start_minio_10drive ${start_port} +} + +main "$@" diff --git a/cmd/erasure-multipart.go b/cmd/erasure-multipart.go index 03e556937..5a4524081 100644 --- a/cmd/erasure-multipart.go +++ b/cmd/erasure-multipart.go @@ -985,27 +985,25 @@ func readParts(ctx context.Context, disks []StorageAPI, bucket string, partMetaP } partInfosInQuorum := make([]ObjectPartInfo, len(partMetaPaths)) - partMetaQuorumMap := make(map[string]int, len(partNumbers)) for pidx := range partMetaPaths { + // partMetaQuorumMap uses + // - path/to/part.N as key to collate errors from failed drives. + // - part ETag to collate part metadata + partMetaQuorumMap := make(map[string]int, len(partNumbers)) var pinfos []*ObjectPartInfo for idx := range disks { - if len(objectPartInfos[idx]) == 0 { + if len(objectPartInfos[idx]) != len(partMetaPaths) { partMetaQuorumMap[partMetaPaths[pidx]]++ continue } pinfo := objectPartInfos[idx][pidx] - if pinfo == nil { - partMetaQuorumMap[partMetaPaths[pidx]]++ - continue - } - - if pinfo.ETag == "" { - partMetaQuorumMap[partMetaPaths[pidx]]++ - } else { + if pinfo != nil && pinfo.ETag != "" { pinfos = append(pinfos, pinfo) partMetaQuorumMap[pinfo.ETag]++ + continue } + partMetaQuorumMap[partMetaPaths[pidx]]++ } var maxQuorum int @@ -1018,50 +1016,48 @@ func readParts(ctx context.Context, disks []StorageAPI, bucket string, partMetaP maxPartMeta = etag } } - - var pinfo *ObjectPartInfo - for _, pinfo = range pinfos { - if pinfo != nil && maxETag != "" && pinfo.ETag == maxETag { + // found is a representative ObjectPartInfo which either has the maximally occurring ETag or an error. + var found *ObjectPartInfo + for _, pinfo := range pinfos { + if pinfo == nil { + continue + } + if maxETag != "" && pinfo.ETag == maxETag { + found = pinfo break } - if maxPartMeta != "" && path.Base(maxPartMeta) == fmt.Sprintf("part.%d.meta", pinfo.Number) { + if pinfo.ETag == "" && maxPartMeta != "" && path.Base(maxPartMeta) == fmt.Sprintf("part.%d.meta", pinfo.Number) { + found = pinfo break } } - if pinfo != nil && pinfo.ETag != "" && partMetaQuorumMap[maxETag] >= readQuorum { - partInfosInQuorum[pidx] = *pinfo + if found != nil && found.ETag != "" && partMetaQuorumMap[maxETag] >= readQuorum { + partInfosInQuorum[pidx] = *found continue } - - if partMetaQuorumMap[maxPartMeta] == len(disks) { - if pinfo != nil && pinfo.Error != "" { - partInfosInQuorum[pidx] = ObjectPartInfo{Error: pinfo.Error} - } else { - partInfosInQuorum[pidx] = ObjectPartInfo{ - Error: InvalidPart{ - PartNumber: partNumbers[pidx], - }.Error(), - } - } - } else { - partInfosInQuorum[pidx] = ObjectPartInfo{Error: errErasureReadQuorum.Error()} + partInfosInQuorum[pidx] = ObjectPartInfo{ + Number: partNumbers[pidx], + Error: InvalidPart{ + PartNumber: partNumbers[pidx], + }.Error(), } + } return partInfosInQuorum, nil } -func errStrToPartErr(errStr string) error { - if strings.Contains(errStr, "file not found") { - return InvalidPart{} +func objPartToPartErr(part ObjectPartInfo) error { + if strings.Contains(part.Error, "file not found") { + return InvalidPart{PartNumber: part.Number} } - if strings.Contains(errStr, "Specified part could not be found") { - return InvalidPart{} + if strings.Contains(part.Error, "Specified part could not be found") { + return InvalidPart{PartNumber: part.Number} } - if strings.Contains(errStr, errErasureReadQuorum.Error()) { + if strings.Contains(part.Error, errErasureReadQuorum.Error()) { return errErasureReadQuorum } - return errors.New(errStr) + return errors.New(part.Error) } // CompleteMultipartUpload - completes an ongoing multipart @@ -1196,7 +1192,7 @@ func (er erasureObjects) CompleteMultipartUpload(ctx context.Context, bucket str for idx, part := range partInfoFiles { if part.Error != "" { - err = errStrToPartErr(part.Error) + err = objPartToPartErr(part) bugLogIf(ctx, err) return oi, err }