mirror of https://github.com/minio/minio.git
readParts: Return error when quorum unavailable (#20389)
readParts requires that both the part.N and part.N.meta files be present. This change fixes how the error returned to the upper layers is chosen when an UploadPart operation has failed on a majority of the drives.
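For context, the rule the patch enforces can be restated outside of MinIO's code: a part is accepted only when a single ETag for it is reported by at least read-quorum drives, and anything short of that surfaces to the caller as a part-specific error. The following is a minimal, self-contained Go sketch of that selection rule, not MinIO's implementation; partResult, pickPart, and errInvalidPart are hypothetical names used only for illustration, and readQuorum=5 is an assumed value.

package main

import (
	"errors"
	"fmt"
)

// partResult is a hypothetical per-drive read result for one part:
// either the ETag recorded in part.N.meta, or the error seen on that drive.
type partResult struct {
	ETag string
	Err  error
}

var errInvalidPart = errors.New("specified part could not be found")

// pickPart sketches the quorum rule: accept the part only when the most
// common ETag is reported by at least readQuorum drives; otherwise return
// a per-part error instead of a generic one.
func pickPart(results []partResult, partNumber, readQuorum int) (string, error) {
	counts := make(map[string]int)
	for _, r := range results {
		if r.Err == nil && r.ETag != "" {
			counts[r.ETag]++
		}
	}
	var maxETag string
	var maxCount int
	for etag, n := range counts {
		if n > maxCount {
			maxETag, maxCount = etag, n
		}
	}
	if maxCount >= readQuorum {
		return maxETag, nil
	}
	return "", fmt.Errorf("part %d: %w", partNumber, errInvalidPart)
}

func main() {
	// Suppose part.1 is readable on only 4 of 10 drives (the CI script in
	// this commit deletes it from 6 of them); with a read quorum of 5 the
	// multipart completion must fail with a part-specific error.
	results := make([]partResult, 10)
	for i := 0; i < 4; i++ {
		results[i] = partResult{ETag: "5f363e0e58a95f06cbe9bbc662c5dfb6"}
	}
	for i := 4; i < 10; i++ {
		results[i] = partResult{Err: errors.New("file not found")}
	}
	_, err := pickPart(results, 1, 5)
	fmt.Println(err) // part 1: specified part could not be found
}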
parent b6b7cddc9c
commit a0f9e9f661
@@ -70,3 +70,9 @@ jobs:
           sudo sysctl net.ipv6.conf.all.disable_ipv6=0
           sudo sysctl net.ipv6.conf.default.disable_ipv6=0
           make test-versioning
+
+      - name: Test Multipart upload with failures
+        run: |
+          sudo sysctl net.ipv6.conf.all.disable_ipv6=0
+          sudo sysctl net.ipv6.conf.default.disable_ipv6=0
+          make test-multipart
Makefile
@@ -133,6 +133,10 @@ test-site-replication-minio: install-race ## verify automatic site replication
 	@echo "Running tests for automatic site replication of SSE-C objects with compression enabled for site"
 	@(env bash $(PWD)/docs/site-replication/run-ssec-object-replication-with-compression.sh)
 
+test-multipart: install-race ## test multipart
+	@echo "Test multipart behavior when part files are missing"
+	@(env bash $(PWD)/buildscripts/multipart-quorum-test.sh)
+
 verify: install-race ## verify minio various setups
 	@echo "Verifying build with race"
 	@(env bash $(PWD)/buildscripts/verify-build.sh)
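With this target in place, the behavior can be exercised locally by running make test-multipart: the install-race prerequisite builds the race-enabled minio binary, and the recipe then runs buildscripts/multipart-quorum-test.sh, the new script shown below, against a fresh 10-drive setup.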
@@ -0,0 +1,126 @@
#!/bin/bash

if [ -n "$TEST_DEBUG" ]; then
	set -x
fi

WORK_DIR="$PWD/.verify-$RANDOM"
MINIO_CONFIG_DIR="$WORK_DIR/.minio"
MINIO=("$PWD/minio" --config-dir "$MINIO_CONFIG_DIR" server)

if [ ! -x "$PWD/minio" ]; then
	echo "minio executable binary not found in current directory"
	exit 1
fi

if [ ! -x "$PWD/minio" ]; then
	echo "minio executable binary not found in current directory"
	exit 1
fi

trap 'catch $LINENO' ERR

function purge() {
	rm -rf "$1"
}

# shellcheck disable=SC2120
catch() {
	if [ $# -ne 0 ]; then
		echo "error on line $1"
	fi

	echo "Cleaning up instances of MinIO"
	pkill minio || true
	pkill -9 minio || true
	purge "$WORK_DIR"
	if [ $# -ne 0 ]; then
		exit $#
	fi
}

catch

function start_minio_10drive() {
	start_port=$1

	export MINIO_ROOT_USER=minio
	export MINIO_ROOT_PASSWORD=minio123
	export MC_HOST_minio="http://minio:minio123@127.0.0.1:${start_port}/"
	unset MINIO_KMS_AUTO_ENCRYPTION # do not auto-encrypt objects
	export MINIO_CI_CD=1

	mkdir ${WORK_DIR}
	C_PWD=${PWD}
	if [ ! -x "$PWD/mc" ]; then
		MC_BUILD_DIR="mc-$RANDOM"
		if ! git clone --quiet https://github.com/minio/mc "$MC_BUILD_DIR"; then
			echo "failed to download https://github.com/minio/mc"
			purge "${MC_BUILD_DIR}"
			exit 1
		fi

		(cd "${MC_BUILD_DIR}" && go build -o "$C_PWD/mc")

		# remove mc source.
		purge "${MC_BUILD_DIR}"
	fi

	"${MINIO[@]}" --address ":$start_port" "${WORK_DIR}/disk{1...10}" >"${WORK_DIR}/server1.log" 2>&1 &
	pid=$!
	disown $pid
	sleep 5

	if ! ps -p ${pid} 1>&2 >/dev/null; then
		echo "server1 log:"
		cat "${WORK_DIR}/server1.log"
		echo "FAILED"
		purge "$WORK_DIR"
		exit 1
	fi

	"${PWD}/mc" mb --with-versioning minio/bucket

	export AWS_ACCESS_KEY_ID=minio
	export AWS_SECRET_ACCESS_KEY=minio123
	aws --endpoint-url http://localhost:"$start_port" s3api create-multipart-upload --bucket bucket --key obj-1 >upload-id.json
	uploadId=$(jq -r '.UploadId' upload-id.json)

	truncate -s 5MiB file-5mib
	for i in {1..2}; do
		aws --endpoint-url http://localhost:"$start_port" s3api upload-part \
			--upload-id "$uploadId" --bucket bucket --key obj-1 \
			--part-number "$i" --body ./file-5mib
	done
	for i in {1..6}; do
		find ${WORK_DIR}/disk${i}/.minio.sys/multipart/ -type f -name "part.1" -delete
	done
	cat <<EOF >parts.json
{
	"Parts": [
		{
			"PartNumber": 1,
			"ETag": "5f363e0e58a95f06cbe9bbc662c5dfb6"
		},
		{
			"PartNumber": 2,
			"ETag": "5f363e0e58a95f06cbe9bbc662c5dfb6"
		}
	]
}
EOF
	err=$(aws --endpoint-url http://localhost:"$start_port" s3api complete-multipart-upload --upload-id "$uploadId" --bucket bucket --key obj-1 --multipart-upload file://./parts.json 2>&1)
	rv=$?
	if [ $rv -eq 0 ]; then
		echo "Failed to receive an error"
		exit 1
	fi
	echo "Received an error during complete-multipart as expected: $err"
}

function main() {
	start_port=$(shuf -i 10000-65000 -n 1)
	start_minio_10drive ${start_port}
}

main "$@"
@@ -985,27 +985,25 @@ func readParts(ctx context.Context, disks []StorageAPI, bucket string, partMetaP
 	}
 
 	partInfosInQuorum := make([]ObjectPartInfo, len(partMetaPaths))
-	partMetaQuorumMap := make(map[string]int, len(partNumbers))
 	for pidx := range partMetaPaths {
+		// partMetaQuorumMap uses
+		// - path/to/part.N as key to collate errors from failed drives.
+		// - part ETag to collate part metadata
+		partMetaQuorumMap := make(map[string]int, len(partNumbers))
 		var pinfos []*ObjectPartInfo
 		for idx := range disks {
-			if len(objectPartInfos[idx]) == 0 {
+			if len(objectPartInfos[idx]) != len(partMetaPaths) {
 				partMetaQuorumMap[partMetaPaths[pidx]]++
 				continue
 			}
 
 			pinfo := objectPartInfos[idx][pidx]
-			if pinfo == nil {
-				partMetaQuorumMap[partMetaPaths[pidx]]++
-				continue
-			}
-
-			if pinfo.ETag == "" {
-				partMetaQuorumMap[partMetaPaths[pidx]]++
-			} else {
+			if pinfo != nil && pinfo.ETag != "" {
 				pinfos = append(pinfos, pinfo)
 				partMetaQuorumMap[pinfo.ETag]++
+				continue
 			}
+			partMetaQuorumMap[partMetaPaths[pidx]]++
 		}
 
 		var maxQuorum int
@@ -1018,50 +1016,48 @@
 				maxPartMeta = etag
 			}
 		}
-		var pinfo *ObjectPartInfo
-		for _, pinfo = range pinfos {
-			if pinfo != nil && maxETag != "" && pinfo.ETag == maxETag {
+		// found is a representative ObjectPartInfo which either has the maximally occurring ETag or an error.
+		var found *ObjectPartInfo
+		for _, pinfo := range pinfos {
+			if pinfo == nil {
+				continue
+			}
+			if maxETag != "" && pinfo.ETag == maxETag {
+				found = pinfo
 				break
 			}
-			if maxPartMeta != "" && path.Base(maxPartMeta) == fmt.Sprintf("part.%d.meta", pinfo.Number) {
+			if pinfo.ETag == "" && maxPartMeta != "" && path.Base(maxPartMeta) == fmt.Sprintf("part.%d.meta", pinfo.Number) {
+				found = pinfo
 				break
 			}
 		}
 
-		if pinfo != nil && pinfo.ETag != "" && partMetaQuorumMap[maxETag] >= readQuorum {
-			partInfosInQuorum[pidx] = *pinfo
+		if found != nil && found.ETag != "" && partMetaQuorumMap[maxETag] >= readQuorum {
+			partInfosInQuorum[pidx] = *found
 			continue
 		}
 
-		if partMetaQuorumMap[maxPartMeta] == len(disks) {
-			if pinfo != nil && pinfo.Error != "" {
-				partInfosInQuorum[pidx] = ObjectPartInfo{Error: pinfo.Error}
-			} else {
-				partInfosInQuorum[pidx] = ObjectPartInfo{
-					Error: InvalidPart{
-						PartNumber: partNumbers[pidx],
-					}.Error(),
-				}
-			}
-		} else {
-			partInfosInQuorum[pidx] = ObjectPartInfo{Error: errErasureReadQuorum.Error()}
+		partInfosInQuorum[pidx] = ObjectPartInfo{
+			Number: partNumbers[pidx],
+			Error: InvalidPart{
+				PartNumber: partNumbers[pidx],
+			}.Error(),
 		}
-
 	}
 	return partInfosInQuorum, nil
 }
 
-func errStrToPartErr(errStr string) error {
-	if strings.Contains(errStr, "file not found") {
-		return InvalidPart{}
+func objPartToPartErr(part ObjectPartInfo) error {
+	if strings.Contains(part.Error, "file not found") {
+		return InvalidPart{PartNumber: part.Number}
 	}
-	if strings.Contains(errStr, "Specified part could not be found") {
-		return InvalidPart{}
+	if strings.Contains(part.Error, "Specified part could not be found") {
+		return InvalidPart{PartNumber: part.Number}
 	}
-	if strings.Contains(errStr, errErasureReadQuorum.Error()) {
+	if strings.Contains(part.Error, errErasureReadQuorum.Error()) {
 		return errErasureReadQuorum
 	}
-	return errors.New(errStr)
+	return errors.New(part.Error)
 }
 
 // CompleteMultipartUpload - completes an ongoing multipart
@@ -1196,7 +1192,7 @@ func (er erasureObjects) CompleteMultipartUpload(ctx context.Context, bucket str
 
 	for idx, part := range partInfoFiles {
 		if part.Error != "" {
-			err = errStrToPartErr(part.Error)
+			err = objPartToPartErr(part)
 			bugLogIf(ctx, err)
 			return oi, err
 		}