mirror of https://github.com/minio/minio.git (synced 2025-03-31 09:43:43 -04:00)
readParts: Return error when quorum unavailable (#20389)
readParts requires that both the part.N and part.N.meta files be present. This change fixes how the error returned to the upper layers is picked when an UploadPart operation has failed on most drives.
parent b6b7cddc9c
commit a0f9e9f661
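For intuition about the fix before reading the diffs: readParts tallies, per part, what every drive reports, and it should only trust an ETag that enough drives agree on; when agreement falls below read quorum it must return an error rather than pick a result from a minority of drives. The following is a minimal, self-contained sketch of that idea with simplified stand-in types (partResult, pickPartInQuorum and the quorum value are illustrative, not MinIO's internal API):

package main

import (
	"errors"
	"fmt"
)

// partResult is a simplified stand-in for what a single drive reports for one
// part: either an ETag read from part.N.meta, or an error string.
type partResult struct {
	ETag string
	Err  string
}

var errReadQuorum = errors.New("read quorum unavailable for part")

// pickPartInQuorum mirrors the shape of the fix: tally what each drive reports
// for one part and accept an ETag only when enough drives agree on it;
// otherwise return an error instead of guessing from a minority of drives.
func pickPartInQuorum(perDrive []partResult, readQuorum int) (string, error) {
	etagCount := make(map[string]int)
	for _, r := range perDrive {
		if r.ETag != "" {
			etagCount[r.ETag]++ // collate part metadata by ETag
		}
	}
	// Pick the ETag the most drives agree on.
	var bestETag string
	for etag, n := range etagCount {
		if n > etagCount[bestETag] {
			bestETag = etag
		}
	}
	if bestETag != "" && etagCount[bestETag] >= readQuorum {
		return bestETag, nil
	}
	return "", errReadQuorum
}

func main() {
	// 10 drives, read quorum 6 (illustrative numbers): part.1 survives on only
	// 4 drives, the other 6 report "file not found", so quorum is unavailable.
	perDrive := make([]partResult, 0, 10)
	for i := 0; i < 4; i++ {
		perDrive = append(perDrive, partResult{ETag: "5f363e0e58a95f06cbe9bbc662c5dfb6"})
	}
	for i := 0; i < 6; i++ {
		perDrive = append(perDrive, partResult{Err: "file not found"})
	}
	_, err := pickPartInQuorum(perDrive, 6)
	fmt.Println(err) // read quorum unavailable for part
}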
.github/workflows/replication.yaml (vendored) | 6 changed lines
@@ -70,3 +70,9 @@ jobs:
           sudo sysctl net.ipv6.conf.all.disable_ipv6=0
           sudo sysctl net.ipv6.conf.default.disable_ipv6=0
           make test-versioning
+
+      - name: Test Multipart upload with failures
+        run: |
+          sudo sysctl net.ipv6.conf.all.disable_ipv6=0
+          sudo sysctl net.ipv6.conf.default.disable_ipv6=0
+          make test-multipart
Makefile | 4 changed lines
@@ -133,6 +133,10 @@ test-site-replication-minio: install-race ## verify automatic site replication
 	@echo "Running tests for automatic site replication of SSE-C objects with compression enabled for site"
 	@(env bash $(PWD)/docs/site-replication/run-ssec-object-replication-with-compression.sh)
 
+test-multipart: install-race ## test multipart
+	@echo "Test multipart behavior when part files are missing"
+	@(env bash $(PWD)/buildscripts/multipart-quorum-test.sh)
+
 verify: install-race ## verify minio various setups
 	@echo "Verifying build with race"
 	@(env bash $(PWD)/buildscripts/verify-build.sh)
buildscripts/multipart-quorum-test.sh (new file) | 126 added lines
@@ -0,0 +1,126 @@
+#!/bin/bash
+
+if [ -n "$TEST_DEBUG" ]; then
+	set -x
+fi
+
+WORK_DIR="$PWD/.verify-$RANDOM"
+MINIO_CONFIG_DIR="$WORK_DIR/.minio"
+MINIO=("$PWD/minio" --config-dir "$MINIO_CONFIG_DIR" server)
+
+if [ ! -x "$PWD/minio" ]; then
+	echo "minio executable binary not found in current directory"
+	exit 1
+fi
+
+if [ ! -x "$PWD/minio" ]; then
+	echo "minio executable binary not found in current directory"
+	exit 1
+fi
+
+trap 'catch $LINENO' ERR
+
+function purge() {
+	rm -rf "$1"
+}
+
+# shellcheck disable=SC2120
+catch() {
+	if [ $# -ne 0 ]; then
+		echo "error on line $1"
+	fi
+
+	echo "Cleaning up instances of MinIO"
+	pkill minio || true
+	pkill -9 minio || true
+	purge "$WORK_DIR"
+	if [ $# -ne 0 ]; then
+		exit $#
+	fi
+}
+
+catch
+
+function start_minio_10drive() {
+	start_port=$1
+
+	export MINIO_ROOT_USER=minio
+	export MINIO_ROOT_PASSWORD=minio123
+	export MC_HOST_minio="http://minio:minio123@127.0.0.1:${start_port}/"
+	unset MINIO_KMS_AUTO_ENCRYPTION # do not auto-encrypt objects
+	export MINIO_CI_CD=1
+
+	mkdir ${WORK_DIR}
+	C_PWD=${PWD}
+	if [ ! -x "$PWD/mc" ]; then
+		MC_BUILD_DIR="mc-$RANDOM"
+		if ! git clone --quiet https://github.com/minio/mc "$MC_BUILD_DIR"; then
+			echo "failed to download https://github.com/minio/mc"
+			purge "${MC_BUILD_DIR}"
+			exit 1
+		fi
+
+		(cd "${MC_BUILD_DIR}" && go build -o "$C_PWD/mc")
+
+		# remove mc source.
+		purge "${MC_BUILD_DIR}"
+	fi
+
+	"${MINIO[@]}" --address ":$start_port" "${WORK_DIR}/disk{1...10}" >"${WORK_DIR}/server1.log" 2>&1 &
+	pid=$!
+	disown $pid
+	sleep 5
+
+	if ! ps -p ${pid} 1>&2 >/dev/null; then
+		echo "server1 log:"
+		cat "${WORK_DIR}/server1.log"
+		echo "FAILED"
+		purge "$WORK_DIR"
+		exit 1
+	fi
+
+	"${PWD}/mc" mb --with-versioning minio/bucket
+
+	export AWS_ACCESS_KEY_ID=minio
+	export AWS_SECRET_ACCESS_KEY=minio123
+	aws --endpoint-url http://localhost:"$start_port" s3api create-multipart-upload --bucket bucket --key obj-1 >upload-id.json
+	uploadId=$(jq -r '.UploadId' upload-id.json)
+
+	truncate -s 5MiB file-5mib
+	for i in {1..2}; do
+		aws --endpoint-url http://localhost:"$start_port" s3api upload-part \
+			--upload-id "$uploadId" --bucket bucket --key obj-1 \
+			--part-number "$i" --body ./file-5mib
+	done
+	for i in {1..6}; do
+		find ${WORK_DIR}/disk${i}/.minio.sys/multipart/ -type f -name "part.1" -delete
+	done
+	cat <<EOF >parts.json
+{
+	"Parts": [
+		{
+			"PartNumber": 1,
+			"ETag": "5f363e0e58a95f06cbe9bbc662c5dfb6"
+		},
+		{
+			"PartNumber": 2,
+			"ETag": "5f363e0e58a95f06cbe9bbc662c5dfb6"
+		}
+	]
+}
+EOF
+	err=$(aws --endpoint-url http://localhost:"$start_port" s3api complete-multipart-upload --upload-id "$uploadId" --bucket bucket --key obj-1 --multipart-upload file://./parts.json 2>&1)
+	rv=$?
+	if [ $rv -eq 0 ]; then
+		echo "Failed to receive an error"
+		exit 1
+	fi
+	echo "Received an error during complete-multipart as expected: $err"
+}
+
+function main() {
+	start_port=$(shuf -i 10000-65000 -n 1)
+	start_minio_10drive ${start_port}
+}
+
+main "$@"
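A note on the arithmetic the test above relies on (stated here as an assumption, since the commit itself does not spell it out): with a single 10-drive erasure set and an assumed default parity of EC:4, part metadata must be readable from at least 10 - 4 = 6 drives, so deleting part.1 from 6 drives leaves only 4 intact copies and read quorum cannot be met, which is why complete-multipart-upload is expected to fail. A throwaway check of that assumption:

package main

import "fmt"

func main() {
	// Assumed values for a single 10-drive erasure set; parity EC:4 is an
	// assumption here, not something stated by the commit itself.
	const drives, parity, deleted = 10, 4, 6
	readQuorum := drives - parity
	intact := drives - deleted
	fmt.Printf("read quorum=%d intact part.1 copies=%d -> expect failure: %v\n",
		readQuorum, intact, intact < readQuorum)
}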
@@ -985,27 +985,25 @@ func readParts(ctx context.Context, disks []StorageAPI, bucket string, partMetaP
 	}
 
 	partInfosInQuorum := make([]ObjectPartInfo, len(partMetaPaths))
-	partMetaQuorumMap := make(map[string]int, len(partNumbers))
 	for pidx := range partMetaPaths {
+		// partMetaQuorumMap uses
+		// - path/to/part.N as key to collate errors from failed drives.
+		// - part ETag to collate part metadata
+		partMetaQuorumMap := make(map[string]int, len(partNumbers))
 		var pinfos []*ObjectPartInfo
 		for idx := range disks {
-			if len(objectPartInfos[idx]) == 0 {
+			if len(objectPartInfos[idx]) != len(partMetaPaths) {
 				partMetaQuorumMap[partMetaPaths[pidx]]++
 				continue
 			}
 
 			pinfo := objectPartInfos[idx][pidx]
-			if pinfo == nil {
-				partMetaQuorumMap[partMetaPaths[pidx]]++
-				continue
-			}
-
-			if pinfo.ETag == "" {
-				partMetaQuorumMap[partMetaPaths[pidx]]++
-			} else {
+			if pinfo != nil && pinfo.ETag != "" {
 				pinfos = append(pinfos, pinfo)
 				partMetaQuorumMap[pinfo.ETag]++
+				continue
 			}
+			partMetaQuorumMap[partMetaPaths[pidx]]++
 		}
 
 		var maxQuorum int
@@ -1018,50 +1016,48 @@ func readParts(ctx context.Context, disks []StorageAPI, bucket string, partMetaP
 				maxPartMeta = etag
 			}
 		}
-		var pinfo *ObjectPartInfo
-		for _, pinfo = range pinfos {
-			if pinfo != nil && maxETag != "" && pinfo.ETag == maxETag {
+		// found is a representative ObjectPartInfo which either has the maximally occurring ETag or an error.
+		var found *ObjectPartInfo
+		for _, pinfo := range pinfos {
+			if pinfo == nil {
+				continue
+			}
+			if maxETag != "" && pinfo.ETag == maxETag {
+				found = pinfo
 				break
 			}
-			if maxPartMeta != "" && path.Base(maxPartMeta) == fmt.Sprintf("part.%d.meta", pinfo.Number) {
+			if pinfo.ETag == "" && maxPartMeta != "" && path.Base(maxPartMeta) == fmt.Sprintf("part.%d.meta", pinfo.Number) {
+				found = pinfo
 				break
 			}
 		}
 
-		if pinfo != nil && pinfo.ETag != "" && partMetaQuorumMap[maxETag] >= readQuorum {
-			partInfosInQuorum[pidx] = *pinfo
+		if found != nil && found.ETag != "" && partMetaQuorumMap[maxETag] >= readQuorum {
+			partInfosInQuorum[pidx] = *found
 			continue
 		}
-
-		if partMetaQuorumMap[maxPartMeta] == len(disks) {
-			if pinfo != nil && pinfo.Error != "" {
-				partInfosInQuorum[pidx] = ObjectPartInfo{Error: pinfo.Error}
-			} else {
-				partInfosInQuorum[pidx] = ObjectPartInfo{
-					Error: InvalidPart{
-						PartNumber: partNumbers[pidx],
-					}.Error(),
-				}
-			}
-		} else {
-			partInfosInQuorum[pidx] = ObjectPartInfo{Error: errErasureReadQuorum.Error()}
+		partInfosInQuorum[pidx] = ObjectPartInfo{
+			Number: partNumbers[pidx],
+			Error: InvalidPart{
+				PartNumber: partNumbers[pidx],
+			}.Error(),
 		}
 
 	}
 	return partInfosInQuorum, nil
 }
 
-func errStrToPartErr(errStr string) error {
-	if strings.Contains(errStr, "file not found") {
-		return InvalidPart{}
+func objPartToPartErr(part ObjectPartInfo) error {
+	if strings.Contains(part.Error, "file not found") {
+		return InvalidPart{PartNumber: part.Number}
 	}
-	if strings.Contains(errStr, "Specified part could not be found") {
-		return InvalidPart{}
+	if strings.Contains(part.Error, "Specified part could not be found") {
+		return InvalidPart{PartNumber: part.Number}
 	}
-	if strings.Contains(errStr, errErasureReadQuorum.Error()) {
+	if strings.Contains(part.Error, errErasureReadQuorum.Error()) {
 		return errErasureReadQuorum
 	}
-	return errors.New(errStr)
+	return errors.New(part.Error)
 }
 
 // CompleteMultipartUpload - completes an ongoing multipart
@@ -1196,7 +1192,7 @@ func (er erasureObjects) CompleteMultipartUpload(ctx context.Context, bucket str
 
 	for idx, part := range partInfoFiles {
 		if part.Error != "" {
-			err = errStrToPartErr(part.Error)
+			err = objPartToPartErr(part)
 			bugLogIf(ctx, err)
 			return oi, err
 		}
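To see what the renamed helper gives callers such as CompleteMultipartUpload, here is a self-contained sketch of the same mapping using simplified stand-in types (objectPartInfo, invalidPart and the quorum error message are illustrative, not MinIO's internal definitions): the error string recorded for a part becomes a typed error that now carries the part number.

package main

import (
	"errors"
	"fmt"
	"strings"
)

// Simplified stand-ins for the internal types in the diff above; illustrative
// only, not MinIO's actual definitions.
type objectPartInfo struct {
	Number int
	Error  string
}

type invalidPart struct{ PartNumber int }

func (e invalidPart) Error() string {
	return fmt.Sprintf("invalid part %d", e.PartNumber)
}

// Illustrative stand-in for the read-quorum sentinel error.
var errReadQuorum = errors.New("read failed: insufficient number of drives online")

// objPartToPartErr follows the shape of the renamed helper: inspect the error
// string recorded for a part and return a typed error that keeps the part
// number, so the final response can point at the offending part.
func objPartToPartErr(part objectPartInfo) error {
	switch {
	case strings.Contains(part.Error, "file not found"),
		strings.Contains(part.Error, "Specified part could not be found"):
		return invalidPart{PartNumber: part.Number}
	case strings.Contains(part.Error, errReadQuorum.Error()):
		return errReadQuorum
	}
	return errors.New(part.Error)
}

func main() {
	fmt.Println(objPartToPartErr(objectPartInfo{Number: 1, Error: "file not found"}))
	fmt.Println(objPartToPartErr(objectPartInfo{Number: 2, Error: errReadQuorum.Error()}))
}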