From 8e997eba4aa6fd9721d3225bec7d3138203b8701 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Wed, 7 Sep 2022 07:25:39 -0700 Subject: [PATCH] fix: trigger Heal when xl.meta needs healing during PUT (#15661) This PR is a continuation of the previous change instead of returning an error, instead trigger a spot heal on the 'xl.meta' and return only after the healing is complete. This allows for future GETs on the same resource to be consistent for any version of the object. --- Makefile | 1 + buildscripts/heal-inconsistent-versions.sh | 93 ++++++++++++++++++++++ cmd/erasure-object.go | 86 ++++++++++++++------ cmd/storage-errors.go | 3 + 4 files changed, 159 insertions(+), 24 deletions(-) create mode 100755 buildscripts/heal-inconsistent-versions.sh diff --git a/Makefile b/Makefile index 507a6afdd..5fda03098 100644 --- a/Makefile +++ b/Makefile @@ -89,6 +89,7 @@ verify-healing: ## verify healing and replacing disks with minio binary @GORACE=history_size=7 CGO_ENABLED=1 go build -race -tags kqueue -trimpath --ldflags "$(LDFLAGS)" -o $(PWD)/minio 1>/dev/null @(env bash $(PWD)/buildscripts/verify-healing.sh) @(env bash $(PWD)/buildscripts/unaligned-healing.sh) + @(env bash $(PWD)/buildscripts/heal-inconsistent-versions.sh) verify-healing-with-root-disks: ## verify healing root disks @echo "Verify healing with root drives" diff --git a/buildscripts/heal-inconsistent-versions.sh b/buildscripts/heal-inconsistent-versions.sh new file mode 100755 index 000000000..f88e54712 --- /dev/null +++ b/buildscripts/heal-inconsistent-versions.sh @@ -0,0 +1,93 @@ +#!/bin/bash -e + +set -E +set -o pipefail +set -x + +WORK_DIR="$PWD/.verify-$RANDOM" +MINIO_CONFIG_DIR="$WORK_DIR/.minio" +MINIO=( "$PWD/minio" --config-dir "$MINIO_CONFIG_DIR" server ) + +if [ ! -x "$PWD/minio" ]; then + echo "minio executable binary not found in current directory" + exit 1 +fi + +if [ ! -x "$PWD/minio" ]; then + echo "minio executable binary not found in current directory" + exit 1 +fi + +function start_minio_4drive() { + start_port=$1 + + export MINIO_ROOT_USER=minio + export MINIO_ROOT_PASSWORD=minio123 + export MC_HOST_minio="http://minio:minio123@127.0.0.1:${start_port}/" + unset MINIO_KMS_AUTO_ENCRYPTION # do not auto-encrypt objects + export MINIO_CI_CD=1 + + mkdir ${WORK_DIR} + C_PWD=${PWD} + if [ ! -x "$PWD/mc" ]; then + MC_BUILD_DIR="mc-$RANDOM" + if ! git clone --quiet https://github.com/minio/mc "$MC_BUILD_DIR"; then + echo "failed to download https://github.com/minio/mc" + purge "${MC_BUILD_DIR}" + exit 1 + fi + + (cd "${MC_BUILD_DIR}" && go build -o "$C_PWD/mc") + + # remove mc source. + purge "${MC_BUILD_DIR}" + fi + + "${MINIO[@]}" --address ":$start_port" "${WORK_DIR}/disk{1...4}" > "${WORK_DIR}/server1.log" 2>&1 & + pid=$! + disown $pid + sleep 5 + + if ! ps -p ${pid} 1>&2 >/dev/null; then + echo "server1 log:" + cat "${WORK_DIR}/server1.log" + echo "FAILED" + purge "$WORK_DIR" + exit 1 + fi + + "${PWD}/mc" mb --with-versioning minio/bucket + + for i in $(seq 1 4); do + "${PWD}/mc" cp /etc/hosts minio/bucket/testobj + + sudo chown -R root. "${WORK_DIR}/disk${i}" + + "${PWD}/mc" cp /etc/hosts minio/bucket/testobj + + sudo chown -R ${USER}. "${WORK_DIR}/disk${i}" + done + + for vid in $("${PWD}/mc" ls --json --versions minio/bucket/testobj | jq -r .versionId); do + "${PWD}/mc" cat --vid "${vid}" minio/bucket/testobj | md5sum + done + + pkill minio + sleep 3 +} + +function main() { + start_port=$(shuf -i 10000-65000 -n 1) + + start_minio_4drive ${start_port} +} + +function purge() +{ + rm -rf "$1" +} + +( main "$@" ) +rv=$? +purge "$WORK_DIR" +exit "$rv" diff --git a/cmd/erasure-object.go b/cmd/erasure-object.go index 2c8fbec8a..a704e25b9 100644 --- a/cmd/erasure-object.go +++ b/cmd/erasure-object.go @@ -42,6 +42,7 @@ import ( "github.com/minio/minio/internal/logger" "github.com/minio/minio/internal/sync/errgroup" "github.com/minio/pkg/mimedb" + "github.com/minio/pkg/wildcard" uatomic "go.uber.org/atomic" ) @@ -744,24 +745,19 @@ func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry str // Wait for all renames to finish. errs := g.Wait() - versions := reduceCommonVersions(diskVersions, writeQuorum) - if versions == 0 { - err := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum) - if err == nil { - err = errErasureWriteQuorum - } - return nil, err - } - - for index, dversions := range diskVersions { - if versions != dversions { - errs[index] = errFileCorrupt + err := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum) + if err == nil { + versions := reduceCommonVersions(diskVersions, writeQuorum) + for index, dversions := range diskVersions { + if versions != dversions { + errs[index] = errFileNeedsHealing + } } + err = reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum) } // We can safely allow RenameData errors up to len(er.getDisks()) - writeQuorum - // otherwise return failure. Cleanup successful renames. - err := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum) + // otherwise return failure. return evalDisks(disks, errs), err } @@ -1178,16 +1174,7 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st } // Rename the successfully written temporary object to final location. - if onlineDisks, err = renameData(ctx, onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, bucket, object, writeQuorum); err != nil { - if errors.Is(err, errFileNotFound) { - return ObjectInfo{}, toObjectErr(errErasureWriteQuorum, bucket, object) - } - logger.LogIf(ctx, err) - return ObjectInfo{}, toObjectErr(err, bucket, object) - } - - defer NSUpdated(bucket, object) - + onlineDisks, err = renameData(ctx, onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, bucket, object, writeQuorum) for i := 0; i < len(onlineDisks); i++ { if onlineDisks[i] != nil && onlineDisks[i].IsOnline() { // Object info is the same in all disks, so we can pick @@ -1211,6 +1198,57 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st } } + healEntry := func(entry metaCacheEntry) error { + if entry.isDir() { + return nil + } + // We might land at .metacache, .trash, .multipart + // no need to heal them skip, only when bucket + // is '.minio.sys' + if bucket == minioMetaBucket { + if wildcard.Match("buckets/*/.metacache/*", entry.name) { + return nil + } + if wildcard.Match("tmp/*", entry.name) { + return nil + } + if wildcard.Match("multipart/*", entry.name) { + return nil + } + if wildcard.Match("tmp-old/*", entry.name) { + return nil + } + } + fivs, err := entry.fileInfoVersions(bucket) + if err != nil { + _, err = er.HealObject(ctx, bucket, entry.name, "", madmin.HealOpts{NoLock: true}) + return err + } + + for _, version := range fivs.Versions { + _, err = er.HealObject(ctx, bucket, version.Name, version.VersionID, madmin.HealOpts{NoLock: true}) + if err != nil && !isErrObjectNotFound(err) && !isErrVersionNotFound(err) { + return err + } + } + + return nil + } + + if err != nil { + if errors.Is(err, errFileNotFound) { + return ObjectInfo{}, toObjectErr(errErasureWriteQuorum, bucket, object) + } + if errors.Is(err, errFileNeedsHealing) && !opts.Speedtest { + listAndHeal(ctx, bucket, object, &er, healEntry) + } else { + logger.LogIf(ctx, err) + return ObjectInfo{}, toObjectErr(err, bucket, object) + } + } + + defer NSUpdated(bucket, object) + fi.ReplicationState = opts.PutReplicationState() online = countOnlineDisks(onlineDisks) diff --git a/cmd/storage-errors.go b/cmd/storage-errors.go index 47d3881df..dfbdc854c 100644 --- a/cmd/storage-errors.go +++ b/cmd/storage-errors.go @@ -69,6 +69,9 @@ var errTooManyOpenFiles = StorageErr("too many open files, please increase 'ulim // errFileNameTooLong - given file name is too long than supported length. var errFileNameTooLong = StorageErr("file name too long") +// errFileNeedsHealing - given file name needs to heal. +var errFileNeedsHealing = StorageErr("file name needs healing") + // errVolumeExists - cannot create same volume again. var errVolumeExists = StorageErr("volume already exists")