diff --git a/Makefile b/Makefile index 507a6afdd..5fda03098 100644 --- a/Makefile +++ b/Makefile @@ -89,6 +89,7 @@ verify-healing: ## verify healing and replacing disks with minio binary @GORACE=history_size=7 CGO_ENABLED=1 go build -race -tags kqueue -trimpath --ldflags "$(LDFLAGS)" -o $(PWD)/minio 1>/dev/null @(env bash $(PWD)/buildscripts/verify-healing.sh) @(env bash $(PWD)/buildscripts/unaligned-healing.sh) + @(env bash $(PWD)/buildscripts/heal-inconsistent-versions.sh) verify-healing-with-root-disks: ## verify healing root disks @echo "Verify healing with root drives" diff --git a/buildscripts/heal-inconsistent-versions.sh b/buildscripts/heal-inconsistent-versions.sh new file mode 100755 index 000000000..f88e54712 --- /dev/null +++ b/buildscripts/heal-inconsistent-versions.sh @@ -0,0 +1,93 @@ +#!/bin/bash -e + +set -E +set -o pipefail +set -x + +WORK_DIR="$PWD/.verify-$RANDOM" +MINIO_CONFIG_DIR="$WORK_DIR/.minio" +MINIO=( "$PWD/minio" --config-dir "$MINIO_CONFIG_DIR" server ) + +if [ ! -x "$PWD/minio" ]; then + echo "minio executable binary not found in current directory" + exit 1 +fi + +if [ ! -x "$PWD/minio" ]; then + echo "minio executable binary not found in current directory" + exit 1 +fi + +function start_minio_4drive() { + start_port=$1 + + export MINIO_ROOT_USER=minio + export MINIO_ROOT_PASSWORD=minio123 + export MC_HOST_minio="http://minio:minio123@127.0.0.1:${start_port}/" + unset MINIO_KMS_AUTO_ENCRYPTION # do not auto-encrypt objects + export MINIO_CI_CD=1 + + mkdir ${WORK_DIR} + C_PWD=${PWD} + if [ ! -x "$PWD/mc" ]; then + MC_BUILD_DIR="mc-$RANDOM" + if ! git clone --quiet https://github.com/minio/mc "$MC_BUILD_DIR"; then + echo "failed to download https://github.com/minio/mc" + purge "${MC_BUILD_DIR}" + exit 1 + fi + + (cd "${MC_BUILD_DIR}" && go build -o "$C_PWD/mc") + + # remove mc source. + purge "${MC_BUILD_DIR}" + fi + + "${MINIO[@]}" --address ":$start_port" "${WORK_DIR}/disk{1...4}" > "${WORK_DIR}/server1.log" 2>&1 & + pid=$! + disown $pid + sleep 5 + + if ! ps -p ${pid} 1>&2 >/dev/null; then + echo "server1 log:" + cat "${WORK_DIR}/server1.log" + echo "FAILED" + purge "$WORK_DIR" + exit 1 + fi + + "${PWD}/mc" mb --with-versioning minio/bucket + + for i in $(seq 1 4); do + "${PWD}/mc" cp /etc/hosts minio/bucket/testobj + + sudo chown -R root. "${WORK_DIR}/disk${i}" + + "${PWD}/mc" cp /etc/hosts minio/bucket/testobj + + sudo chown -R ${USER}. "${WORK_DIR}/disk${i}" + done + + for vid in $("${PWD}/mc" ls --json --versions minio/bucket/testobj | jq -r .versionId); do + "${PWD}/mc" cat --vid "${vid}" minio/bucket/testobj | md5sum + done + + pkill minio + sleep 3 +} + +function main() { + start_port=$(shuf -i 10000-65000 -n 1) + + start_minio_4drive ${start_port} +} + +function purge() +{ + rm -rf "$1" +} + +( main "$@" ) +rv=$? +purge "$WORK_DIR" +exit "$rv" diff --git a/cmd/erasure-object.go b/cmd/erasure-object.go index 2c8fbec8a..a704e25b9 100644 --- a/cmd/erasure-object.go +++ b/cmd/erasure-object.go @@ -42,6 +42,7 @@ import ( "github.com/minio/minio/internal/logger" "github.com/minio/minio/internal/sync/errgroup" "github.com/minio/pkg/mimedb" + "github.com/minio/pkg/wildcard" uatomic "go.uber.org/atomic" ) @@ -744,24 +745,19 @@ func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry str // Wait for all renames to finish. errs := g.Wait() - versions := reduceCommonVersions(diskVersions, writeQuorum) - if versions == 0 { - err := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum) - if err == nil { - err = errErasureWriteQuorum - } - return nil, err - } - - for index, dversions := range diskVersions { - if versions != dversions { - errs[index] = errFileCorrupt + err := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum) + if err == nil { + versions := reduceCommonVersions(diskVersions, writeQuorum) + for index, dversions := range diskVersions { + if versions != dversions { + errs[index] = errFileNeedsHealing + } } + err = reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum) } // We can safely allow RenameData errors up to len(er.getDisks()) - writeQuorum - // otherwise return failure. Cleanup successful renames. - err := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum) + // otherwise return failure. return evalDisks(disks, errs), err } @@ -1178,16 +1174,7 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st } // Rename the successfully written temporary object to final location. - if onlineDisks, err = renameData(ctx, onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, bucket, object, writeQuorum); err != nil { - if errors.Is(err, errFileNotFound) { - return ObjectInfo{}, toObjectErr(errErasureWriteQuorum, bucket, object) - } - logger.LogIf(ctx, err) - return ObjectInfo{}, toObjectErr(err, bucket, object) - } - - defer NSUpdated(bucket, object) - + onlineDisks, err = renameData(ctx, onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, bucket, object, writeQuorum) for i := 0; i < len(onlineDisks); i++ { if onlineDisks[i] != nil && onlineDisks[i].IsOnline() { // Object info is the same in all disks, so we can pick @@ -1211,6 +1198,57 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st } } + healEntry := func(entry metaCacheEntry) error { + if entry.isDir() { + return nil + } + // We might land at .metacache, .trash, .multipart + // no need to heal them skip, only when bucket + // is '.minio.sys' + if bucket == minioMetaBucket { + if wildcard.Match("buckets/*/.metacache/*", entry.name) { + return nil + } + if wildcard.Match("tmp/*", entry.name) { + return nil + } + if wildcard.Match("multipart/*", entry.name) { + return nil + } + if wildcard.Match("tmp-old/*", entry.name) { + return nil + } + } + fivs, err := entry.fileInfoVersions(bucket) + if err != nil { + _, err = er.HealObject(ctx, bucket, entry.name, "", madmin.HealOpts{NoLock: true}) + return err + } + + for _, version := range fivs.Versions { + _, err = er.HealObject(ctx, bucket, version.Name, version.VersionID, madmin.HealOpts{NoLock: true}) + if err != nil && !isErrObjectNotFound(err) && !isErrVersionNotFound(err) { + return err + } + } + + return nil + } + + if err != nil { + if errors.Is(err, errFileNotFound) { + return ObjectInfo{}, toObjectErr(errErasureWriteQuorum, bucket, object) + } + if errors.Is(err, errFileNeedsHealing) && !opts.Speedtest { + listAndHeal(ctx, bucket, object, &er, healEntry) + } else { + logger.LogIf(ctx, err) + return ObjectInfo{}, toObjectErr(err, bucket, object) + } + } + + defer NSUpdated(bucket, object) + fi.ReplicationState = opts.PutReplicationState() online = countOnlineDisks(onlineDisks) diff --git a/cmd/storage-errors.go b/cmd/storage-errors.go index 47d3881df..dfbdc854c 100644 --- a/cmd/storage-errors.go +++ b/cmd/storage-errors.go @@ -69,6 +69,9 @@ var errTooManyOpenFiles = StorageErr("too many open files, please increase 'ulim // errFileNameTooLong - given file name is too long than supported length. var errFileNameTooLong = StorageErr("file name too long") +// errFileNeedsHealing - given file name needs to heal. +var errFileNeedsHealing = StorageErr("file name needs healing") + // errVolumeExists - cannot create same volume again. var errVolumeExists = StorageErr("volume already exists")