fix: trigger Heal when xl.meta needs healing during PUT (#15661)

This PR is a continuation of the previous change: instead
of returning an error, trigger a spot heal on the
'xl.meta' and return only after the healing is complete.

This allows future GETs on the same resource to be
consistent for any version of the object.
Harshavardhana 2022-09-07 07:25:39 -07:00 committed by GitHub
parent 228c6686f8
commit 8e997eba4a
4 changed files with 159 additions and 24 deletions

Makefile

@@ -89,6 +89,7 @@ verify-healing: ## verify healing and replacing disks with minio binary
@GORACE=history_size=7 CGO_ENABLED=1 go build -race -tags kqueue -trimpath --ldflags "$(LDFLAGS)" -o $(PWD)/minio 1>/dev/null
@(env bash $(PWD)/buildscripts/verify-healing.sh)
@(env bash $(PWD)/buildscripts/unaligned-healing.sh)
+@(env bash $(PWD)/buildscripts/heal-inconsistent-versions.sh)
verify-healing-with-root-disks: ## verify healing root disks
@echo "Verify healing with root drives"
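With this hunk the new check becomes part of the existing healing verification suite. Assuming GNU make and a Go toolchain on the host, the full suite (race-enabled build plus all three scripts) runs via:

make verify-healing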

buildscripts/heal-inconsistent-versions.sh (new file)

@@ -0,0 +1,93 @@
#!/bin/bash -e
set -E
set -o pipefail
set -x
WORK_DIR="$PWD/.verify-$RANDOM"
MINIO_CONFIG_DIR="$WORK_DIR/.minio"
MINIO=( "$PWD/minio" --config-dir "$MINIO_CONFIG_DIR" server )
if [ ! -x "$PWD/minio" ]; then
echo "minio executable binary not found in current directory"
exit 1
fi
function start_minio_4drive() {
start_port=$1
export MINIO_ROOT_USER=minio
export MINIO_ROOT_PASSWORD=minio123
export MC_HOST_minio="http://minio:minio123@127.0.0.1:${start_port}/"
unset MINIO_KMS_AUTO_ENCRYPTION # do not auto-encrypt objects
export MINIO_CI_CD=1
mkdir "${WORK_DIR}"
C_PWD=${PWD}
if [ ! -x "$PWD/mc" ]; then
MC_BUILD_DIR="mc-$RANDOM"
if ! git clone --quiet https://github.com/minio/mc "$MC_BUILD_DIR"; then
echo "failed to download https://github.com/minio/mc"
purge "${MC_BUILD_DIR}"
exit 1
fi
(cd "${MC_BUILD_DIR}" && go build -o "$C_PWD/mc")
# remove mc source.
purge "${MC_BUILD_DIR}"
fi
"${MINIO[@]}" --address ":$start_port" "${WORK_DIR}/disk{1...4}" > "${WORK_DIR}/server1.log" 2>&1 &
pid=$!
disown $pid
sleep 5
if ! ps -p "${pid}" >/dev/null; then
echo "server1 log:"
cat "${WORK_DIR}/server1.log"
echo "FAILED"
purge "$WORK_DIR"
exit 1
fi
"${PWD}/mc" mb --with-versioning minio/bucket
for i in $(seq 1 4); do
"${PWD}/mc" cp /etc/hosts minio/bucket/testobj
sudo chown -R root. "${WORK_DIR}/disk${i}"
"${PWD}/mc" cp /etc/hosts minio/bucket/testobj
sudo chown -R ${USER}. "${WORK_DIR}/disk${i}"
done
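# Every version must stream back intact; with -e and pipefail in effect,
# a failed 'mc cat' (a version still missing on some drive) aborts the test.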
for vid in $("${PWD}/mc" ls --json --versions minio/bucket/testobj | jq -r .versionId); do
"${PWD}/mc" cat --vid "${vid}" minio/bucket/testobj | md5sum
done
pkill minio
sleep 3
}
function main() {
start_port=$(shuf -i 10000-65000 -n 1)
start_minio_4drive ${start_port}
}
function purge() {
rm -rf "$1"
}
( main "$@" )
rv=$?
purge "$WORK_DIR"
exit "$rv"

cmd/erasure-object.go

@@ -42,6 +42,7 @@ import (
"github.com/minio/minio/internal/logger"
"github.com/minio/minio/internal/sync/errgroup"
"github.com/minio/pkg/mimedb"
"github.com/minio/pkg/wildcard"
uatomic "go.uber.org/atomic"
)
@@ -744,24 +745,19 @@ func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry str
// Wait for all renames to finish.
errs := g.Wait()

-versions := reduceCommonVersions(diskVersions, writeQuorum)
-if versions == 0 {
-    err := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum)
-    if err == nil {
-        err = errErasureWriteQuorum
-    }
-    return nil, err
-}
-for index, dversions := range diskVersions {
-    if versions != dversions {
-        errs[index] = errFileCorrupt
-    }
-}
+err := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum)
+if err == nil {
+    versions := reduceCommonVersions(diskVersions, writeQuorum)
+    for index, dversions := range diskVersions {
+        if versions != dversions {
+            errs[index] = errFileNeedsHealing
+        }
+    }
+    err = reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum)
+}

// We can safely allow RenameData errors up to len(er.getDisks()) - writeQuorum
-// otherwise return failure. Cleanup successful renames.
-err := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum)
+// otherwise return failure.
return evalDisks(disks, errs), err
}
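reduceCommonVersions itself is not part of this diff; a self-contained toy (hypothetical names, not the repository's code) of the idea the new block relies on: after the renames, each disk reports how many versions it holds, and a disk that disagrees with the quorum-backed count is flagged for healing instead of failing the PUT outright.

package main

import (
	"errors"
	"fmt"
)

var errFileNeedsHealing = errors.New("file name needs healing")

// commonVersions returns the per-disk version count that at least
// quorum disks agree on, or 0 when there is no such agreement.
func commonVersions(diskVersions []uint64, quorum int) uint64 {
	counts := map[uint64]int{}
	for _, v := range diskVersions {
		counts[v]++
	}
	for v, n := range counts {
		if n >= quorum {
			return v
		}
	}
	return 0
}

func main() {
	diskVersions := []uint64{3, 3, 3, 2} // disk 4 missed the last write
	common := commonVersions(diskVersions, 3)
	for i, v := range diskVersions {
		if v != common {
			// The real code stores errFileNeedsHealing in errs[i]; putObject
			// then heals inline instead of returning an error to the client.
			fmt.Printf("disk %d: %v\n", i+1, errFileNeedsHealing)
		}
	}
}

The second reduceWriteQuorumErrs call in the hunk above is what still fails the PUT when so many disks disagree that healing cannot make up write quorum.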
@@ -1178,16 +1174,7 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
}

// Rename the successfully written temporary object to final location.
-if onlineDisks, err = renameData(ctx, onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, bucket, object, writeQuorum); err != nil {
-    if errors.Is(err, errFileNotFound) {
-        return ObjectInfo{}, toObjectErr(errErasureWriteQuorum, bucket, object)
-    }
-    logger.LogIf(ctx, err)
-    return ObjectInfo{}, toObjectErr(err, bucket, object)
-}
-
-defer NSUpdated(bucket, object)
+onlineDisks, err = renameData(ctx, onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, bucket, object, writeQuorum)

for i := 0; i < len(onlineDisks); i++ {
    if onlineDisks[i] != nil && onlineDisks[i].IsOnline() {
        // Object info is the same in all disks, so we can pick
@@ -1211,6 +1198,57 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
    }
}

+healEntry := func(entry metaCacheEntry) error {
+    if entry.isDir() {
+        return nil
+    }
+    // We might land at .metacache, .trash, .multipart
+    // no need to heal them skip, only when bucket
+    // is '.minio.sys'
+    if bucket == minioMetaBucket {
+        if wildcard.Match("buckets/*/.metacache/*", entry.name) {
+            return nil
+        }
+        if wildcard.Match("tmp/*", entry.name) {
+            return nil
+        }
+        if wildcard.Match("multipart/*", entry.name) {
+            return nil
+        }
+        if wildcard.Match("tmp-old/*", entry.name) {
+            return nil
+        }
+    }
+    fivs, err := entry.fileInfoVersions(bucket)
+    if err != nil {
+        _, err = er.HealObject(ctx, bucket, entry.name, "", madmin.HealOpts{NoLock: true})
+        return err
+    }
+    for _, version := range fivs.Versions {
+        _, err = er.HealObject(ctx, bucket, version.Name, version.VersionID, madmin.HealOpts{NoLock: true})
+        if err != nil && !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
+            return err
+        }
+    }
+    return nil
+}
+
+if err != nil {
+    if errors.Is(err, errFileNotFound) {
+        return ObjectInfo{}, toObjectErr(errErasureWriteQuorum, bucket, object)
+    }
+    if errors.Is(err, errFileNeedsHealing) && !opts.Speedtest {
+        listAndHeal(ctx, bucket, object, &er, healEntry)
+    } else {
+        logger.LogIf(ctx, err)
+        return ObjectInfo{}, toObjectErr(err, bucket, object)
+    }
+}
+
+defer NSUpdated(bucket, object)
+
fi.ReplicationState = opts.PutReplicationState()
online = countOnlineDisks(onlineDisks)
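listAndHeal is not shown in this commit; the call site implies a helper that lists the object's entries and feeds each one to the healEntry callback, returning only once healing is done, which is what lets the PUT "return only after the healing is complete". A minimal, self-contained sketch of that callback pattern (toy types, not MinIO's implementation):

package main

import "fmt"

type entry struct{ name, versionID string }

// listAndHeal stands in for the real helper: enumerate what was listed
// for the object and hand every entry to the heal callback.
func listAndHeal(entries []entry, healEntry func(entry) error) error {
	for _, e := range entries {
		if err := healEntry(e); err != nil {
			return err
		}
	}
	return nil
}

func main() {
	versions := []entry{{"testobj", "v1"}, {"testobj", "v2"}}
	err := listAndHeal(versions, func(e entry) error {
		fmt.Println("heal", e.name, e.versionID) // the real callback calls er.HealObject
		return nil
	})
	fmt.Println("PUT returns after healing, err =", err)
}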

cmd/storage-errors.go

@@ -69,6 +69,9 @@ var errTooManyOpenFiles = StorageErr("too many open files, please increase 'ulim
// errFileNameTooLong - given file name is too long than supported length.
var errFileNameTooLong = StorageErr("file name too long")
+// errFileNeedsHealing - given file name needs to heal.
+var errFileNeedsHealing = StorageErr("file name needs healing")
+
// errVolumeExists - cannot create same volume again.
var errVolumeExists = StorageErr("volume already exists")
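For context on why the new sentinel is a one-liner: StorageErr, defined earlier in this file, is a string-backed error type, so the errors.Is check in the putObject hunk above matches it by plain value comparison. A minimal standalone equivalent (not the repository's exact definition):

package main

import (
	"errors"
	"fmt"
)

// StorageErr mirrors the string-backed sentinel-error pattern used above.
type StorageErr string

func (e StorageErr) Error() string { return string(e) }

var errFileNeedsHealing = StorageErr("file name needs healing")

func main() {
	var err error = errFileNeedsHealing
	fmt.Println(errors.Is(err, errFileNeedsHealing)) // true: compared by value
}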