From 8fdfcfb562b285f73e607034926a68acb9cc5ee5 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Mon, 4 Dec 2023 11:33:39 -0800 Subject: [PATCH] upon RenameData() quorum error delete any partial success (#18586) There is potential for danglingWrites when write quorum fails and only some drives took a successful write. Generally this is left to the healing routine to pick up; however, it is better that we delete the partial write right away, to avoid potential quorum issues on the version signature when there are many versions of an object. --- cmd/erasure-object.go | 11 +++++++++++ cmd/mrf.go | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/cmd/erasure-object.go b/cmd/erasure-object.go index f096f2463..6b6c4e7ed 100644 --- a/cmd/erasure-object.go +++ b/cmd/erasure-object.go @@ -1036,6 +1036,17 @@ func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry str var versionsDisparity bool err := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum) + if err != nil { + for index, nerr := range errs { + // When we are going to return error, attempt to delete success + // on some of the drives, if we cannot we do not have to notify + // caller this dangling object will be now scheduled to be removed + // via active healing. + if nerr == nil { + disks[index].DeleteVersion(ctx, dstBucket, dstEntry, metadata[index], false) + } + } + } if err == nil { versions := reduceCommonVersions(diskVersions, writeQuorum) for index, dversions := range diskVersions { diff --git a/cmd/mrf.go b/cmd/mrf.go index 02c1c767d..b9e86dd4b 100644 --- a/cmd/mrf.go +++ b/cmd/mrf.go @@ -97,7 +97,7 @@ func (m *mrfState) healRoutine() { // let recently failed networks to reconnect // making MRF wait for 1s before retrying, // i.e 4 reconnect attempts. - time.Sleep(1 * time.Second) + time.Sleep(time.Second) } // wait on timer per heal