From b7c5e45fff935994791f89237a4891185c55b91f Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Fri, 7 Jan 2022 19:11:54 -0800 Subject: [PATCH] heal: isObjectDangling should return false when it cannot decide (#14053) In a multi-pool setup when disks are coming up, or in a single-pool setup with, say, hundreds of erasure sets and a slow network, it's possible that when healing is attempted on the `.minio.sys/config` folder, it can lead to healing unexpectedly deleting some policy files as dangling due to a mistake in understanding when `isObjectDangling` is considered to be 'true'. This issue happened in commit 30135eed86f470407123775f3c11ecd3e991337b when we assumed that a validMeta with empty ErasureInfo is considered to be fully dangling. This implementation issue gets exposed when the server is starting up. This is most easily seen with multiple-pool setups because of the disconnected fashion in which pools come up. The decision to purge the object as dangling is taken incorrectly prior to the correct state being achieved on each pool: when the corresponding drive, say, returns 'errDiskNotFound', a 'delete' is triggered. At this point, the 'drive' comes online because this is part of the startup sequence, as drives can come online lazily. This kind of situation exists because we allow (totalDisks/2) number of drives to be online when the server is being restarted. The implementation made an incorrect assumption here, leading to policies getting deleted. Added tests to capture the implementation requirements. 
--- .github/workflows/replication.yaml | 6 + Makefile | 4 + cmd/erasure-healing.go | 125 ++++++----- cmd/erasure-healing_test.go | 186 ++++++++++++++++ cmd/xl-storage-format-v2.go | 2 +- docs/site-replication/run-multi-site-ldap.sh | 10 +- .../run-multi-site-minio-idp.sh | 199 ++++++++++++++++++ 7 files changed, 461 insertions(+), 71 deletions(-) create mode 100755 docs/site-replication/run-multi-site-minio-idp.sh diff --git a/.github/workflows/replication.yaml b/.github/workflows/replication.yaml index 5e88ae552..c011f9737 100644 --- a/.github/workflows/replication.yaml +++ b/.github/workflows/replication.yaml @@ -38,3 +38,9 @@ jobs: sudo sysctl net.ipv6.conf.all.disable_ipv6=0 sudo sysctl net.ipv6.conf.default.disable_ipv6=0 make test-replication + - name: Test MinIO IDP for automatic site replication + run: | + sudo sysctl net.ipv6.conf.all.disable_ipv6=0 + sudo sysctl net.ipv6.conf.default.disable_ipv6=0 + make test-site-replication-minio + diff --git a/Makefile b/Makefile index 0c92b2208..fb55d4dd1 100644 --- a/Makefile +++ b/Makefile @@ -67,6 +67,10 @@ test-site-replication-oidc: install ## verify automatic site replication @echo "Running tests for automatic site replication of IAM (with OIDC)" @(env bash $(PWD)/docs/site-replication/run-multi-site-oidc.sh) +test-site-replication-minio: install ## verify automatic site replication + @echo "Running tests for automatic site replication of IAM (with MinIO IDP)" + @(env bash $(PWD)/docs/site-replication/run-multi-site-minio-idp.sh) + verify: ## verify minio various setups @echo "Verifying build with race" @CGO_ENABLED=1 go build -race -tags kqueue -trimpath --ldflags "$(LDFLAGS)" -o $(PWD)/minio 1>/dev/null diff --git a/cmd/erasure-healing.go b/cmd/erasure-healing.go index 20f26a826..b10594b4b 100644 --- a/cmd/erasure-healing.go +++ b/cmd/erasure-healing.go @@ -246,7 +246,7 @@ func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, latestMeta File switch { case errors.Is(erErr, errFileNotFound) || 
errors.Is(erErr, errFileVersionNotFound): return true - case errors.Is(erErr, errCorruptedFormat): + case errors.Is(erErr, errFileCorrupt): return true } if erErr == nil { @@ -312,13 +312,13 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s readQuorum, _, err := objectQuorumFromMeta(ctx, partsMetadata, errs, er.defaultParityCount) if err != nil { - return er.purgeObjectDangling(ctx, bucket, object, versionID, partsMetadata, errs, []error{}, opts) + return er.purgeObjectDangling(ctx, bucket, object, versionID, partsMetadata, errs, nil, opts) } result.ParityBlocks = result.DiskCount - readQuorum result.DataBlocks = readQuorum - // List of disks having latest version of the object er.meta + // List of disks having latest version of the object xl.meta // (by modtime). onlineDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs) @@ -356,10 +356,6 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s // If data is sane on any one disk, we can // extract the correct object size. result.ObjectSize = partsMetadata[i].Size - if partsMetadata[i].Erasure.ParityBlocks > 0 && partsMetadata[i].Erasure.DataBlocks > 0 { - result.ParityBlocks = partsMetadata[i].Erasure.ParityBlocks - result.DataBlocks = partsMetadata[i].Erasure.DataBlocks - } case errs[i] == errDiskNotFound, dataErrs[i] == errDiskNotFound: driveState = madmin.DriveStateOffline case errs[i] == errFileNotFound, errs[i] == errFileVersionNotFound, errs[i] == errVolumeNotFound: @@ -406,7 +402,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s // If less than read quorum number of disks have all the parts // of the data, we can't reconstruct the erasure-coded data. 
- if numAvailableDisks < result.DataBlocks { + if numAvailableDisks < readQuorum { return er.purgeObjectDangling(ctx, bucket, object, versionID, partsMetadata, errs, dataErrs, opts) } @@ -466,9 +462,6 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s copyPartsMetadata = shufflePartsMetadata(copyPartsMetadata, latestMeta.Erasure.Distribution) if !latestMeta.Deleted && !latestMeta.IsRemote() { - result.DataBlocks = latestMeta.Erasure.DataBlocks - result.ParityBlocks = latestMeta.Erasure.ParityBlocks - // Heal each part. erasureHealFile() will write the healed // part to .minio/tmp/uuid/ which needs to be renamed later to // the final location. @@ -819,21 +812,20 @@ func (er erasureObjects) purgeObjectDangling(ctx context.Context, bucket, object // remove we simply delete it from namespace. m, ok := isObjectDangling(metaArr, errs, dataErrs) if ok { - parityBlocks := m.Erasure.ParityBlocks - if m.Erasure.ParityBlocks == 0 { - parityBlocks = er.defaultParityCount - } - dataBlocks := m.Erasure.DataBlocks - if m.Erasure.DataBlocks == 0 { - dataBlocks = len(storageDisks) - parityBlocks + parityBlocks := er.defaultParityCount + dataBlocks := len(storageDisks) - parityBlocks + if m.IsValid() { + parityBlocks = m.Erasure.ParityBlocks + dataBlocks = m.Erasure.DataBlocks } + writeQuorum := dataBlocks if dataBlocks == parityBlocks { writeQuorum++ } + var err error - var returnNotFound bool - if !opts.DryRun && opts.Remove { + if opts.Remove { err = er.deleteObjectVersion(ctx, bucket, object, writeQuorum, FileInfo{ VersionID: versionID, }, false) @@ -849,19 +841,10 @@ func (er erasureObjects) purgeObjectDangling(ctx context.Context, bucket, object m.Size = 0 } - // Delete successfully purged dangling content, return ObjectNotFound/VersionNotFound instead. 
- if countErrs(errs, nil) == len(errs) { - returnNotFound = true - } - } - if returnNotFound { - err = toObjectErr(errFileNotFound, bucket, object) - if versionID != "" { - err = toObjectErr(errFileVersionNotFound, bucket, object, versionID) - } - return er.defaultHealResult(m, storageDisks, storageEndpoints, - errs, bucket, object, versionID), err + return er.defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, + errs, bucket, object, versionID), nil } + return er.defaultHealResult(m, storageDisks, storageEndpoints, errs, bucket, object, versionID), toObjectErr(err, bucket, object, versionID) } @@ -878,54 +861,64 @@ func (er erasureObjects) purgeObjectDangling(ctx context.Context, bucket, object // files is lesser than number of data blocks. func isObjectDangling(metaArr []FileInfo, errs []error, dataErrs []error) (validMeta FileInfo, ok bool) { // We can consider an object data not reliable - // when er.meta is not found in read quorum disks. - // or when er.meta is not readable in read quorum disks. - var notFoundErasureMeta, corruptedErasureMeta int - for _, readErr := range errs { - if errors.Is(readErr, errFileNotFound) || errors.Is(readErr, errFileVersionNotFound) { - notFoundErasureMeta++ - } else if errors.Is(readErr, errCorruptedFormat) { - corruptedErasureMeta++ - } - } - var notFoundParts int - for i := range dataErrs { - // Only count part errors, if the error is not - // same as er.meta error. This is to avoid - // double counting when both parts and er.meta - // are not available. - if errs[i] != dataErrs[i] { - if IsErr(dataErrs[i], []error{ - errFileNotFound, - errFileVersionNotFound, - }...) { - notFoundParts++ + // when xl.meta is not found in read quorum disks. + // or when xl.meta is not readable in read quorum disks. 
+ danglingErrsCount := func(cerrs []error) (int, int) { + var ( + notFoundCount int + corruptedCount int + ) + for _, readErr := range cerrs { + if errors.Is(readErr, errFileNotFound) || errors.Is(readErr, errFileVersionNotFound) { + notFoundCount++ + } else if errors.Is(readErr, errFileCorrupt) { + corruptedCount++ } } + return notFoundCount, corruptedCount } + ndataErrs := make([]error, len(dataErrs)) + for i := range dataErrs { + if errs[i] != dataErrs[i] { + // Only count part errors, if the error is not + // same as xl.meta error. This is to avoid + // double counting when both parts and xl.meta + // are not available. + ndataErrs[i] = dataErrs[i] + } + } + + notFoundMetaErrs, corruptedMetaErrs := danglingErrsCount(errs) + notFoundPartsErrs, corruptedPartsErrs := danglingErrsCount(ndataErrs) + for _, m := range metaArr { - if !m.IsValid() { - continue + if m.IsValid() { + validMeta = m + break } - validMeta = m - break } - if validMeta.Deleted || validMeta.IsRemote() { - // notFoundParts is ignored since a + if !validMeta.IsValid() { + // We have no idea what this file is, leave it as is. + return validMeta, false + } + + if validMeta.Deleted { + // notFoundPartsErrs is ignored since // - delete marker does not have any parts - // - transition status of complete has no parts - return validMeta, corruptedErasureMeta+notFoundErasureMeta > len(errs)/2 + return validMeta, corruptedMetaErrs+notFoundMetaErrs > len(errs)/2 } - // We couldn't find any valid meta we are indeed corrupted, return true right away. - if validMeta.Erasure.DataBlocks == 0 { - return validMeta, true + totalErrs := notFoundMetaErrs + corruptedMetaErrs + notFoundPartsErrs + corruptedPartsErrs + if validMeta.IsRemote() { + // notFoundPartsErrs is ignored since + // - transition status of complete has no parts + totalErrs = notFoundMetaErrs + corruptedMetaErrs } // We have valid meta, now verify if we have enough files with parity blocks. 
- return validMeta, corruptedErasureMeta+notFoundErasureMeta+notFoundParts > validMeta.Erasure.ParityBlocks + return validMeta, totalErrs > validMeta.Erasure.ParityBlocks } // HealObject - heal the given object, automatically deletes the object if stale/corrupted if `remove` is true. diff --git a/cmd/erasure-healing_test.go b/cmd/erasure-healing_test.go index 0efa5190b..f961dbe41 100644 --- a/cmd/erasure-healing_test.go +++ b/cmd/erasure-healing_test.go @@ -34,6 +34,192 @@ import ( "github.com/minio/madmin-go" ) +// Tests isObjectDangling function +func TestIsObjectDangling(t *testing.T) { + fi := newFileInfo("test-object", 2, 2) + fi.Erasure.Index = 1 + + testCases := []struct { + name string + metaArr []FileInfo + errs []error + dataErrs []error + expectedMeta FileInfo + expectedDangling bool + }{ + { + name: "FileInfoExists-case1", + metaArr: []FileInfo{ + {}, + {}, + fi, + fi, + }, + errs: []error{ + errFileNotFound, + errDiskNotFound, + nil, + nil, + }, + dataErrs: nil, + expectedMeta: fi, + expectedDangling: false, + }, + { + name: "FileInfoExists-case2", + metaArr: []FileInfo{ + {}, + {}, + fi, + fi, + }, + errs: []error{ + errFileNotFound, + errFileNotFound, + nil, + nil, + }, + dataErrs: nil, + expectedMeta: fi, + expectedDangling: false, + }, + { + name: "FileInfoUndecided-case1", + metaArr: []FileInfo{ + {}, + {}, + {}, + fi, + }, + errs: []error{ + errFileNotFound, + errDiskNotFound, + errDiskNotFound, + nil, + }, + dataErrs: nil, + expectedMeta: fi, + expectedDangling: false, + }, + { + name: "FileInfoUndecided-case2", + metaArr: []FileInfo{}, + errs: []error{ + errFileNotFound, + errDiskNotFound, + errDiskNotFound, + errFileNotFound, + }, + dataErrs: nil, + expectedMeta: FileInfo{}, + expectedDangling: false, + }, + { + name: "FileInfoUndecided-case3(file deleted)", + metaArr: []FileInfo{}, + errs: []error{ + errFileNotFound, + errFileNotFound, + errFileNotFound, + errFileNotFound, + }, + dataErrs: nil, + expectedMeta: FileInfo{}, + expectedDangling: 
false, + }, + { + name: "FileInfoDecided-case1", + metaArr: []FileInfo{ + {}, + {}, + {}, + fi, + }, + errs: []error{ + errFileNotFound, + errFileNotFound, + errFileNotFound, + nil, + }, + dataErrs: nil, + expectedMeta: fi, + expectedDangling: true, + }, + { + name: "FileInfoDecided-case2", + metaArr: []FileInfo{ + {}, + {}, + {}, + fi, + }, + errs: []error{ + errFileNotFound, + errFileCorrupt, + errFileCorrupt, + nil, + }, + dataErrs: nil, + expectedMeta: fi, + expectedDangling: true, + }, + { + name: "FileInfoDecided-case2-delete-marker", + metaArr: []FileInfo{ + {}, + {}, + {}, + {Deleted: true}, + }, + errs: []error{ + errFileNotFound, + errFileCorrupt, + errFileCorrupt, + nil, + }, + dataErrs: nil, + expectedMeta: FileInfo{Deleted: true}, + expectedDangling: true, + }, + { + name: "FileInfoDecided-case2-(duplicate data errors)", + metaArr: []FileInfo{ + {}, + {}, + {}, + {Deleted: true}, + }, + errs: []error{ + errFileNotFound, + errFileCorrupt, + errFileCorrupt, + nil, + }, + dataErrs: []error{ + errFileNotFound, + errFileCorrupt, + nil, + nil, + }, + expectedMeta: FileInfo{Deleted: true}, + expectedDangling: true, + }, + // Add new cases as seen + } + for _, testCase := range testCases { + testCase := testCase + t.Run(testCase.name, func(t *testing.T) { + gotMeta, dangling := isObjectDangling(testCase.metaArr, testCase.errs, testCase.dataErrs) + if !gotMeta.Equals(testCase.expectedMeta) { + t.Errorf("Expected %#v, got %#v", testCase.expectedMeta, gotMeta) + } + if dangling != testCase.expectedDangling { + t.Errorf("Expected dangling %t, got %t", testCase.expectedDangling, dangling) + } + }) + } +} + // Tests both object and bucket healing. 
func TestHealing(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) diff --git a/cmd/xl-storage-format-v2.go b/cmd/xl-storage-format-v2.go index 34a5178ac..0174322f1 100644 --- a/cmd/xl-storage-format-v2.go +++ b/cmd/xl-storage-format-v2.go @@ -927,7 +927,7 @@ func (x *xlMetaV2) loadLegacy(buf []byte) error { return errors.New("unknown major metadata version") } if allMeta == nil { - return errCorruptedFormat + return errFileCorrupt } // bts will shrink as we decode. bts := allMeta diff --git a/docs/site-replication/run-multi-site-ldap.sh b/docs/site-replication/run-multi-site-ldap.sh index 54899b394..c438da2cc 100755 --- a/docs/site-replication/run-multi-site-ldap.sh +++ b/docs/site-replication/run-multi-site-ldap.sh @@ -10,7 +10,7 @@ cleanup() { echo "Cleaning up instances of MinIO" pkill minio pkill -9 minio - rm -rf /tmp/minio{1,2,3} + rm -rf /tmp/minio-ldap-idp{1,2,3} } cleanup @@ -40,9 +40,9 @@ if [ ! -f ./mc ]; then && chmod +x mc fi -minio server --address ":9001" /tmp/minio1/{1...4} >/tmp/minio1_1.log 2>&1 & -minio server --address ":9002" /tmp/minio2/{1...4} >/tmp/minio2_1.log 2>&1 & -minio server --address ":9003" /tmp/minio3/{1...4} >/tmp/minio3_1.log 2>&1 & +minio server --config-dir /tmp/minio-ldap --address ":9001" /tmp/minio-ldap-idp1/{1...4} >/tmp/minio1_1.log 2>&1 & +minio server --config-dir /tmp/minio-ldap --address ":9002" /tmp/minio-ldap-idp2/{1...4} >/tmp/minio2_1.log 2>&1 & +minio server --config-dir /tmp/minio-ldap --address ":9003" /tmp/minio-ldap-idp3/{1...4} >/tmp/minio3_1.log 2>&1 & sleep 10 @@ -205,3 +205,5 @@ if [ "${enabled_minio1}" != "Enabled" ]; then echo "expected bucket to be mirrored with object-lock enabled, exiting..." 
exit_1; fi + +cleanup diff --git a/docs/site-replication/run-multi-site-minio-idp.sh b/docs/site-replication/run-multi-site-minio-idp.sh new file mode 100755 index 000000000..f3edb1f1f --- /dev/null +++ b/docs/site-replication/run-multi-site-minio-idp.sh @@ -0,0 +1,199 @@ +#!/usr/bin/env bash + +# shellcheck disable=SC2120 +exit_1() { + cleanup + exit 1 +} + +cleanup() { + echo "Cleaning up instances of MinIO" + pkill minio + pkill -9 minio + rm -rf /tmp/minio-internal-idp{1,2,3} +} + +cleanup + +unset MINIO_KMS_KES_CERT_FILE +unset MINIO_KMS_KES_KEY_FILE +unset MINIO_KMS_KES_ENDPOINT +unset MINIO_KMS_KES_KEY_NAME + +export MINIO_BROWSER=off +export MINIO_ROOT_USER="minio" +export MINIO_ROOT_PASSWORD="minio123" +export MINIO_KMS_AUTO_ENCRYPTION=off +export MINIO_PROMETHEUS_AUTH_TYPE=public +export MINIO_KMS_SECRET_KEY=my-minio-key:OSMM+vkKUTCvQs9YL/CVMIMt43HFhkUpqJxTmGl6rYw= + +if [ ! -f ./mc ]; then + wget -O mc https://dl.minio.io/client/mc/release/linux-amd64/mc \ + && chmod +x mc +fi + +minio server --config-dir /tmp/minio-internal --address ":9001" /tmp/minio-internal-idp1/{1...4} >/tmp/minio1_1.log 2>&1 & +minio server --config-dir /tmp/minio-internal --address ":9002" /tmp/minio-internal-idp2/{1...4} >/tmp/minio2_1.log 2>&1 & +minio server --config-dir /tmp/minio-internal --address ":9003" /tmp/minio-internal-idp3/{1...4} >/tmp/minio3_1.log 2>&1 & + +sleep 10 + +export MC_HOST_minio1=http://minio:minio123@localhost:9001 +export MC_HOST_minio2=http://minio:minio123@localhost:9002 +export MC_HOST_minio3=http://minio:minio123@localhost:9003 + +./mc admin replicate add minio1 minio2 minio3 + +./mc admin user add minio1 foobar foo12345 +./mc admin policy set minio1 consoleAdmin user=foobar +sleep 5 + +./mc admin user info minio2 foobar +./mc admin user info minio3 foobar +./mc admin policy add minio1 rw ./docs/site-replication/rw.json + +sleep 5 +./mc admin policy info minio2 rw >/dev/null 2>&1 +./mc admin policy info minio3 rw >/dev/null 2>&1 + +./mc admin 
policy remove minio3 rw + +sleep 10 +./mc admin policy info minio1 rw +if [ $? -eq 0 ]; then + echo "expecting the command to fail, exiting.." + exit_1; +fi + +./mc admin policy info minio2 rw +if [ $? -eq 0 ]; then + echo "expecting the command to fail, exiting.." + exit_1; +fi + +./mc admin user info minio1 foobar +if [ $? -ne 0 ]; then + echo "policy mapping missing, exiting.." + exit_1; +fi + +./mc admin user info minio2 foobar +if [ $? -ne 0 ]; then + echo "policy mapping missing, exiting.." + exit_1; +fi + +./mc admin user info minio3 foobar +if [ $? -ne 0 ]; then + echo "policy mapping missing, exiting.." + exit_1; +fi + +./mc admin user svcacct add minio2 foobar --access-key testsvc --secret-key testsvc123 +if [ $? -ne 0 ]; then + echo "adding svc account failed, exiting.." + exit_1; +fi + +sleep 10 + +./mc admin user svcacct info minio1 testsvc +if [ $? -ne 0 ]; then + echo "svc account not mirrored, exiting.." + exit_1; +fi + +./mc admin user svcacct info minio2 testsvc +if [ $? -ne 0 ]; then + echo "svc account not mirrored, exiting.." + exit_1; +fi + +./mc admin user svcacct rm minio1 testsvc +if [ $? -ne 0 ]; then + echo "removing svc account failed, exiting.." + exit_1; +fi + +sleep 10 +./mc admin user svcacct info minio2 testsvc +if [ $? -eq 0 ]; then + echo "svc account found after delete, exiting.." + exit_1; +fi + +./mc admin user svcacct info minio3 testsvc +if [ $? -eq 0 ]; then + echo "svc account found after delete, exiting.." + exit_1; +fi + +./mc mb minio1/newbucket + +sleep 5 +./mc stat minio2/newbucket +if [ $? -ne 0 ]; then + echo "expecting bucket to be present. exiting.." + exit_1; +fi + +./mc stat minio3/newbucket +if [ $? -ne 0 ]; then + echo "expecting bucket to be present. exiting.." + exit_1; +fi + +./mc cp README.md minio2/newbucket/ + +sleep 5 +./mc stat minio1/newbucket/README.md +if [ $? -ne 0 ]; then + echo "expecting object to be present. exiting.." + exit_1; +fi + +./mc stat minio3/newbucket/README.md +if [ $? 
-ne 0 ]; then + echo "expecting object to be present. exiting.." + exit_1; +fi + +./mc rm minio3/newbucket/README.md +sleep 5 + +./mc stat minio2/newbucket/README.md +if [ $? -eq 0 ]; then + echo "expected file to be deleted, exiting.." + exit_1; +fi + +./mc stat minio1/newbucket/README.md +if [ $? -eq 0 ]; then + echo "expected file to be deleted, exiting.." + exit_1; +fi + +./mc mb --with-lock minio3/newbucket-olock +sleep 5 + +enabled_minio2=$(./mc stat --json minio2/newbucket-olock| jq -r .metadata.ObjectLock.enabled) +if [ $? -ne 0 ]; then + echo "expected bucket to be mirrored with object-lock but not present, exiting..." + exit_1; +fi + +if [ "${enabled_minio2}" != "Enabled" ]; then + echo "expected bucket to be mirrored with object-lock enabled, exiting..." + exit_1; +fi + +enabled_minio1=$(./mc stat --json minio1/newbucket-olock| jq -r .metadata.ObjectLock.enabled) +if [ $? -ne 0 ]; then + echo "expected bucket to be mirrored with object-lock but not present, exiting..." + exit_1; +fi + +if [ "${enabled_minio1}" != "Enabled" ]; then + echo "expected bucket to be mirrored with object-lock enabled, exiting..." + exit_1; +fi