diff --git a/buildscripts/minio-upgrade.sh b/buildscripts/minio-upgrade.sh index ed24ce4ce..d90dfb503 100644 --- a/buildscripts/minio-upgrade.sh +++ b/buildscripts/minio-upgrade.sh @@ -10,6 +10,38 @@ cleanup() { docker volume prune -f } +verify_checksum_after_heal() { + local sum1 + sum1=$(curl -s "$2" | sha256sum); + mc admin heal --json -r "$1" >/dev/null; # test after healing + local sum1_heal + sum1_heal=$(curl -s "$2" | sha256sum); + + if [ "${sum1_heal}" != "${sum1}" ]; then + echo "mismatch expected ${sum1_heal}, got ${sum1}" + exit 1; + fi +} + +verify_checksum_mc() { + local expected + expected=$(mc cat "$1" | sha256sum) + local got + got=$(mc cat "$2" | sha256sum) + + if [ "${expected}" != "${got}" ]; then + echo "mismatch - expected ${expected}, got ${got}" + exit 1; + fi + echo "matches - ${expected}, got ${got}" +} + +add_alias() { + until (mc alias set minio http://127.0.0.1:9000 minioadmin minioadmin); do + echo "...waiting... for 5secs" && sleep 5 + done +} + __init__() { sudo apt install curl -y export GOPATH=/tmp/gopath @@ -22,48 +54,25 @@ __init__() { MINIO_VERSION=RELEASE.2019-12-19T22-52-26Z docker-compose \ -f "buildscripts/upgrade-tests/compose.yml" \ up -d --build - until (mc alias set minio http://127.0.0.1:9000 minioadmin minioadmin); do - echo "...waiting..." && sleep 5; - done + + add_alias mc mb minio/minio-test/ mc cp ./minio minio/minio-test/to-read/ mc cp /etc/hosts minio/minio-test/to-read/hosts mc policy set download minio/minio-test - mc cat minio/minio-test/to-read/minio | sha256sum - mc cat ./minio | sha256sum + + verify_checksum_mc ./minio minio/minio-test/to-read/minio + curl -s http://127.0.0.1:9000/minio-test/to-read/hosts | sha256sum MINIO_VERSION=dev docker-compose -f "buildscripts/upgrade-tests/compose.yml" stop } -verify_checksum_after_heal() { - sum1=$(curl -s "$2" | sha256sum); - mc admin heal --json -r "$1" >/dev/null; # test after healing - sum1_heal=$(curl -s "$2" | sha256sum); - - if [ "${sum1_heal}" != "${sum1}" ]; then - echo "mismatch expected ${sum1_heal}, got ${sum1}" - exit 1; - fi -} - -verify_checksum_mc() { - expected=$(mc cat "$1" | sha256sum) - got=$(mc cat "$2" | sha256sum) - - if [ "${expected}" != "${got}" ]; then - echo "mismatch expected ${expected}, got ${got}" - exit 1; - fi -} - main() { MINIO_VERSION=dev docker-compose -f "buildscripts/upgrade-tests/compose.yml" up -d --build - until (mc alias set minio http://127.0.0.1:9000 minioadmin minioadmin); do - echo "...waiting..." && sleep 5 - done + add_alias verify_checksum_after_heal minio/minio-test http://127.0.0.1:9000/minio-test/to-read/hosts diff --git a/cmd/erasure-healing-common.go b/cmd/erasure-healing-common.go index 07f696927..0f64c8d44 100644 --- a/cmd/erasure-healing-common.go +++ b/cmd/erasure-healing-common.go @@ -134,9 +134,9 @@ func listOnlineDisks(disks []StorageAPI, partsMetadata []FileInfo, errs []error) } // Returns the latest updated FileInfo files and error in case of failure. -func getLatestFileInfo(ctx context.Context, partsMetadata []FileInfo, errs []error, quorum int) (FileInfo, error) { +func getLatestFileInfo(ctx context.Context, partsMetadata []FileInfo, errs []error) (FileInfo, error) { // There should be atleast half correct entries, if not return failure - reducedErr := reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, quorum) + reducedErr := reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, len(partsMetadata)/2) if reducedErr != nil { return FileInfo{}, reducedErr } @@ -151,6 +151,10 @@ func getLatestFileInfo(ctx context.Context, partsMetadata []FileInfo, errs []err // Reduce list of UUIDs to a single common value - i.e. the last updated Time modTime := commonTime(modTimes) + if modTime.IsZero() || modTime.Equal(timeSentinel) { + return FileInfo{}, errErasureReadQuorum + } + // Interate through all the modTimes and count the FileInfo(s) with latest time. for index, t := range modTimes { if partsMetadata[index].IsValid() && t.Equal(modTime) { @@ -159,7 +163,11 @@ func getLatestFileInfo(ctx context.Context, partsMetadata []FileInfo, errs []err } } - if count < quorum { + if !latestFileInfo.IsValid() { + return FileInfo{}, errErasureReadQuorum + } + + if count < latestFileInfo.Erasure.DataBlocks { return FileInfo{}, errErasureReadQuorum } @@ -174,10 +182,8 @@ func getLatestFileInfo(ctx context.Context, partsMetadata []FileInfo, errs []err // - slice of errors about the state of data files on disk - can have // a not-found error or a hash-mismatch error. func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []FileInfo, - errs []error, bucket, object string, scanMode madmin.HealScanMode) ([]StorageAPI, []error) { - - // List of disks having latest version of the object xl.meta (by modtime) - _, modTime := listOnlineDisks(onlineDisks, partsMetadata, errs) + errs []error, latestMeta FileInfo, + bucket, object string, scanMode madmin.HealScanMode) ([]StorageAPI, []error) { availableDisks := make([]StorageAPI, len(onlineDisks)) dataErrs := make([]error, len(onlineDisks)) @@ -214,13 +220,13 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad dataErrs[i] = errs[i] continue } - if onlineDisk == nil { + if onlineDisk == OfflineDisk { dataErrs[i] = errDiskNotFound continue } meta := partsMetadata[i] - if !meta.ModTime.Equal(modTime) { + if !meta.ModTime.Equal(latestMeta.ModTime) || meta.DataDir != latestMeta.DataDir { dataErrs[i] = errFileCorrupt partsMetadata[i] = FileInfo{} continue @@ -268,17 +274,18 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad continue } + meta.DataDir = latestMeta.DataDir switch scanMode { case madmin.HealDeepScan: // disk has a valid xl.meta but may not have all the // parts. This is considered an outdated disk, since // it needs healing too. - if !partsMetadata[i].Deleted && !partsMetadata[i].IsRemote() { - dataErrs[i] = onlineDisk.VerifyFile(ctx, bucket, object, partsMetadata[i]) + if !meta.Deleted && !meta.IsRemote() { + dataErrs[i] = onlineDisk.VerifyFile(ctx, bucket, object, meta) } case madmin.HealNormalScan: - if !partsMetadata[i].Deleted && !partsMetadata[i].IsRemote() { - dataErrs[i] = onlineDisk.CheckParts(ctx, bucket, object, partsMetadata[i]) + if !meta.Deleted && !meta.IsRemote() { + dataErrs[i] = onlineDisk.CheckParts(ctx, bucket, object, meta) } } diff --git a/cmd/erasure-healing-common_test.go b/cmd/erasure-healing-common_test.go index 3dde563c6..d5c7963a7 100644 --- a/cmd/erasure-healing-common_test.go +++ b/cmd/erasure-healing-common_test.go @@ -20,6 +20,7 @@ package cmd import ( "bytes" "context" + "errors" "fmt" "os" "path/filepath" @@ -193,7 +194,7 @@ func TestListOnlineDisks(t *testing.T) { } partsMetadata, errs := readAllFileInfo(ctx, erasureDisks, bucket, object, "", false) - fi, err := getLatestFileInfo(ctx, partsMetadata, errs, getReadQuorum(len(disks))) + fi, err := getLatestFileInfo(ctx, partsMetadata, errs) if err != nil { t.Fatalf("Failed to getLatestFileInfo %v", err) } @@ -250,7 +251,7 @@ func TestListOnlineDisks(t *testing.T) { t.Fatalf("Expected modTime to be equal to %v but was found to be %v", test.expectedTime, modTime) } - availableDisks, newErrs := disksWithAllParts(ctx, onlineDisks, partsMetadata, test.errs, bucket, object, madmin.HealDeepScan) + availableDisks, newErrs := disksWithAllParts(ctx, onlineDisks, partsMetadata, test.errs, fi, bucket, object, madmin.HealDeepScan) test.errs = newErrs if test._tamperBackend != noTamper { @@ -365,7 +366,7 @@ func TestListOnlineDisksSmallObjects(t *testing.T) { } partsMetadata, errs := readAllFileInfo(ctx, erasureDisks, bucket, object, "", true) - _, err = getLatestFileInfo(ctx, partsMetadata, errs, getReadQuorum(len(disks))) + _, err = getLatestFileInfo(ctx, partsMetadata, errs) if err != nil { t.Fatalf("Failed to getLatestFileInfo %v", err) } @@ -421,9 +422,9 @@ func TestListOnlineDisksSmallObjects(t *testing.T) { } partsMetadata, errs = readAllFileInfo(ctx, erasureDisks, bucket, object, "", true) - _, err = getLatestFileInfo(ctx, partsMetadata, errs, len(disks)/2) - if err != nil { - t.Fatalf("Failed to getLatestFileInfo %v", err) + fi, err := getLatestFileInfo(ctx, partsMetadata, errs) + if !errors.Is(err, errErasureReadQuorum) { + t.Fatalf("Failed to getLatestFileInfo, expected %v, got %v", errErasureReadQuorum, err) } onlineDisks, modTime := listOnlineDisks(erasureDisks, partsMetadata, test.errs) @@ -432,7 +433,7 @@ func TestListOnlineDisksSmallObjects(t *testing.T) { test.expectedTime, modTime) } - availableDisks, newErrs := disksWithAllParts(ctx, onlineDisks, partsMetadata, test.errs, bucket, object, madmin.HealDeepScan) + availableDisks, newErrs := disksWithAllParts(ctx, onlineDisks, partsMetadata, test.errs, fi, bucket, object, madmin.HealDeepScan) test.errs = newErrs if test._tamperBackend != noTamper { @@ -486,9 +487,14 @@ func TestDisksWithAllParts(t *testing.T) { t.Fatalf("Failed to read xl meta data %v", err) } + fi, err := getLatestFileInfo(ctx, partsMetadata, errs) + if err != nil { + t.Fatalf("Failed to get quorum consistent fileInfo %v", err) + } + erasureDisks, _ = listOnlineDisks(erasureDisks, partsMetadata, errs) - filteredDisks, errs := disksWithAllParts(ctx, erasureDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan) + filteredDisks, errs := disksWithAllParts(ctx, erasureDisks, partsMetadata, errs, fi, bucket, object, madmin.HealDeepScan) if len(filteredDisks) != len(erasureDisks) { t.Errorf("Unexpected number of disks: %d", len(filteredDisks)) @@ -509,7 +515,7 @@ func TestDisksWithAllParts(t *testing.T) { partsMetadata[0].ModTime = partsMetadata[0].ModTime.Add(-1 * time.Hour) errs = make([]error, len(erasureDisks)) - filteredDisks, _ = disksWithAllParts(ctx, erasureDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan) + filteredDisks, _ = disksWithAllParts(ctx, erasureDisks, partsMetadata, errs, fi, bucket, object, madmin.HealDeepScan) if len(filteredDisks) != len(erasureDisks) { t.Errorf("Unexpected number of disks: %d", len(filteredDisks)) @@ -529,7 +535,7 @@ func TestDisksWithAllParts(t *testing.T) { partsMetadata[1].DataDir = "foo-random" errs = make([]error, len(erasureDisks)) - filteredDisks, _ = disksWithAllParts(ctx, erasureDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan) + filteredDisks, _ = disksWithAllParts(ctx, erasureDisks, partsMetadata, errs, fi, bucket, object, madmin.HealDeepScan) if len(filteredDisks) != len(erasureDisks) { t.Errorf("Unexpected number of disks: %d", len(filteredDisks)) @@ -565,7 +571,7 @@ func TestDisksWithAllParts(t *testing.T) { } errs = make([]error, len(erasureDisks)) - filteredDisks, errs = disksWithAllParts(ctx, erasureDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan) + filteredDisks, errs = disksWithAllParts(ctx, erasureDisks, partsMetadata, errs, fi, bucket, object, madmin.HealDeepScan) if len(filteredDisks) != len(erasureDisks) { t.Errorf("Unexpected number of disks: %d", len(filteredDisks)) diff --git a/cmd/erasure-healing.go b/cmd/erasure-healing.go index 15c8aa41d..8616c441b 100644 --- a/cmd/erasure-healing.go +++ b/cmd/erasure-healing.go @@ -288,13 +288,20 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s // Re-read when we have lock... partsMetadata, errs := readAllFileInfo(ctx, storageDisks, bucket, object, versionID, true) - if _, err = getLatestFileInfo(ctx, partsMetadata, errs, er.defaultParityCount); err != nil { + if _, err = getLatestFileInfo(ctx, partsMetadata, errs); err != nil { return er.purgeObjectDangling(ctx, bucket, object, versionID, partsMetadata, errs, []error{}, opts) } // List of disks having latest version of the object er.meta // (by modtime). - _, modTime := listOnlineDisks(storageDisks, partsMetadata, errs) + onlineDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs) + + // Latest FileInfo for reference. If a valid metadata is not + // present, it is as good as object not found. + latestMeta, err := pickValidFileInfo(ctx, partsMetadata, modTime, result.DataBlocks) + if err != nil { + return result, toObjectErr(err, bucket, object, versionID) + } // List of disks having all parts as per latest metadata. // NOTE: do not pass in latestDisks to diskWithAllParts since @@ -305,15 +312,8 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s // used here for reconstruction. This is done to ensure that // we do not skip drives that have inconsistent metadata to be // skipped from purging when they are stale. - availableDisks, dataErrs := disksWithAllParts(ctx, storageDisks, partsMetadata, - errs, bucket, object, scanMode) - - // Latest FileInfo for reference. If a valid metadata is not - // present, it is as good as object not found. - latestMeta, err := pickValidFileInfo(ctx, partsMetadata, modTime, result.DataBlocks) - if err != nil { - return result, toObjectErr(err, bucket, object, versionID) - } + availableDisks, dataErrs := disksWithAllParts(ctx, onlineDisks, partsMetadata, + errs, latestMeta, bucket, object, scanMode) // Loop to find number of disks with valid data, per-drive // data state and a list of outdated disks on which data needs diff --git a/cmd/erasure-healing_test.go b/cmd/erasure-healing_test.go index ecc82b54f..da91061da 100644 --- a/cmd/erasure-healing_test.go +++ b/cmd/erasure-healing_test.go @@ -416,7 +416,7 @@ func TestHealObjectCorrupted(t *testing.T) { } fileInfos, errs := readAllFileInfo(ctx, erasureDisks, bucket, object, "", false) - fi, err := getLatestFileInfo(ctx, fileInfos, errs, er.defaultParityCount) + fi, err := getLatestFileInfo(ctx, fileInfos, errs) if err != nil { t.Fatalf("Failed to getLatestFileInfo - %v", err) } @@ -441,7 +441,7 @@ func TestHealObjectCorrupted(t *testing.T) { } fileInfos, errs = readAllFileInfo(ctx, erasureDisks, bucket, object, "", false) - nfi, err := getLatestFileInfo(ctx, fileInfos, errs, er.defaultParityCount) + nfi, err := getLatestFileInfo(ctx, fileInfos, errs) if err != nil { t.Fatalf("Failed to getLatestFileInfo - %v", err) } @@ -467,7 +467,7 @@ func TestHealObjectCorrupted(t *testing.T) { } fileInfos, errs = readAllFileInfo(ctx, erasureDisks, bucket, object, "", false) - nfi, err = getLatestFileInfo(ctx, fileInfos, errs, er.defaultParityCount) + nfi, err = getLatestFileInfo(ctx, fileInfos, errs) if err != nil { t.Fatalf("Failed to getLatestFileInfo - %v", err) } diff --git a/cmd/erasure-metadata.go b/cmd/erasure-metadata.go index 30efc0e9e..4efe4b49f 100644 --- a/cmd/erasure-metadata.go +++ b/cmd/erasure-metadata.go @@ -390,15 +390,11 @@ func writeUniqueFileInfo(ctx context.Context, disks []StorageAPI, bucket, prefix // writeQuorum is the min required disks to write data. func objectQuorumFromMeta(ctx context.Context, partsMetaData []FileInfo, errs []error, defaultParityCount int) (objectReadQuorum, objectWriteQuorum int, err error) { // get the latest updated Metadata and a count of all the latest updated FileInfo(s) - latestFileInfo, err := getLatestFileInfo(ctx, partsMetaData, errs, defaultParityCount) + latestFileInfo, err := getLatestFileInfo(ctx, partsMetaData, errs) if err != nil { return 0, 0, err } - if !latestFileInfo.IsValid() { - return 0, 0, errErasureReadQuorum - } - parityBlocks := globalStorageClass.GetParityForSC(latestFileInfo.Metadata[xhttp.AmzStorageClass]) if parityBlocks <= 0 { parityBlocks = defaultParityCount