fix: ensure proper usage of DataDir (#12300)

- GetObject() should always use a common dataDir to
  read from when it starts reading; this allows the
  code in erasure decoding to have sane expectations.

- Healing should always heal on the common dataDir;
  this allows the code in dangling object detection to
  purge dangling content.

Both of these situations can happen under certain types
of retries during PUT, e.g. when the server is restarting;
some namespace entries might be left over.
This commit is contained in:
Harshavardhana
2021-05-14 16:50:47 -07:00
committed by GitHub
parent 5b18c57a54
commit d84261aa6d
7 changed files with 49 additions and 31 deletions

View File

@@ -28,7 +28,6 @@ import (
"github.com/minio/madmin-go"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/bucket/lifecycle"
"github.com/minio/minio/pkg/sync/errgroup"
)
@@ -267,10 +266,27 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
// List of disks having latest version of the object er.meta
// (by modtime).
latestDisks, modTime, dataDir := listOnlineDisks(storageDisks, partsMetadata, errs)
_, modTime, dataDir := listOnlineDisks(storageDisks, partsMetadata, errs)
// make sure all parts metadata dataDir is same as returned by listOnlineDisks()
// the reason is its possible that some of the disks might have stale data, for those
// we simply override them with maximally occurring 'dataDir' - this ensures that
// disksWithAllParts() verifies same dataDir across all drives.
for i := range partsMetadata {
partsMetadata[i].DataDir = dataDir
}
// List of disks having all parts as per latest metadata.
// NOTE: do not pass in latestDisks to diskWithAllParts since
// the diskWithAllParts needs to reach the drive to ensure
// validity of the metadata content, we should make sure that
// we pass in disks as is for it to be verified. Once verified
// the disksWithAllParts() returns the actual disks that can be
// used here for reconstruction. This is done to ensure that
// we do not skip drives that have inconsistent metadata to be
// skipped from purging when they are stale.
availableDisks, dataErrs := disksWithAllParts(ctx, storageDisks, partsMetadata, errs, bucket, object, scanMode)
// List of disks having all parts as per latest er.meta.
availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object, scanMode)
// Loop to find number of disks with valid data, per-drive
// data state and a list of outdated disks on which data needs
// to be healed.
@@ -336,6 +352,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
// File is fully gone, fileInfo is empty.
return defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, errs, bucket, object, versionID, er.defaultParityCount), err
}
// If less than read quorum number of disks have all the parts
// of the data, we can't reconstruct the erasure-coded data.
if numAvailableDisks < result.DataBlocks {
@@ -405,7 +422,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
result.ParityBlocks = latestMeta.Erasure.ParityBlocks
// Reorder so that we have data disks first and parity disks next.
latestDisks = shuffleDisks(availableDisks, latestMeta.Erasure.Distribution)
latestDisks := shuffleDisks(availableDisks, latestMeta.Erasure.Distribution)
outDatedDisks = shuffleDisks(outDatedDisks, latestMeta.Erasure.Distribution)
partsMetadata = shufflePartsMetadata(partsMetadata, latestMeta.Erasure.Distribution)
copyPartsMetadata = shufflePartsMetadata(copyPartsMetadata, latestMeta.Erasure.Distribution)
@@ -823,7 +840,7 @@ func isObjectDangling(metaArr []FileInfo, errs []error, dataErrs []error) (valid
break
}
if validMeta.Deleted || validMeta.TransitionStatus == lifecycle.TransitionComplete {
if validMeta.Deleted || validMeta.IsRemote() {
// notFoundParts is ignored since a
// - delete marker does not have any parts
// - transition status of complete has no parts