add necessary protection to err, fileInfo slice reads and writes (#18854)

Protection was in place; however, it covered only some
areas, so we rearranged the code to ensure the locks are
held properly wherever these slices are read or written.

Along with this, remove the DataShardFix code altogether:
in deployments with many drive replacements, this heuristic
can flag healthy shards as corrupt and lead to quorum loss.
Harshavardhana
2024-01-24 01:08:23 -08:00
committed by GitHub
parent 152023e837
commit 708cebe7f0
7 changed files with 57 additions and 339 deletions
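
For context, the heart of this change is that every goroutine writing to the shared err and fileInfo slices now funnels through a single mutex-guarded section, and readers take the same lock. Below is a minimal standalone sketch of that pattern; readOne and results are hypothetical names, while the real code guards rawArr, metaArr, and errs with a sync.Mutex named rw (see getObjectFileInfo further down).

package main

import (
	"fmt"
	"sync"
)

// readOne is a hypothetical per-drive read; it stands in for the
// readFileInfo/readRawFileInfo calls in the actual code.
func readOne(i int) (string, error) {
	return fmt.Sprintf("drive-%d", i), nil
}

func main() {
	const drives = 4
	results := make([]string, drives)
	errs := make([]error, drives)

	var wg sync.WaitGroup
	var rw sync.Mutex // guards results and errs, like rw in getObjectFileInfo
	for i := 0; i < drives; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			res, err := readOne(i)
			rw.Lock() // one guarded section for all shared-slice writes
			results[i], errs[i] = res, err
			rw.Unlock()
		}(i)
	}
	wg.Wait()

	// Readers take the same lock whenever writers may still be running;
	// after wg.Wait() it is not strictly needed, but it shows the read
	// side of the pattern.
	rw.Lock()
	fmt.Println(results, errs)
	rw.Unlock()
}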


@@ -69,8 +69,6 @@ import (
// serverDebugLog will enable debug printing
var serverDebugLog = env.Get("_MINIO_SERVER_DEBUG", config.EnableOff) == config.EnableOn
- var shardDiskTimeDelta time.Duration
func init() {
if runtime.GOOS == "windows" {
if mousetrap.StartedByExplorer() {
@@ -107,12 +105,6 @@ func init() {
gob.Register(madmin.XFSErrorConfigs{})
gob.Register(map[string]interface{}{})
- var err error
- shardDiskTimeDelta, err = time.ParseDuration(env.Get("_MINIO_SHARD_DISKTIME_DELTA", "1m"))
- if err != nil {
- shardDiskTimeDelta = 1 * time.Minute
- }
// All minio-go and madmin-go API operations shall be performed only once,
// another way to look at this is we are turning off retries.
minio.MaxRetry = 1


@@ -191,19 +191,6 @@ func filterOnlineDisksInplace(fi FileInfo, partsMetadata []FileInfo, onlineDisks
}
}
- // Extracts list of disk mtimes from FileInfo slice and returns, skips
- // slice elements that have errors.
- func listObjectDiskMtimes(partsMetadata []FileInfo) (diskMTimes []time.Time) {
- diskMTimes = bootModtimes(len(partsMetadata))
- for index, metadata := range partsMetadata {
- if metadata.IsValid() {
- // Once the file is found, save the disk mtime saved on disk.
- diskMTimes[index] = metadata.DiskMTime
- }
- }
- return diskMTimes
- }
// Notes:
// There are 5 possible states a disk could be in,
// 1. __online__ - has the latest copy of xl.meta - returned by listOnlineDisks
@@ -277,13 +264,6 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
errs []error, latestMeta FileInfo, bucket, object string,
scanMode madmin.HealScanMode) ([]StorageAPI, []error, time.Time,
) {
- var diskMTime time.Time
- var shardFix bool
- if !latestMeta.DataShardFixed() {
- diskMTime = pickValidDiskTimeWithQuorum(partsMetadata,
- latestMeta.Erasure.DataBlocks)
- }
availableDisks := make([]StorageAPI, len(onlineDisks))
dataErrs := make([]error, len(onlineDisks))
@@ -351,23 +331,6 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
}
}
- if !diskMTime.Equal(timeSentinel) && !diskMTime.IsZero() {
- if !partsMetadata[i].AcceptableDelta(diskMTime, shardDiskTimeDelta) {
- // not with in acceptable delta, skip.
- // If disk mTime mismatches it is considered outdated
- // https://github.com/minio/minio/pull/13803
- //
- // This check only is active if we could find maximally
- // occurring disk mtimes that are somewhat same across
- // the quorum. Allowing to skip those shards which we
- // might think are wrong.
- shardFix = true
- partsMetadata[i] = FileInfo{}
- dataErrs[i] = errFileCorrupt
- continue
- }
- }
// Always check data, if we got it.
if (len(meta.Data) > 0 || meta.Size == 0) && len(meta.Parts) > 0 {
checksumInfo := meta.Erasure.GetChecksumInfo(meta.Parts[0].Number)
@@ -410,10 +373,5 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
}
}
- if shardFix {
- // Only when shard is fixed return an appropriate disk mtime value.
- return availableDisks, dataErrs, diskMTime
- } // else return timeSentinel for disk mtime
return availableDisks, dataErrs, timeSentinel
}


@@ -33,8 +33,6 @@ import (
"github.com/minio/pkg/v2/sync/errgroup"
)
- const reservedMetadataPrefixLowerDataShardFix = ReservedMetadataPrefixLower + "data-shard-fix"
//go:generate stringer -type=healingMetric -trimprefix=healingMetric $GOFILE
type healingMetric uint8
@@ -45,29 +43,6 @@ const (
healingMetricCheckAbandonedParts
)
- // AcceptableDelta returns 'true' if the fi.DiskMTime is under
- // acceptable delta of "delta" duration with maxTime.
- //
- // This code is primarily used for heuristic detection of
- // incorrect shards, as per https://github.com/minio/minio/pull/13803
- //
- // This check only is active if we could find maximally
- // occurring disk mtimes that are somewhat same across
- // the quorum. Allowing to skip those shards which we
- // might think are wrong.
- func (fi FileInfo) AcceptableDelta(maxTime time.Time, delta time.Duration) bool {
- diff := maxTime.Sub(fi.DiskMTime)
- if diff < 0 {
- diff = -diff
- }
- return diff < delta
- }
- // DataShardFixed - data shard fixed?
- func (fi FileInfo) DataShardFixed() bool {
- return fi.Metadata[reservedMetadataPrefixLowerDataShardFix] == "true"
- }
func (er erasureObjects) listAndHeal(bucket, prefix string, scanMode madmin.HealScanMode, healEntry func(string, metaCacheEntry, madmin.HealScanMode) error) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
@@ -311,7 +286,7 @@ func (er *erasureObjects) healObject(ctx context.Context, bucket string, object
// used here for reconstruction. This is done to ensure that
// drives with inconsistent metadata are not skipped from
// purging when they are stale.
- availableDisks, dataErrs, diskMTime := disksWithAllParts(ctx, onlineDisks, partsMetadata,
+ availableDisks, dataErrs, _ := disksWithAllParts(ctx, onlineDisks, partsMetadata,
errs, latestMeta, bucket, object, scanMode)
var erasure Erasure
@@ -627,20 +602,6 @@ func (er *erasureObjects) healObject(ctx context.Context, bucket string, object
}
}
- if !diskMTime.Equal(timeSentinel) && !diskMTime.IsZero() {
- // Update metadata to indicate special fix.
- _, err = er.PutObjectMetadata(ctx, bucket, object, ObjectOptions{
- NoLock: true,
- UserDefined: map[string]string{
- reservedMetadataPrefixLowerDataShardFix: "true",
- // another reserved metadata to capture original disk-mtime
- // captured for this version of the object, to be used
- // possibly in future to heal other versions if possible.
- ReservedMetadataPrefixLower + "disk-mtime": diskMTime.String(),
- },
- })
- }
return result, nil
}
@@ -919,21 +880,25 @@ func isObjectDangling(metaArr []FileInfo, errs []error, dataErrs []error) (valid
// or when xl.meta is not readable in read quorum disks.
danglingErrsCount := func(cerrs []error) (int, int, int) {
var (
- notFoundCount int
- corruptedCount int
- diskNotFoundCount int
+ notFoundCount int
+ corruptedCount int
+ driveNotFoundCount int
)
for _, readErr := range cerrs {
if readErr == nil {
continue
}
switch {
case errors.Is(readErr, errFileNotFound) || errors.Is(readErr, errFileVersionNotFound):
notFoundCount++
case errors.Is(readErr, errFileCorrupt):
corruptedCount++
- case errors.Is(readErr, errDiskNotFound):
- diskNotFoundCount++
+ default:
+ // All other errors are non-actionable
+ driveNotFoundCount++
}
}
- return notFoundCount, corruptedCount, diskNotFoundCount
+ return notFoundCount, corruptedCount, driveNotFoundCount
}
ndataErrs := make([]error, len(dataErrs))
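
For reference, the AcceptableDelta check removed above was a symmetric clock-delta comparison. A standalone illustration with made-up timestamps (a sketch of the removed heuristic, not the MinIO code itself):

package main

import (
	"fmt"
	"time"
)

// acceptableDelta mirrors the removed FileInfo.AcceptableDelta: the
// absolute difference between a shard's disk mtime and the quorum
// mtime had to stay under delta (default 1m via _MINIO_SHARD_DISKTIME_DELTA).
func acceptableDelta(diskMTime, maxTime time.Time, delta time.Duration) bool {
	diff := maxTime.Sub(diskMTime)
	if diff < 0 {
		diff = -diff
	}
	return diff < delta
}

func main() {
	quorum := time.Date(2024, 1, 24, 12, 0, 0, 0, time.UTC)

	// 30s off the quorum mtime: within the 1m delta, the shard is kept.
	fmt.Println(acceptableDelta(quorum.Add(30*time.Second), quorum, time.Minute))

	// 5m off, as on a freshly replaced drive: the shard would have been
	// flagged errFileCorrupt, which is how the heuristic could push an
	// object below quorum.
	fmt.Println(acceptableDelta(quorum.Add(5*time.Minute), quorum, time.Minute))
}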


@@ -374,17 +374,6 @@ func findFileInfoInQuorum(ctx context.Context, metaArr []FileInfo, modTime time.
return FileInfo{}, errErasureReadQuorum
}
- func pickValidDiskTimeWithQuorum(metaArr []FileInfo, quorum int) time.Time {
- diskMTimes := listObjectDiskMtimes(metaArr)
- diskMTime, diskMaxima := commonTimeAndOccurence(diskMTimes, shardDiskTimeDelta)
- if diskMaxima >= quorum {
- return diskMTime
- }
- return timeSentinel
- }
// pickValidFileInfo - picks one valid FileInfo content and returns from a
// slice of FileInfo.
func pickValidFileInfo(ctx context.Context, metaArr []FileInfo, modTime time.Time, etag string, quorum int) (FileInfo, error) {
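
The removed pickValidDiskTimeWithQuorum only returned a disk mtime when at least a quorum of disks agreed on one within the delta; otherwise it fell back to timeSentinel. A simplified self-contained sketch of that idea; commonTimeAndOccurence's exact semantics are approximated here, and the names are illustrative:

package main

import (
	"fmt"
	"time"
)

// pickTimeWithQuorum counts, for each candidate mtime, how many disk
// mtimes fall within delta of it, and returns the most common one only
// if it reaches quorum. Approximates the removed logic; not MinIO code.
func pickTimeWithQuorum(mtimes []time.Time, delta time.Duration, quorum int) (time.Time, bool) {
	var best time.Time
	bestCount := 0
	for _, candidate := range mtimes {
		count := 0
		for _, t := range mtimes {
			d := candidate.Sub(t)
			if d < 0 {
				d = -d
			}
			if d <= delta {
				count++
			}
		}
		if count > bestCount {
			best, bestCount = candidate, count
		}
	}
	if bestCount >= quorum {
		return best, true
	}
	return time.Time{}, false // caller fell back to timeSentinel
}

func main() {
	base := time.Date(2024, 1, 24, 12, 0, 0, 0, time.UTC)
	mtimes := []time.Time{base, base.Add(10 * time.Second), base.Add(2 * time.Hour)}

	// Two mtimes agree within 1m; the 2h outlier (a replaced drive) is
	// outvoted, so with quorum=2 the common mtime wins.
	t, ok := pickTimeWithQuorum(mtimes, time.Minute, 2)
	fmt.Println(t, ok)
}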


@@ -227,30 +227,6 @@ func (er erasureObjects) GetObjectNInfo(ctx context.Context, bucket, object stri
return nil, toObjectErr(err, bucket, object)
}
- if !fi.DataShardFixed() {
- diskMTime := pickValidDiskTimeWithQuorum(metaArr, fi.Erasure.DataBlocks)
- if !diskMTime.Equal(timeSentinel) && !diskMTime.IsZero() {
- for index := range onlineDisks {
- if onlineDisks[index] == OfflineDisk {
- continue
- }
- if !metaArr[index].IsValid() {
- continue
- }
- if !metaArr[index].AcceptableDelta(diskMTime, shardDiskTimeDelta) {
- // If disk mTime mismatches it is considered outdated
- // https://github.com/minio/minio/pull/13803
- //
- // This check only is active if we could find maximally
- // occurring disk mtimes that are somewhat same across
- // the quorum. Allowing to skip those shards which we
- // might think are wrong.
- onlineDisks[index] = OfflineDisk
- }
- }
- }
- }
objInfo := fi.ToObjectInfo(bucket, object, opts.Versioned || opts.VersionSuspended)
if objInfo.DeleteMarker {
if opts.VersionID == "" {
@@ -511,7 +487,7 @@ func (er erasureObjects) deleteIfDangling(ctx context.Context, bucket, object st
// count the number of offline disks
offline := 0
for i := 0; i < max(len(errs), len(dataErrs)); i++ {
- if i < len(errs) && errs[i] == errDiskNotFound || i < len(dataErrs) && dataErrs[i] == errDiskNotFound {
+ if i < len(errs) && errors.Is(errs[i], errDiskNotFound) || i < len(dataErrs) && errors.Is(dataErrs[i], errDiskNotFound) {
offline++
}
}
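
A note on the == to errors.Is switch above: once storage errors arrive wrapped, pointer equality no longer matches, while errors.Is walks the wrap chain. A small self-contained sketch (errDiskNotFound is redeclared locally for illustration):

package main

import (
	"errors"
	"fmt"
)

var errDiskNotFound = errors.New("drive not found")

func main() {
	// An annotated error, e.g. produced with fmt.Errorf("...: %w", err).
	err := fmt.Errorf("reading xl.meta: %w", errDiskNotFound)

	fmt.Println(err == errDiskNotFound)          // false: equality misses the wrapped error
	fmt.Println(errors.Is(err, errDiskNotFound)) // true: errors.Is unwraps
}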
@@ -768,8 +744,6 @@ func readAllXL(ctx context.Context, disks []StorageAPI, bucket, object string, r
}
func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object string, opts ObjectOptions, readData bool) (FileInfo, []FileInfo, []StorageAPI, error) {
- var mu sync.Mutex
rawArr := make([]RawFileInfo, er.setDriveCount)
metaArr := make([]FileInfo, er.setDriveCount)
errs := make([]error, er.setDriveCount)
@@ -780,14 +754,16 @@ func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object s
done := make(chan bool, er.setDriveCount)
disks := er.getDisks()
- mrfCheck := make(chan FileInfo)
- defer close(mrfCheck)
ropts := ReadOptions{
ReadData: readData,
Healing: false,
}
+ mrfCheck := make(chan FileInfo)
+ defer close(mrfCheck)
+ var rw sync.Mutex
// Ask for all disks first;
go func() {
ctx, cancel := context.WithCancel(ctx)
@@ -799,31 +775,36 @@ func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object s
done <- false
continue
}
if !disk.IsOnline() {
done <- false
continue
}
wg.Add(1)
go func(i int, disk StorageAPI) {
defer wg.Done()
- var fi FileInfo
- var err error
+ var (
+ fi FileInfo
+ rfi RawFileInfo
+ err error
+ )
if opts.VersionID != "" {
// Read a specific version ID
fi, err = readFileInfo(ctx, disk, bucket, object, opts.VersionID, ropts)
- mu.Lock()
- metaArr[i], errs[i] = fi, err
- mu.Unlock()
} else {
// Read the latest version
- var ri RawFileInfo
- ri, err = readRawFileInfo(ctx, disk, bucket, object, readData)
- mu.Lock()
- rawArr[i], errs[i] = ri, err
- mu.Unlock()
+ rfi, err = readRawFileInfo(ctx, disk, bucket, object, readData)
if err == nil {
- fi, err = fileInfoFromRaw(ri, bucket, object, readData, opts.InclFreeVersions, true)
- mu.Lock()
- metaArr[i], errs[i] = fi, err
- mu.Unlock()
+ fi, err = fileInfoFromRaw(rfi, bucket, object, readData, opts.InclFreeVersions, true)
}
}
+ rw.Lock()
+ rawArr[i] = rfi
+ metaArr[i], errs[i] = fi, err
+ rw.Unlock()
done <- err == nil
}(i, disk)
}
@@ -835,20 +816,24 @@ func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object s
if !ok {
return
}
if fi.Deleted {
return
}
// if one of the disks is offline, return right here; no need
// to attempt a heal on the object.
if countErrs(errs, errDiskNotFound) > 0 {
return
}
var missingBlocks int
for i := range errs {
if errors.Is(errs[i], errFileNotFound) {
missingBlocks++
}
}
// if missing metadata can be reconstructed, attempt to reconstruct.
// additionally, do not heal delete markers inline; let them be
// healed by the regular heal process.
@@ -879,13 +864,12 @@ func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object s
minDisks = er.setDriveCount - er.defaultParityCount
}
- calcQuorum := func() (FileInfo, []FileInfo, []StorageAPI, time.Time, string, error) {
+ calcQuorum := func(metaArr []FileInfo, errs []error) (FileInfo, []FileInfo, []StorageAPI, time.Time, string, error) {
readQuorum, _, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount)
if err != nil {
return FileInfo{}, nil, nil, time.Time{}, "", err
}
- err = reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, readQuorum)
- if err != nil {
+ if err := reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, readQuorum); err != nil {
return FileInfo{}, nil, nil, time.Time{}, "", err
}
onlineDisks, modTime, etag := listOnlineDisks(disks, metaArr, errs, readQuorum)
@@ -895,7 +879,11 @@ func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object s
}
onlineMeta := make([]FileInfo, len(metaArr))
- copy(onlineMeta, metaArr)
+ for i, disk := range onlineDisks {
+ if disk != nil {
+ onlineMeta[i] = metaArr[i]
+ }
+ }
return fi, onlineMeta, onlineDisks, modTime, etag, nil
}
@@ -922,20 +910,24 @@ func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object s
continue
}
}
+ rw.Lock()
if opts.VersionID == "" && totalResp == er.setDriveCount {
// Disks cannot agree about the latest version, pass this to a more advanced code
- metaArr, errs = pickLatestQuorumFilesInfo(ctx, rawArr, errs, bucket, object, readData, opts.InclFreeVersions, true)
+ fi, onlineMeta, onlineDisks, modTime, etag, err = calcQuorum(pickLatestQuorumFilesInfo(ctx,
+ rawArr, errs, bucket, object, readData, opts.InclFreeVersions, true))
+ } else {
+ fi, onlineMeta, onlineDisks, modTime, etag, err = calcQuorum(metaArr, errs)
}
- mu.Lock()
- fi, onlineMeta, onlineDisks, modTime, etag, err = calcQuorum()
- mu.Unlock()
+ rw.Unlock()
if err == nil && fi.InlineData() {
break
}
}
if err != nil {
- if shouldCheckForDangling(err, errs, bucket) {
+ // We can only look for dangling objects if we received all the responses; if we
+ // did not, we simply ignore it, since we can't tell for sure if it's a dangling object.
+ if totalResp == er.setDriveCount && shouldCheckForDangling(err, errs, bucket) {
_, derr := er.deleteIfDangling(context.Background(), bucket, object, metaArr, errs, nil, opts)
if derr != nil {
err = derr