mirror of https://github.com/minio/minio.git (synced 2025-01-11 15:03:22 -05:00)
Always return a default heal item upon unexpected error (#6556)
Never return an empty result item, even on error: populate it with sensible default values and, based on the errors encountered, send the correct result reply.
This commit is contained in:
parent 274b35154c
commit 223967fd32
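In short, every premature `return result, err` (where `result` was still the zero value) now goes through a new helper, `defaultHealResult`, which fills in bucket, object, disk count, per-drive states, and a sentinel object size before the error is propagated. Below is a minimal, runnable sketch of that pattern; `HealResult`, `defaultResult`, and `heal` are simplified stand-ins for `madmin.HealResultItem`, `defaultHealResult`, and `healObject`, not MinIO's actual types.

package main

import (
	"errors"
	"fmt"
)

// HealResult is a simplified stand-in for madmin.HealResultItem.
type HealResult struct {
	Bucket, Object string
	DiskCount      int
	ObjectSize     int64    // -1 when the size could not be determined
	DriveStates    []string // e.g. "offline", "missing", "corrupt"
}

// defaultResult fills every field with a safe default, so callers never
// see empty arrays or zero values on an error path.
func defaultResult(disks []string, bucket, object string) HealResult {
	r := HealResult{
		Bucket:     bucket,
		Object:     object,
		DiskCount:  len(disks),
		ObjectSize: -1,
	}
	for _, d := range disks {
		if d == "" { // stand-in for a nil/unreachable disk
			r.DriveStates = append(r.DriveStates, "offline")
		} else {
			r.DriveStates = append(r.DriveStates, "missing")
		}
	}
	return r
}

// heal returns a fully populated default item even when it errors out
// early, instead of the zero-value HealResult.
func heal(disks []string, bucket, object string) (HealResult, error) {
	if len(disks)/2 < 2 { // quorum check, illustrative only
		return defaultResult(disks, bucket, object), errors.New("read quorum not met")
	}
	// ... actual healing elided ...
	return defaultResult(disks, bucket, object), nil
}

func main() {
	res, err := heal([]string{"disk1", ""}, "mybucket", "myobject")
	fmt.Printf("%+v (err: %v)\n", res, err)
}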
@@ -158,17 +158,15 @@ func getLatestXLMeta(ctx context.Context, partsMetadata []xlMetaV1, errs []error
 //
 // - slice of errors about the state of data files on disk - can have
 //   a not-found error or a hash-mismatch error.
 //
-// - non-nil error if any of the disks failed unexpectedly (i.e. error
-//   other than file not found and not a checksum error).
 func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket,
-	object string) ([]StorageAPI, []error, error) {
+	object string) ([]StorageAPI, []error) {
 	availableDisks := make([]StorageAPI, len(onlineDisks))
 	buffer := []byte{}
 	dataErrs := make([]error, len(onlineDisks))

 	for i, onlineDisk := range onlineDisks {
 		if onlineDisk == nil {
 			dataErrs[i] = errDiskNotFound
 			continue
 		}
@@ -196,8 +194,8 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
 			break
 		case hErr != nil:
 			logger.LogIf(ctx, hErr)
 			// abort on unhandled errors
-			return nil, nil, hErr
+			dataErrs[i] = hErr
 			break
 		}
 	}
@@ -207,5 +205,5 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
 		}
 	}

-	return availableDisks, dataErrs, nil
+	return availableDisks, dataErrs
 }
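With the error return gone, `disksWithAllParts` no longer aborts the whole scan on an unexpected failure; the failure is recorded in `dataErrs` at that disk's index and the caller decides how to react. A compact, runnable sketch of the reworked contract (the `Disk` type and the function body are simplified stand-ins, not MinIO's actual code):

package main

import (
	"errors"
	"fmt"
)

// Disk is a simplified stand-in for StorageAPI.
type Disk string

var errDiskNotFound = errors.New("disk not found")

// disksWithAllParts mirrors the new two-value contract: no aggregate
// error return; failures are folded into the per-disk error slice.
func disksWithAllParts(online []Disk) (available []Disk, dataErrs []error) {
	available = make([]Disk, len(online))
	dataErrs = make([]error, len(online))
	for i, d := range online {
		if d == "" {
			dataErrs[i] = errDiskNotFound // recorded, not returned
			continue
		}
		available[i] = d // disk verified to hold all parts
	}
	return available, dataErrs
}

func main() {
	avail, dataErrs := disksWithAllParts([]Disk{"d1", "", "d3"})
	fmt.Println(avail, dataErrs)
}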
@@ -258,7 +258,7 @@ func TestListOnlineDisks(t *testing.T) {
 				i+1, test.expectedTime, modTime)
 		}

-		availableDisks, newErrs, _ := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object)
+		availableDisks, newErrs := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object)
 		test.errs = newErrs

 		if test._tamperBackend != noTamper {
@@ -318,10 +318,7 @@ func TestDisksWithAllParts(t *testing.T) {
 	}

 	errs = make([]error, len(xlDisks))
-	filteredDisks, errs, err := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
-	if err != nil {
-		t.Errorf("Unexpected error: %s", err)
-	}
+	filteredDisks, errs := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)

 	if len(filteredDisks) != len(xlDisks) {
 		t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
@@ -353,10 +350,7 @@ func TestDisksWithAllParts(t *testing.T) {
 		t.Fatalf("Failed to read xl meta data %v", err)
 	}

-	filteredDisks, errs, err = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
-	if err != nil {
-		t.Errorf("Unexpected error: %s", err)
-	}
+	filteredDisks, errs = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)

 	if len(filteredDisks) != len(xlDisks) {
 		t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
@@ -295,7 +295,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 		// continue to return filled madmin.HealResultItem struct which includes info
 		// on what disks the file is available etc.
 		if reducedErr := reduceReadQuorumErrs(ctx, errs, nil, quorum); reducedErr != nil {
-			return result, toObjectErr(reducedErr, bucket, object)
+			return defaultHealResult(storageDisks, errs, bucket, object), toObjectErr(reducedErr, bucket, object)
 		}
 	}

@@ -304,10 +304,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 	latestDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs)

 	// List of disks having all parts as per latest xl.json.
-	availableDisks, dataErrs, aErr := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object)
-	if aErr != nil {
-		return result, toObjectErr(aErr, bucket, object)
-	}
+	availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object)

 	// Initialize heal result object
 	result = madmin.HealResultItem{
@@ -338,7 +335,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 			result.ObjectSize = partsMetadata[i].Stat.Size
 			result.ParityBlocks = partsMetadata[i].Erasure.ParityBlocks
 			result.DataBlocks = partsMetadata[i].Erasure.DataBlocks
-		case errs[i] == errDiskNotFound:
+		case errs[i] == errDiskNotFound, dataErrs[i] == errDiskNotFound:
 			driveState = madmin.DriveStateOffline
 		case errs[i] == errFileNotFound, errs[i] == errVolumeNotFound:
 			fallthrough
@@ -388,6 +385,10 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 	// If less than read quorum number of disks have all the parts
 	// of the data, we can't reconstruct the erasure-coded data.
 	if numAvailableDisks < quorum {
+		// Default to most common configuration for erasure
+		// blocks upon returning quorum error.
+		result.ParityBlocks = len(storageDisks) / 2
+		result.DataBlocks = len(storageDisks) / 2
 		return result, toObjectErr(errXLReadQuorum, bucket, object)
 	}

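The `len(storageDisks) / 2` split above is the "most common configuration" the comment refers to: with a 16-disk erasure set, for example, the quorum-error reply reports 8 data and 8 parity blocks, even though the object's actual layout could not be read.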
@@ -512,7 +513,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 	}

 	// Generate and write `xl.json` generated from other disks.
-	outDatedDisks, aErr = writeUniqueXLMetadata(ctx, outDatedDisks, minioMetaTmpBucket, tmpID,
+	outDatedDisks, aErr := writeUniqueXLMetadata(ctx, outDatedDisks, minioMetaTmpBucket, tmpID,
 		partsMetadata, diskCount(outDatedDisks))
 	if aErr != nil {
 		return result, toObjectErr(aErr, bucket, object)
@@ -602,6 +603,58 @@ func (xl xlObjects) healObjectDir(ctx context.Context, bucket, object string, dr
 	return hr, nil
 }

+// Populates default heal result item entries with possible values when we are returning prematurely.
+// This is to ensure that in any circumstance we are not returning empty arrays with wrong values.
+func defaultHealResult(storageDisks []StorageAPI, errs []error, bucket, object string) madmin.HealResultItem {
+	// Initialize heal result object
+	result := madmin.HealResultItem{
+		Type:      madmin.HealItemObject,
+		Bucket:    bucket,
+		Object:    object,
+		DiskCount: len(storageDisks),
+
+		// Initialize object size to -1, so we can detect if we are
+		// unable to reliably find the object size.
+		ObjectSize: -1,
+	}
+
+	for index, disk := range storageDisks {
+		if disk == nil {
+			result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
+				UUID:  "",
+				State: madmin.DriveStateOffline,
+			})
+			result.After.Drives = append(result.After.Drives, madmin.HealDriveInfo{
+				UUID:  "",
+				State: madmin.DriveStateOffline,
+			})
+			continue
+		}
+		drive := disk.String()
+		driveState := madmin.DriveStateCorrupt
+		switch errs[index] {
+		case errFileNotFound, errVolumeNotFound:
+			driveState = madmin.DriveStateMissing
+		}
+		result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
+			UUID:     "",
+			Endpoint: drive,
+			State:    driveState,
+		})
+		result.After.Drives = append(result.After.Drives, madmin.HealDriveInfo{
+			UUID:     "",
+			Endpoint: drive,
+			State:    driveState,
+		})
+	}
+
+	// Default to most common configuration for erasure blocks.
+	result.ParityBlocks = len(storageDisks) / 2
+	result.DataBlocks = len(storageDisks) / 2
+
+	return result
+}
+
 // HealObject - heal the given object.
 //
 // FIXME: If an object object was deleted and one disk was down,
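A note on the helper above: `ObjectSize` is initialized to -1 because `defaultHealResult` runs exactly when the object's metadata could not be read reliably, so a negative size lets heal-API consumers tell "size unknown" apart from a genuinely zero-byte object. Drive states are derived purely from `errs`: a nil disk maps to `DriveStateOffline`, a missing file or volume to `DriveStateMissing`, and any other error to `DriveStateCorrupt`.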
@@ -624,19 +677,21 @@ func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRu
 		return xl.healObjectDir(healCtx, bucket, object, dryRun)
 	}

+	storageDisks := xl.getDisks()
+
 	// FIXME: Metadata is read again in the healObject() call below.
 	// Read metadata files from all the disks
-	partsMetadata, errs := readAllXLMetadata(healCtx, xl.getDisks(), bucket, object)
+	partsMetadata, errs := readAllXLMetadata(healCtx, storageDisks, bucket, object)

 	latestXLMeta, err := getLatestXLMeta(healCtx, partsMetadata, errs)
 	if err != nil {
-		return hr, toObjectErr(err, bucket, object)
+		return defaultHealResult(storageDisks, errs, bucket, object), toObjectErr(err, bucket, object)
 	}

 	// Lock the object before healing.
 	objectLock := xl.nsMutex.NewNSLock(bucket, object)
 	if lerr := objectLock.GetRLock(globalHealingTimeout); lerr != nil {
-		return hr, lerr
+		return defaultHealResult(storageDisks, errs, bucket, object), lerr
 	}
 	defer objectLock.RUnlock()
