Always return a default heal item upon unexpected error (#6556)

Never return an empty heal result item, even on error:
populate all the default values and, based on the errors
observed, make sure the right result reply is sent.
Harshavardhana 2018-10-02 17:13:51 -07:00 committed by Dee Koder
parent 274b35154c
commit 223967fd32
3 changed files with 73 additions and 26 deletions
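
The effect of this change is easiest to see from the caller's side: even when healing fails, the returned madmin.HealResultItem now carries the disk count, per-drive states, and erasure defaults instead of arriving as a zero value. A minimal sketch of a hypothetical caller (reportHeal is illustrative and not part of this commit; HealObject and madmin.HealResultItem follow the signatures shown in the diffs below):

// reportHeal is a hypothetical wrapper, assuming the ObjectLayer
// HealObject signature shown in the diffs below.
func reportHeal(ctx context.Context, objLayer ObjectLayer, bucket, object string, dryRun bool) madmin.HealResultItem {
	res, err := objLayer.HealObject(ctx, bucket, object, dryRun)
	if err != nil {
		// Before this commit, res could be an empty struct here. After it,
		// res is a fully populated default item (DiskCount set, one drive
		// entry per disk, ObjectSize == -1 meaning "size unknown"), so the
		// admin API can still render a meaningful reply.
		logger.LogIf(ctx, err)
	}
	return res
}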

View File

@@ -158,17 +158,15 @@ func getLatestXLMeta(ctx context.Context, partsMetadata []xlMetaV1, errs []error
 //
 // - slice of errors about the state of data files on disk - can have
 //   a not-found error or a hash-mismatch error.
-//
-// - non-nil error if any of the disks failed unexpectedly (i.e. error
-//   other than file not found and not a checksum error).
 func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket,
-	object string) ([]StorageAPI, []error, error) {
+	object string) ([]StorageAPI, []error) {
 	availableDisks := make([]StorageAPI, len(onlineDisks))
 	buffer := []byte{}
 	dataErrs := make([]error, len(onlineDisks))
 	for i, onlineDisk := range onlineDisks {
 		if onlineDisk == nil {
+			dataErrs[i] = errDiskNotFound
 			continue
 		}
@@ -196,8 +194,8 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
 			break
 		case hErr != nil:
 			logger.LogIf(ctx, hErr)
-			// abort on unhandled errors
-			return nil, nil, hErr
+			dataErrs[i] = hErr
+			break
 		}
 	}
@@ -207,5 +205,5 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
 		}
 	}
-	return availableDisks, dataErrs, nil
+	return availableDisks, dataErrs
 }
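
The signature change above alters the contract of disksWithAllParts: instead of aborting the whole scan with a non-nil error on the first unexpected failure, every failure is now recorded per disk in dataErrs. A sketch of how a caller can consume the new contract (the classification comments are illustrative; errDiskNotFound and the other identifiers are those used in the diff above):

availableDisks, dataErrs := disksWithAllParts(ctx, onlineDisks, partsMetadata, errs, bucket, object)
for i := range onlineDisks {
	switch {
	case availableDisks[i] != nil:
		// disk is online and has all parts intact
	case dataErrs[i] == errDiskNotFound:
		// disk was offline; previously this slot carried no error at all
	default:
		// hash mismatch, missing part, or an unexpected I/O error that
		// would previously have aborted healing of the whole object
	}
}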

View File

@@ -258,7 +258,7 @@ func TestListOnlineDisks(t *testing.T) {
 				i+1, test.expectedTime, modTime)
 		}
-		availableDisks, newErrs, _ := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object)
+		availableDisks, newErrs := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object)
 		test.errs = newErrs
 		if test._tamperBackend != noTamper {
@@ -318,10 +318,7 @@ func TestDisksWithAllParts(t *testing.T) {
 	}
 	errs = make([]error, len(xlDisks))
-	filteredDisks, errs, err := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
-	if err != nil {
-		t.Errorf("Unexpected error: %s", err)
-	}
+	filteredDisks, errs := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
 	if len(filteredDisks) != len(xlDisks) {
 		t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
@@ -353,10 +350,7 @@ func TestDisksWithAllParts(t *testing.T) {
 		t.Fatalf("Failed to read xl meta data %v", err)
 	}
-	filteredDisks, errs, err = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
-	if err != nil {
-		t.Errorf("Unexpected error: %s", err)
-	}
+	filteredDisks, errs = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
 	if len(filteredDisks) != len(xlDisks) {
 		t.Errorf("Unexpected number of disks: %d", len(filteredDisks))

View File

@@ -295,7 +295,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 		// continue to return filled madmin.HealResultItem struct which includes info
 		// on what disks the file is available etc.
 		if reducedErr := reduceReadQuorumErrs(ctx, errs, nil, quorum); reducedErr != nil {
-			return result, toObjectErr(reducedErr, bucket, object)
+			return defaultHealResult(storageDisks, errs, bucket, object), toObjectErr(reducedErr, bucket, object)
 		}
 	}
@@ -304,10 +304,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 	latestDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs)
 	// List of disks having all parts as per latest xl.json.
-	availableDisks, dataErrs, aErr := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object)
-	if aErr != nil {
-		return result, toObjectErr(aErr, bucket, object)
-	}
+	availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object)
 	// Initialize heal result object
 	result = madmin.HealResultItem{
@@ -338,7 +335,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 			result.ObjectSize = partsMetadata[i].Stat.Size
 			result.ParityBlocks = partsMetadata[i].Erasure.ParityBlocks
 			result.DataBlocks = partsMetadata[i].Erasure.DataBlocks
-		case errs[i] == errDiskNotFound:
+		case errs[i] == errDiskNotFound, dataErrs[i] == errDiskNotFound:
 			driveState = madmin.DriveStateOffline
 		case errs[i] == errFileNotFound, errs[i] == errVolumeNotFound:
 			fallthrough
@@ -388,6 +385,10 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 	// If less than read quorum number of disks have all the parts
 	// of the data, we can't reconstruct the erasure-coded data.
 	if numAvailableDisks < quorum {
+		// Default to most common configuration for erasure
+		// blocks upon returning quorum error.
+		result.ParityBlocks = len(storageDisks) / 2
+		result.DataBlocks = len(storageDisks) / 2
 		return result, toObjectErr(errXLReadQuorum, bucket, object)
 	}
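
The len(storageDisks) / 2 fallback above mirrors what the commit's own comment calls the most common erasure configuration, in which the drives are split evenly between data and parity blocks. A worked example of the arithmetic, assuming a hypothetical 16-disk setup:

package main

import "fmt"

func main() {
	// With 16 disks the fallback reports an 8 data + 8 parity layout,
	// matching the default even split used when the real erasure
	// parameters cannot be read from xl.json.
	storageDisks := 16
	fmt.Println("data:", storageDisks/2, "parity:", storageDisks/2) // data: 8 parity: 8
}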
@@ -512,7 +513,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
 	}
 	// Generate and write `xl.json` generated from other disks.
-	outDatedDisks, aErr = writeUniqueXLMetadata(ctx, outDatedDisks, minioMetaTmpBucket, tmpID,
+	outDatedDisks, aErr := writeUniqueXLMetadata(ctx, outDatedDisks, minioMetaTmpBucket, tmpID,
 		partsMetadata, diskCount(outDatedDisks))
 	if aErr != nil {
 		return result, toObjectErr(aErr, bucket, object)
@@ -602,6 +603,58 @@ func (xl xlObjects) healObjectDir(ctx context.Context, bucket, object string, dr
 	return hr, nil
 }
+
+// defaultHealResult populates a heal result item with default entries when we
+// return prematurely. This ensures that under no circumstance do we reply
+// with empty drive arrays or wrong values.
+func defaultHealResult(storageDisks []StorageAPI, errs []error, bucket, object string) madmin.HealResultItem {
+	// Initialize heal result object
+	result := madmin.HealResultItem{
+		Type:      madmin.HealItemObject,
+		Bucket:    bucket,
+		Object:    object,
+		DiskCount: len(storageDisks),
+
+		// Initialize object size to -1, so we can detect if we are
+		// unable to reliably find the object size.
+		ObjectSize: -1,
+	}
+
+	for index, disk := range storageDisks {
+		if disk == nil {
+			result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
+				UUID:  "",
+				State: madmin.DriveStateOffline,
+			})
+			result.After.Drives = append(result.After.Drives, madmin.HealDriveInfo{
+				UUID:  "",
+				State: madmin.DriveStateOffline,
+			})
+			continue
+		}
+		drive := disk.String()
+		driveState := madmin.DriveStateCorrupt
+		switch errs[index] {
+		case errFileNotFound, errVolumeNotFound:
+			driveState = madmin.DriveStateMissing
+		}
+		result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
+			UUID:     "",
+			Endpoint: drive,
+			State:    driveState,
+		})
+		result.After.Drives = append(result.After.Drives, madmin.HealDriveInfo{
+			UUID:     "",
+			Endpoint: drive,
+			State:    driveState,
+		})
+	}
+
+	// Default to most common configuration for erasure blocks.
+	result.ParityBlocks = len(storageDisks) / 2
+	result.DataBlocks = len(storageDisks) / 2
+	return result
+}
+
 // HealObject - heal the given object.
 //
 // FIXME: If an object was deleted and one disk was down,
@@ -624,19 +677,21 @@ func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRu
 		return xl.healObjectDir(healCtx, bucket, object, dryRun)
 	}
+	storageDisks := xl.getDisks()
+
 	// FIXME: Metadata is read again in the healObject() call below.
 	// Read metadata files from all the disks
-	partsMetadata, errs := readAllXLMetadata(healCtx, xl.getDisks(), bucket, object)
+	partsMetadata, errs := readAllXLMetadata(healCtx, storageDisks, bucket, object)
 	latestXLMeta, err := getLatestXLMeta(healCtx, partsMetadata, errs)
 	if err != nil {
-		return hr, toObjectErr(err, bucket, object)
+		return defaultHealResult(storageDisks, errs, bucket, object), toObjectErr(err, bucket, object)
 	}
 
 	// Lock the object before healing.
 	objectLock := xl.nsMutex.NewNSLock(bucket, object)
 	if lerr := objectLock.GetRLock(globalHealingTimeout); lerr != nil {
-		return hr, lerr
+		return defaultHealResult(storageDisks, errs, bucket, object), lerr
 	}
 	defer objectLock.RUnlock()
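
Taken together, every premature return path in HealObject now goes through defaultHealResult. A hypothetical test sketch (not part of this commit) that would pin down the new invariant, using the defaultHealResult function added above: one drive entry per disk in both Before and After, ObjectSize of -1, and the N/2 erasure defaults.

func TestDefaultHealResultPopulated(t *testing.T) {
	storageDisks := make([]StorageAPI, 4) // all nil, i.e. all disks offline
	errs := make([]error, len(storageDisks))
	result := defaultHealResult(storageDisks, errs, "bucket", "object")
	if len(result.Before.Drives) != 4 || len(result.After.Drives) != 4 {
		t.Fatalf("expected 4 drive entries, got %d/%d",
			len(result.Before.Drives), len(result.After.Drives))
	}
	if result.ObjectSize != -1 {
		t.Errorf("expected ObjectSize -1, got %d", result.ObjectSize)
	}
	if result.DataBlocks != 2 || result.ParityBlocks != 2 {
		t.Errorf("expected 2+2 erasure default, got %d+%d",
			result.DataBlocks, result.ParityBlocks)
	}
}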