heal: Report correctly in multip-pools setup (#15117)

`mc admin heal -r <alias>` in a multi setup pools returns incorrectly
grey objects. The reason is that erasure-server-pools.HealObject() runs
HealObject in all pools and returns the result of the first nil
error. However, in the lower erasureObject level, HealObject() returns
nil if an object does not exist + missing error in each disk of the object
in that pool, therefore confusing mc.

Make erasureObject.HealObject() to return not found error in the lower
level, so at least erasureServerPools will know what pools to ignore.
This commit is contained in:
Anis Elleuch 2022-06-20 16:07:45 +01:00 committed by GitHub
parent ce6c23a360
commit 73733a8fb9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 137 additions and 119 deletions

View File

@ -306,9 +306,13 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
// Re-read when we have lock...
partsMetadata, errs := readAllFileInfo(ctx, storageDisks, bucket, object, versionID, true)
if isAllNotFound(errs) {
err := errFileNotFound
if versionID != "" {
err = errFileVersionNotFound
}
// Nothing to do, file is already gone.
return er.defaultHealResult(FileInfo{}, storageDisks, storageEndpoints,
errs, bucket, object, versionID), nil
errs, bucket, object, versionID), err
}
readQuorum, _, err := objectQuorumFromMeta(ctx, partsMetadata, errs, er.defaultParityCount)
@ -327,7 +331,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
// present, it is as good as object not found.
latestMeta, err := pickValidFileInfo(ctx, partsMetadata, modTime, readQuorum)
if err != nil {
return result, toObjectErr(err, bucket, object, versionID)
return result, err
}
// List of disks having all parts as per latest metadata.
@ -397,8 +401,12 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
if isAllNotFound(errs) {
// File is fully gone, fileInfo is empty.
err := errFileNotFound
if versionID != "" {
err = errFileVersionNotFound
}
return er.defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, errs,
bucket, object, versionID), nil
bucket, object, versionID), err
}
// If less than read quorum number of disks have all the parts
@ -485,7 +493,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
erasure, err := NewErasure(ctx, latestMeta.Erasure.DataBlocks,
latestMeta.Erasure.ParityBlocks, latestMeta.Erasure.BlockSize)
if err != nil {
return result, toObjectErr(err, bucket, object)
return result, err
}
erasureInfo := latestMeta.Erasure
@ -523,7 +531,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
closeBitrotReaders(readers)
closeBitrotWriters(writers)
if err != nil {
return result, toObjectErr(err, bucket, object)
return result, err
}
// outDatedDisks that had write errors should not be
@ -578,7 +586,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
// Attempt a rename now from healed data to final location.
if err = disk.RenameData(ctx, minioMetaTmpBucket, tmpID, partsMetadata[i], bucket, object); err != nil {
logger.LogIf(ctx, err)
return result, toObjectErr(err, bucket, object)
return result, err
}
// Remove any remaining parts from outdated disks from before transition.
@ -674,10 +682,16 @@ func (er erasureObjects) healObjectDir(ctx context.Context, bucket, object strin
hr.After.Drives[i] = madmin.HealDriveInfo{Endpoint: drive, State: madmin.DriveStateCorrupt}
}
}
if dryRun || danglingObject || isAllNotFound(errs) {
if danglingObject || isAllNotFound(errs) {
// Nothing to do, file is already gone.
return hr, errFileNotFound
}
if dryRun {
// Quit without try to heal the object dir
return hr, nil
}
for i, err := range errs {
if err == errVolumeNotFound || err == errFileNotFound {
// Bucket or prefix/directory not found
@ -837,8 +851,13 @@ func (er erasureObjects) purgeObjectDangling(ctx context.Context, bucket, object
// Dangling object successfully purged, size is '0'
m.Size = 0
}
// Generate file/version not found with default heal result
err = errFileNotFound
if versionID != "" {
err = errFileVersionNotFound
}
return er.defaultHealResult(m, storageDisks, storageEndpoints,
errs, bucket, object, versionID), nil
errs, bucket, object, versionID), err
}
// Object is considered dangling/corrupted if any only
@ -908,12 +927,6 @@ func isObjectDangling(metaArr []FileInfo, errs []error, dataErrs []error) (valid
// HealObject - heal the given object, automatically deletes the object if stale/corrupted if `remove` is true.
func (er erasureObjects) HealObject(ctx context.Context, bucket, object, versionID string, opts madmin.HealOpts) (hr madmin.HealResultItem, err error) {
defer func() {
if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
err = nil
}
}()
// Create context that also contains information about the object and bucket.
// The top level handler might not have this information.
reqInfo := logger.GetReqInfo(ctx)
@ -927,7 +940,8 @@ func (er erasureObjects) HealObject(ctx context.Context, bucket, object, version
// Healing directories handle it separately.
if HasSuffix(object, SlashSeparator) {
return er.healObjectDir(healCtx, bucket, object, opts.DryRun, opts.Remove)
hr, err := er.healObjectDir(healCtx, bucket, object, opts.DryRun, opts.Remove)
return hr, toObjectErr(err, bucket, object)
}
storageDisks := er.getDisks()
@ -942,9 +956,13 @@ func (er erasureObjects) HealObject(ctx context.Context, bucket, object, version
// This allows to quickly check if all is ok or all are missing.
_, errs := readAllFileInfo(healCtx, storageDisks, bucket, object, versionID, false)
if isAllNotFound(errs) {
err := errFileNotFound
if versionID != "" {
err = errFileVersionNotFound
}
// Nothing to do, file is already gone.
return er.defaultHealResult(FileInfo{}, storageDisks, storageEndpoints,
errs, bucket, object, versionID), nil
errs, bucket, object, versionID), toObjectErr(err, bucket, object, versionID)
}
// Heal the object.
@ -955,5 +973,5 @@ func (er erasureObjects) HealObject(ctx context.Context, bucket, object, version
opts.ScanMode = madmin.HealDeepScan
hr, err = er.healObject(healCtx, bucket, object, versionID, opts)
}
return hr, err
return hr, toObjectErr(err, bucket, object, versionID)
}

View File

@ -684,16 +684,18 @@ func TestHealObjectCorruptedPools(t *testing.T) {
t.Fatalf("Failed to make a bucket - %v", err)
}
// Create an object with multiple parts uploaded in decreasing
// part number.
uploadID, err := objLayer.NewMultipartUpload(ctx, bucket, object, opts)
// Upload a multipart object in the second pool
z := objLayer.(*erasureServerPools)
set := z.serverPools[1]
uploadID, err := set.NewMultipartUpload(ctx, bucket, object, opts)
if err != nil {
t.Fatalf("Failed to create a multipart upload - %v", err)
}
var uploadedParts []CompletePart
for _, partID := range []int{2, 1} {
pInfo, err1 := objLayer.PutObjectPart(ctx, bucket, object, uploadID, partID, mustGetPutObjReader(t, bytes.NewReader(data), int64(len(data)), "", ""), opts)
pInfo, err1 := set.PutObjectPart(ctx, bucket, object, uploadID, partID, mustGetPutObjReader(t, bytes.NewReader(data), int64(len(data)), "", ""), opts)
if err1 != nil {
t.Fatalf("Failed to upload a part - %v", err1)
}
@ -703,120 +705,114 @@ func TestHealObjectCorruptedPools(t *testing.T) {
})
}
_, err = objLayer.CompleteMultipartUpload(ctx, bucket, object, uploadID, uploadedParts, ObjectOptions{})
_, err = set.CompleteMultipartUpload(ctx, bucket, object, uploadID, uploadedParts, ObjectOptions{})
if err != nil {
t.Fatalf("Failed to complete multipart upload - %v", err)
}
// Test 1: Remove the object backend files from the first disk.
z := objLayer.(*erasureServerPools)
for _, set := range z.serverPools {
er := set.sets[0]
erasureDisks := er.getDisks()
firstDisk := erasureDisks[0]
err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false)
if err != nil {
t.Fatalf("Failed to delete a file - %v", err)
}
er := set.sets[0]
erasureDisks := er.getDisks()
firstDisk := erasureDisks[0]
err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false)
if err != nil {
t.Fatalf("Failed to delete a file - %v", err)
}
_, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{ScanMode: madmin.HealNormalScan})
if err != nil {
t.Fatalf("Failed to heal object - %v", err)
}
_, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{ScanMode: madmin.HealNormalScan})
if err != nil {
t.Fatalf("Failed to heal object - %v", err)
}
fileInfos, errs := readAllFileInfo(ctx, erasureDisks, bucket, object, "", false)
fi, err := getLatestFileInfo(ctx, fileInfos, errs)
if errors.Is(err, errFileNotFound) {
continue
}
if err != nil {
t.Fatalf("Failed to getLatestFileInfo - %v", err)
}
fileInfos, errs := readAllFileInfo(ctx, erasureDisks, bucket, object, "", false)
fi, err := getLatestFileInfo(ctx, fileInfos, errs)
if err != nil {
t.Fatalf("Failed to getLatestFileInfo - %v", err)
}
if _, err = firstDisk.StatInfoFile(context.Background(), bucket, object+"/"+xlStorageFormatFile, false); err != nil {
t.Errorf("Expected xl.meta file to be present but stat failed - %v", err)
}
if _, err = firstDisk.StatInfoFile(context.Background(), bucket, object+"/"+xlStorageFormatFile, false); err != nil {
t.Errorf("Expected xl.meta file to be present but stat failed - %v", err)
}
err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false)
if err != nil {
t.Errorf("Failure during deleting part.1 - %v", err)
}
err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false)
if err != nil {
t.Errorf("Failure during deleting part.1 - %v", err)
}
err = firstDisk.WriteAll(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), []byte{})
if err != nil {
t.Errorf("Failure during creating part.1 - %v", err)
}
err = firstDisk.WriteAll(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), []byte{})
if err != nil {
t.Errorf("Failure during creating part.1 - %v", err)
}
_, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{DryRun: false, Remove: true, ScanMode: madmin.HealDeepScan})
if err != nil {
t.Errorf("Expected nil but received %v", err)
}
_, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{DryRun: false, Remove: true, ScanMode: madmin.HealDeepScan})
if err != nil {
t.Errorf("Expected nil but received %v", err)
}
fileInfos, errs = readAllFileInfo(ctx, erasureDisks, bucket, object, "", false)
nfi, err := getLatestFileInfo(ctx, fileInfos, errs)
if err != nil {
t.Fatalf("Failed to getLatestFileInfo - %v", err)
}
fileInfos, errs = readAllFileInfo(ctx, erasureDisks, bucket, object, "", false)
nfi, err := getLatestFileInfo(ctx, fileInfos, errs)
if err != nil {
t.Fatalf("Failed to getLatestFileInfo - %v", err)
}
fi.DiskMTime = time.Time{}
nfi.DiskMTime = time.Time{}
if !reflect.DeepEqual(fi, nfi) {
t.Fatalf("FileInfo not equal after healing: %v != %v", fi, nfi)
}
fi.DiskMTime = time.Time{}
nfi.DiskMTime = time.Time{}
if !reflect.DeepEqual(fi, nfi) {
t.Fatalf("FileInfo not equal after healing: %v != %v", fi, nfi)
}
err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false)
if err != nil {
t.Errorf("Failure during deleting part.1 - %v", err)
}
err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false)
if err != nil {
t.Errorf("Failure during deleting part.1 - %v", err)
}
bdata := bytes.Repeat([]byte("b"), int(nfi.Size))
err = firstDisk.WriteAll(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), bdata)
if err != nil {
t.Errorf("Failure during creating part.1 - %v", err)
}
bdata := bytes.Repeat([]byte("b"), int(nfi.Size))
err = firstDisk.WriteAll(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), bdata)
if err != nil {
t.Errorf("Failure during creating part.1 - %v", err)
}
_, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{DryRun: false, Remove: true, ScanMode: madmin.HealDeepScan})
if err != nil {
t.Errorf("Expected nil but received %v", err)
}
_, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{DryRun: false, Remove: true, ScanMode: madmin.HealDeepScan})
if err != nil {
t.Errorf("Expected nil but received %v", err)
}
fileInfos, errs = readAllFileInfo(ctx, erasureDisks, bucket, object, "", false)
nfi, err = getLatestFileInfo(ctx, fileInfos, errs)
if err != nil {
t.Fatalf("Failed to getLatestFileInfo - %v", err)
}
fileInfos, errs = readAllFileInfo(ctx, erasureDisks, bucket, object, "", false)
nfi, err = getLatestFileInfo(ctx, fileInfos, errs)
if err != nil {
t.Fatalf("Failed to getLatestFileInfo - %v", err)
}
fi.DiskMTime = time.Time{}
nfi.DiskMTime = time.Time{}
if !reflect.DeepEqual(fi, nfi) {
t.Fatalf("FileInfo not equal after healing: %v != %v", fi, nfi)
}
fi.DiskMTime = time.Time{}
nfi.DiskMTime = time.Time{}
if !reflect.DeepEqual(fi, nfi) {
t.Fatalf("FileInfo not equal after healing: %v != %v", fi, nfi)
}
// Test 4: checks if HealObject returns an error when xl.meta is not found
// in more than read quorum number of disks, to create a corrupted situation.
for i := 0; i <= nfi.Erasure.DataBlocks; i++ {
erasureDisks[i].Delete(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false)
}
// Test 4: checks if HealObject returns an error when xl.meta is not found
// in more than read quorum number of disks, to create a corrupted situation.
for i := 0; i <= nfi.Erasure.DataBlocks; i++ {
erasureDisks[i].Delete(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false)
}
// Try healing now, expect to receive errFileNotFound.
_, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{DryRun: false, Remove: true, ScanMode: madmin.HealDeepScan})
if err != nil {
if _, ok := err.(ObjectNotFound); !ok {
t.Errorf("Expect %v but received %v", ObjectNotFound{Bucket: bucket, Object: object}, err)
}
}
// since majority of xl.meta's are not available, object should be successfully deleted.
_, err = objLayer.GetObjectInfo(ctx, bucket, object, ObjectOptions{})
// Try healing now, expect to receive errFileNotFound.
_, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{DryRun: false, Remove: true, ScanMode: madmin.HealDeepScan})
if err != nil {
if _, ok := err.(ObjectNotFound); !ok {
t.Errorf("Expect %v but received %v", ObjectNotFound{Bucket: bucket, Object: object}, err)
}
}
for i := 0; i < (nfi.Erasure.DataBlocks + nfi.Erasure.ParityBlocks); i++ {
_, err = erasureDisks[i].StatInfoFile(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false)
if err == nil {
t.Errorf("Expected xl.meta file to be not present, but succeeeded")
}
// since majority of xl.meta's are not available, object should be successfully deleted.
_, err = objLayer.GetObjectInfo(ctx, bucket, object, ObjectOptions{})
if _, ok := err.(ObjectNotFound); !ok {
t.Errorf("Expect %v but received %v", ObjectNotFound{Bucket: bucket, Object: object}, err)
}
for i := 0; i < (nfi.Erasure.DataBlocks + nfi.Erasure.ParityBlocks); i++ {
_, err = erasureDisks[i].StatInfoFile(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false)
if err == nil {
t.Errorf("Expected xl.meta file to be not present, but succeeeded")
}
}
}
@ -1202,7 +1198,7 @@ func TestHealObjectErasure(t *testing.T) {
// since majority of xl.meta's are not available, object quorum
// can't be read properly will be deleted automatically and
// err is nil
if err != nil {
if !isErrObjectNotFound(err) {
t.Fatal(err)
}
}

View File

@ -1939,7 +1939,8 @@ func (z *erasureServerPools) HealObjects(ctx context.Context, bucket, prefix str
}
for _, version := range fivs.Versions {
if err := healObjectFn(bucket, version.Name, version.VersionID); err != nil {
err := healObjectFn(bucket, version.Name, version.VersionID)
if err != nil && !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
return err
}
}
@ -2000,18 +2001,21 @@ func (z *erasureServerPools) HealObject(ctx context.Context, bucket, object, ver
}
wg.Wait()
for _, err := range errs {
if err != nil {
return madmin.HealResultItem{}, err
// Return the first nil error
for idx, err := range errs {
if err == nil {
return results[idx], nil
}
}
for _, result := range results {
if result.Object != "" {
return result, nil
// No pool returned a nil error, return the first non 'not found' error
for idx, err := range errs {
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
return results[idx], err
}
}
// At this stage, all errors are 'not found'
if versionID != "" {
return madmin.HealResultItem{}, VersionNotFound{
Bucket: bucket,