heal: Report correctly in multi-pools setup (#15117)

`mc admin heal -r <alias>` in a multi-pools setup incorrectly returns
grey objects. The reason is that erasure-server-pools.HealObject() runs
HealObject in all pools and returns the result of the first nil
error. However, at the lower erasureObjects level, HealObject() returns
nil even when an object does not exist and every disk in that pool reports
a missing-object error, therefore confusing mc.

Make erasureObjects.HealObject() return a not-found error from the lower
level, so at least erasureServerPools will know which pools to ignore.
This commit is contained in:
Anis Elleuch 2022-06-20 16:07:45 +01:00 committed by GitHub
parent ce6c23a360
commit 73733a8fb9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 137 additions and 119 deletions

View File

@ -306,9 +306,13 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
// Re-read when we have lock... // Re-read when we have lock...
partsMetadata, errs := readAllFileInfo(ctx, storageDisks, bucket, object, versionID, true) partsMetadata, errs := readAllFileInfo(ctx, storageDisks, bucket, object, versionID, true)
if isAllNotFound(errs) { if isAllNotFound(errs) {
err := errFileNotFound
if versionID != "" {
err = errFileVersionNotFound
}
// Nothing to do, file is already gone. // Nothing to do, file is already gone.
return er.defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, return er.defaultHealResult(FileInfo{}, storageDisks, storageEndpoints,
errs, bucket, object, versionID), nil errs, bucket, object, versionID), err
} }
readQuorum, _, err := objectQuorumFromMeta(ctx, partsMetadata, errs, er.defaultParityCount) readQuorum, _, err := objectQuorumFromMeta(ctx, partsMetadata, errs, er.defaultParityCount)
@ -327,7 +331,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
// present, it is as good as object not found. // present, it is as good as object not found.
latestMeta, err := pickValidFileInfo(ctx, partsMetadata, modTime, readQuorum) latestMeta, err := pickValidFileInfo(ctx, partsMetadata, modTime, readQuorum)
if err != nil { if err != nil {
return result, toObjectErr(err, bucket, object, versionID) return result, err
} }
// List of disks having all parts as per latest metadata. // List of disks having all parts as per latest metadata.
@ -397,8 +401,12 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
if isAllNotFound(errs) { if isAllNotFound(errs) {
// File is fully gone, fileInfo is empty. // File is fully gone, fileInfo is empty.
err := errFileNotFound
if versionID != "" {
err = errFileVersionNotFound
}
return er.defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, errs, return er.defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, errs,
bucket, object, versionID), nil bucket, object, versionID), err
} }
// If less than read quorum number of disks have all the parts // If less than read quorum number of disks have all the parts
@ -485,7 +493,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
erasure, err := NewErasure(ctx, latestMeta.Erasure.DataBlocks, erasure, err := NewErasure(ctx, latestMeta.Erasure.DataBlocks,
latestMeta.Erasure.ParityBlocks, latestMeta.Erasure.BlockSize) latestMeta.Erasure.ParityBlocks, latestMeta.Erasure.BlockSize)
if err != nil { if err != nil {
return result, toObjectErr(err, bucket, object) return result, err
} }
erasureInfo := latestMeta.Erasure erasureInfo := latestMeta.Erasure
@ -523,7 +531,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
closeBitrotReaders(readers) closeBitrotReaders(readers)
closeBitrotWriters(writers) closeBitrotWriters(writers)
if err != nil { if err != nil {
return result, toObjectErr(err, bucket, object) return result, err
} }
// outDatedDisks that had write errors should not be // outDatedDisks that had write errors should not be
@ -578,7 +586,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
// Attempt a rename now from healed data to final location. // Attempt a rename now from healed data to final location.
if err = disk.RenameData(ctx, minioMetaTmpBucket, tmpID, partsMetadata[i], bucket, object); err != nil { if err = disk.RenameData(ctx, minioMetaTmpBucket, tmpID, partsMetadata[i], bucket, object); err != nil {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
return result, toObjectErr(err, bucket, object) return result, err
} }
// Remove any remaining parts from outdated disks from before transition. // Remove any remaining parts from outdated disks from before transition.
@ -674,10 +682,16 @@ func (er erasureObjects) healObjectDir(ctx context.Context, bucket, object strin
hr.After.Drives[i] = madmin.HealDriveInfo{Endpoint: drive, State: madmin.DriveStateCorrupt} hr.After.Drives[i] = madmin.HealDriveInfo{Endpoint: drive, State: madmin.DriveStateCorrupt}
} }
} }
if dryRun || danglingObject || isAllNotFound(errs) { if danglingObject || isAllNotFound(errs) {
// Nothing to do, file is already gone. // Nothing to do, file is already gone.
return hr, errFileNotFound
}
if dryRun {
// Quit without trying to heal the object dir
return hr, nil return hr, nil
} }
for i, err := range errs { for i, err := range errs {
if err == errVolumeNotFound || err == errFileNotFound { if err == errVolumeNotFound || err == errFileNotFound {
// Bucket or prefix/directory not found // Bucket or prefix/directory not found
@ -837,8 +851,13 @@ func (er erasureObjects) purgeObjectDangling(ctx context.Context, bucket, object
// Dangling object successfully purged, size is '0' // Dangling object successfully purged, size is '0'
m.Size = 0 m.Size = 0
} }
// Generate file/version not found with default heal result
err = errFileNotFound
if versionID != "" {
err = errFileVersionNotFound
}
return er.defaultHealResult(m, storageDisks, storageEndpoints, return er.defaultHealResult(m, storageDisks, storageEndpoints,
errs, bucket, object, versionID), nil errs, bucket, object, versionID), err
} }
// Object is considered dangling/corrupted if any only // Object is considered dangling/corrupted if any only
@ -908,12 +927,6 @@ func isObjectDangling(metaArr []FileInfo, errs []error, dataErrs []error) (valid
// HealObject - heal the given object, automatically deletes the object if stale/corrupted if `remove` is true. // HealObject - heal the given object, automatically deletes the object if stale/corrupted if `remove` is true.
func (er erasureObjects) HealObject(ctx context.Context, bucket, object, versionID string, opts madmin.HealOpts) (hr madmin.HealResultItem, err error) { func (er erasureObjects) HealObject(ctx context.Context, bucket, object, versionID string, opts madmin.HealOpts) (hr madmin.HealResultItem, err error) {
defer func() {
if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
err = nil
}
}()
// Create context that also contains information about the object and bucket. // Create context that also contains information about the object and bucket.
// The top level handler might not have this information. // The top level handler might not have this information.
reqInfo := logger.GetReqInfo(ctx) reqInfo := logger.GetReqInfo(ctx)
@ -927,7 +940,8 @@ func (er erasureObjects) HealObject(ctx context.Context, bucket, object, version
// Healing directories handle it separately. // Healing directories handle it separately.
if HasSuffix(object, SlashSeparator) { if HasSuffix(object, SlashSeparator) {
return er.healObjectDir(healCtx, bucket, object, opts.DryRun, opts.Remove) hr, err := er.healObjectDir(healCtx, bucket, object, opts.DryRun, opts.Remove)
return hr, toObjectErr(err, bucket, object)
} }
storageDisks := er.getDisks() storageDisks := er.getDisks()
@ -942,9 +956,13 @@ func (er erasureObjects) HealObject(ctx context.Context, bucket, object, version
// This allows to quickly check if all is ok or all are missing. // This allows to quickly check if all is ok or all are missing.
_, errs := readAllFileInfo(healCtx, storageDisks, bucket, object, versionID, false) _, errs := readAllFileInfo(healCtx, storageDisks, bucket, object, versionID, false)
if isAllNotFound(errs) { if isAllNotFound(errs) {
err := errFileNotFound
if versionID != "" {
err = errFileVersionNotFound
}
// Nothing to do, file is already gone. // Nothing to do, file is already gone.
return er.defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, return er.defaultHealResult(FileInfo{}, storageDisks, storageEndpoints,
errs, bucket, object, versionID), nil errs, bucket, object, versionID), toObjectErr(err, bucket, object, versionID)
} }
// Heal the object. // Heal the object.
@ -955,5 +973,5 @@ func (er erasureObjects) HealObject(ctx context.Context, bucket, object, version
opts.ScanMode = madmin.HealDeepScan opts.ScanMode = madmin.HealDeepScan
hr, err = er.healObject(healCtx, bucket, object, versionID, opts) hr, err = er.healObject(healCtx, bucket, object, versionID, opts)
} }
return hr, err return hr, toObjectErr(err, bucket, object, versionID)
} }

View File

@ -684,16 +684,18 @@ func TestHealObjectCorruptedPools(t *testing.T) {
t.Fatalf("Failed to make a bucket - %v", err) t.Fatalf("Failed to make a bucket - %v", err)
} }
// Create an object with multiple parts uploaded in decreasing // Upload a multipart object in the second pool
// part number. z := objLayer.(*erasureServerPools)
uploadID, err := objLayer.NewMultipartUpload(ctx, bucket, object, opts) set := z.serverPools[1]
uploadID, err := set.NewMultipartUpload(ctx, bucket, object, opts)
if err != nil { if err != nil {
t.Fatalf("Failed to create a multipart upload - %v", err) t.Fatalf("Failed to create a multipart upload - %v", err)
} }
var uploadedParts []CompletePart var uploadedParts []CompletePart
for _, partID := range []int{2, 1} { for _, partID := range []int{2, 1} {
pInfo, err1 := objLayer.PutObjectPart(ctx, bucket, object, uploadID, partID, mustGetPutObjReader(t, bytes.NewReader(data), int64(len(data)), "", ""), opts) pInfo, err1 := set.PutObjectPart(ctx, bucket, object, uploadID, partID, mustGetPutObjReader(t, bytes.NewReader(data), int64(len(data)), "", ""), opts)
if err1 != nil { if err1 != nil {
t.Fatalf("Failed to upload a part - %v", err1) t.Fatalf("Failed to upload a part - %v", err1)
} }
@ -703,120 +705,114 @@ func TestHealObjectCorruptedPools(t *testing.T) {
}) })
} }
_, err = objLayer.CompleteMultipartUpload(ctx, bucket, object, uploadID, uploadedParts, ObjectOptions{}) _, err = set.CompleteMultipartUpload(ctx, bucket, object, uploadID, uploadedParts, ObjectOptions{})
if err != nil { if err != nil {
t.Fatalf("Failed to complete multipart upload - %v", err) t.Fatalf("Failed to complete multipart upload - %v", err)
} }
// Test 1: Remove the object backend files from the first disk. // Test 1: Remove the object backend files from the first disk.
z := objLayer.(*erasureServerPools) er := set.sets[0]
for _, set := range z.serverPools { erasureDisks := er.getDisks()
er := set.sets[0] firstDisk := erasureDisks[0]
erasureDisks := er.getDisks() err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false)
firstDisk := erasureDisks[0] if err != nil {
err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false) t.Fatalf("Failed to delete a file - %v", err)
if err != nil { }
t.Fatalf("Failed to delete a file - %v", err)
}
_, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{ScanMode: madmin.HealNormalScan}) _, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{ScanMode: madmin.HealNormalScan})
if err != nil { if err != nil {
t.Fatalf("Failed to heal object - %v", err) t.Fatalf("Failed to heal object - %v", err)
} }
fileInfos, errs := readAllFileInfo(ctx, erasureDisks, bucket, object, "", false) fileInfos, errs := readAllFileInfo(ctx, erasureDisks, bucket, object, "", false)
fi, err := getLatestFileInfo(ctx, fileInfos, errs) fi, err := getLatestFileInfo(ctx, fileInfos, errs)
if errors.Is(err, errFileNotFound) { if err != nil {
continue t.Fatalf("Failed to getLatestFileInfo - %v", err)
} }
if err != nil {
t.Fatalf("Failed to getLatestFileInfo - %v", err)
}
if _, err = firstDisk.StatInfoFile(context.Background(), bucket, object+"/"+xlStorageFormatFile, false); err != nil { if _, err = firstDisk.StatInfoFile(context.Background(), bucket, object+"/"+xlStorageFormatFile, false); err != nil {
t.Errorf("Expected xl.meta file to be present but stat failed - %v", err) t.Errorf("Expected xl.meta file to be present but stat failed - %v", err)
} }
err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false) err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false)
if err != nil { if err != nil {
t.Errorf("Failure during deleting part.1 - %v", err) t.Errorf("Failure during deleting part.1 - %v", err)
} }
err = firstDisk.WriteAll(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), []byte{}) err = firstDisk.WriteAll(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), []byte{})
if err != nil { if err != nil {
t.Errorf("Failure during creating part.1 - %v", err) t.Errorf("Failure during creating part.1 - %v", err)
} }
_, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{DryRun: false, Remove: true, ScanMode: madmin.HealDeepScan}) _, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{DryRun: false, Remove: true, ScanMode: madmin.HealDeepScan})
if err != nil { if err != nil {
t.Errorf("Expected nil but received %v", err) t.Errorf("Expected nil but received %v", err)
} }
fileInfos, errs = readAllFileInfo(ctx, erasureDisks, bucket, object, "", false) fileInfos, errs = readAllFileInfo(ctx, erasureDisks, bucket, object, "", false)
nfi, err := getLatestFileInfo(ctx, fileInfos, errs) nfi, err := getLatestFileInfo(ctx, fileInfos, errs)
if err != nil { if err != nil {
t.Fatalf("Failed to getLatestFileInfo - %v", err) t.Fatalf("Failed to getLatestFileInfo - %v", err)
} }
fi.DiskMTime = time.Time{} fi.DiskMTime = time.Time{}
nfi.DiskMTime = time.Time{} nfi.DiskMTime = time.Time{}
if !reflect.DeepEqual(fi, nfi) { if !reflect.DeepEqual(fi, nfi) {
t.Fatalf("FileInfo not equal after healing: %v != %v", fi, nfi) t.Fatalf("FileInfo not equal after healing: %v != %v", fi, nfi)
} }
err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false) err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false)
if err != nil { if err != nil {
t.Errorf("Failure during deleting part.1 - %v", err) t.Errorf("Failure during deleting part.1 - %v", err)
} }
bdata := bytes.Repeat([]byte("b"), int(nfi.Size)) bdata := bytes.Repeat([]byte("b"), int(nfi.Size))
err = firstDisk.WriteAll(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), bdata) err = firstDisk.WriteAll(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), bdata)
if err != nil { if err != nil {
t.Errorf("Failure during creating part.1 - %v", err) t.Errorf("Failure during creating part.1 - %v", err)
} }
_, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{DryRun: false, Remove: true, ScanMode: madmin.HealDeepScan}) _, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{DryRun: false, Remove: true, ScanMode: madmin.HealDeepScan})
if err != nil { if err != nil {
t.Errorf("Expected nil but received %v", err) t.Errorf("Expected nil but received %v", err)
} }
fileInfos, errs = readAllFileInfo(ctx, erasureDisks, bucket, object, "", false) fileInfos, errs = readAllFileInfo(ctx, erasureDisks, bucket, object, "", false)
nfi, err = getLatestFileInfo(ctx, fileInfos, errs) nfi, err = getLatestFileInfo(ctx, fileInfos, errs)
if err != nil { if err != nil {
t.Fatalf("Failed to getLatestFileInfo - %v", err) t.Fatalf("Failed to getLatestFileInfo - %v", err)
} }
fi.DiskMTime = time.Time{} fi.DiskMTime = time.Time{}
nfi.DiskMTime = time.Time{} nfi.DiskMTime = time.Time{}
if !reflect.DeepEqual(fi, nfi) { if !reflect.DeepEqual(fi, nfi) {
t.Fatalf("FileInfo not equal after healing: %v != %v", fi, nfi) t.Fatalf("FileInfo not equal after healing: %v != %v", fi, nfi)
} }
// Test 4: checks if HealObject returns an error when xl.meta is not found // Test 4: checks if HealObject returns an error when xl.meta is not found
// in more than read quorum number of disks, to create a corrupted situation. // in more than read quorum number of disks, to create a corrupted situation.
for i := 0; i <= nfi.Erasure.DataBlocks; i++ { for i := 0; i <= nfi.Erasure.DataBlocks; i++ {
erasureDisks[i].Delete(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false) erasureDisks[i].Delete(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false)
} }
// Try healing now, expect to receive errFileNotFound. // Try healing now, expect to receive errFileNotFound.
_, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{DryRun: false, Remove: true, ScanMode: madmin.HealDeepScan}) _, err = objLayer.HealObject(ctx, bucket, object, "", madmin.HealOpts{DryRun: false, Remove: true, ScanMode: madmin.HealDeepScan})
if err != nil { if err != nil {
if _, ok := err.(ObjectNotFound); !ok {
t.Errorf("Expect %v but received %v", ObjectNotFound{Bucket: bucket, Object: object}, err)
}
}
// since majority of xl.meta's are not available, object should be successfully deleted.
_, err = objLayer.GetObjectInfo(ctx, bucket, object, ObjectOptions{})
if _, ok := err.(ObjectNotFound); !ok { if _, ok := err.(ObjectNotFound); !ok {
t.Errorf("Expect %v but received %v", ObjectNotFound{Bucket: bucket, Object: object}, err) t.Errorf("Expect %v but received %v", ObjectNotFound{Bucket: bucket, Object: object}, err)
} }
}
for i := 0; i < (nfi.Erasure.DataBlocks + nfi.Erasure.ParityBlocks); i++ { // since majority of xl.meta's are not available, object should be successfully deleted.
_, err = erasureDisks[i].StatInfoFile(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false) _, err = objLayer.GetObjectInfo(ctx, bucket, object, ObjectOptions{})
if err == nil { if _, ok := err.(ObjectNotFound); !ok {
t.Errorf("Expected xl.meta file to be not present, but succeeeded") t.Errorf("Expect %v but received %v", ObjectNotFound{Bucket: bucket, Object: object}, err)
} }
for i := 0; i < (nfi.Erasure.DataBlocks + nfi.Erasure.ParityBlocks); i++ {
_, err = erasureDisks[i].StatInfoFile(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false)
if err == nil {
t.Errorf("Expected xl.meta file to be not present, but succeeeded")
} }
} }
} }
@ -1202,7 +1198,7 @@ func TestHealObjectErasure(t *testing.T) {
// since majority of xl.meta's are not available, object quorum // since majority of xl.meta's are not available, object quorum
// can't be read properly will be deleted automatically and // can't be read properly will be deleted automatically and
// err is nil // err is nil
if err != nil { if !isErrObjectNotFound(err) {
t.Fatal(err) t.Fatal(err)
} }
} }

View File

@ -1939,7 +1939,8 @@ func (z *erasureServerPools) HealObjects(ctx context.Context, bucket, prefix str
} }
for _, version := range fivs.Versions { for _, version := range fivs.Versions {
if err := healObjectFn(bucket, version.Name, version.VersionID); err != nil { err := healObjectFn(bucket, version.Name, version.VersionID)
if err != nil && !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
return err return err
} }
} }
@ -2000,18 +2001,21 @@ func (z *erasureServerPools) HealObject(ctx context.Context, bucket, object, ver
} }
wg.Wait() wg.Wait()
for _, err := range errs { // Return the first nil error
if err != nil { for idx, err := range errs {
return madmin.HealResultItem{}, err if err == nil {
return results[idx], nil
} }
} }
for _, result := range results { // No pool returned a nil error, return the first non 'not found' error
if result.Object != "" { for idx, err := range errs {
return result, nil if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
return results[idx], err
} }
} }
// At this stage, all errors are 'not found'
if versionID != "" { if versionID != "" {
return madmin.HealResultItem{}, VersionNotFound{ return madmin.HealResultItem{}, VersionNotFound{
Bucket: bucket, Bucket: bucket,