mirror of
https://github.com/minio/minio.git
synced 2025-01-11 15:03:22 -05:00
Add normal/deep type of heal scanning (#7251)
Healing scan used to read all objects parts to check for bitrot checksum. This commit will add a quicker way of healing scan by only checking if parts are actually present in disks or not.
This commit is contained in:
parent
233824bf92
commit
facbd653ba
@ -575,7 +575,7 @@ func (h *healSequence) healMinioSysMeta(metaPrefix string) func() error {
|
||||
if h.isQuitting() {
|
||||
return errHealStopSignalled
|
||||
}
|
||||
res, herr := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove)
|
||||
res, herr := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove, h.settings.ScanMode)
|
||||
// Object might have been deleted, by the time heal
|
||||
// was attempted we ignore this object an move on.
|
||||
if isErrObjectNotFound(herr) {
|
||||
@ -718,7 +718,7 @@ func (h *healSequence) healObject(bucket, object string) error {
|
||||
return errServerNotInitialized
|
||||
}
|
||||
|
||||
hri, err := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove)
|
||||
hri, err := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove, h.settings.ScanMode)
|
||||
if isErrObjectNotFound(err) {
|
||||
return nil
|
||||
}
|
||||
|
@ -1240,7 +1240,7 @@ func (fs *FSObjects) HealFormat(ctx context.Context, dryRun bool) (madmin.HealRe
|
||||
}
|
||||
|
||||
// HealObject - no-op for fs. Valid only for XL.
|
||||
func (fs *FSObjects) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (
|
||||
func (fs *FSObjects) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (
|
||||
res madmin.HealResultItem, err error) {
|
||||
logger.LogIf(ctx, NotImplemented{})
|
||||
return res, NotImplemented{}
|
||||
|
@ -22,6 +22,8 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/minio/minio/pkg/madmin"
|
||||
)
|
||||
|
||||
// Tests for if parent directory is object
|
||||
@ -390,7 +392,7 @@ func TestFSHealObject(t *testing.T) {
|
||||
defer os.RemoveAll(disk)
|
||||
|
||||
obj := initFSObjects(disk, t)
|
||||
_, err := obj.HealObject(context.Background(), "bucket", "object", false, false)
|
||||
_, err := obj.HealObject(context.Background(), "bucket", "object", false, false, madmin.HealDeepScan)
|
||||
if err == nil || !isSameType(err, NotImplemented{}) {
|
||||
t.Fatalf("Heal Object should return NotImplemented error ")
|
||||
}
|
||||
|
@ -102,7 +102,7 @@ func (a GatewayUnsupported) ListBucketsHeal(ctx context.Context) (buckets []Buck
|
||||
}
|
||||
|
||||
// HealObject - Not implemented stub
|
||||
func (a GatewayUnsupported) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (h madmin.HealResultItem, e error) {
|
||||
func (a GatewayUnsupported) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (h madmin.HealResultItem, e error) {
|
||||
return h, NotImplemented{}
|
||||
}
|
||||
|
||||
|
@ -88,7 +88,7 @@ type ObjectLayer interface {
|
||||
ReloadFormat(ctx context.Context, dryRun bool) error
|
||||
HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error)
|
||||
HealBucket(ctx context.Context, bucket string, dryRun, remove bool) (madmin.HealResultItem, error)
|
||||
HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (madmin.HealResultItem, error)
|
||||
HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (madmin.HealResultItem, error)
|
||||
ListBucketsHeal(ctx context.Context) (buckets []BucketInfo, err error)
|
||||
HealObjects(ctx context.Context, bucket, prefix string, healObjectFn func(string, string) error) error
|
||||
|
||||
|
@ -1296,8 +1296,8 @@ func (s *xlSets) HealBucket(ctx context.Context, bucket string, dryRun, remove b
|
||||
}
|
||||
|
||||
// HealObject - heals inconsistent object on a hashedSet based on object name.
|
||||
func (s *xlSets) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (madmin.HealResultItem, error) {
|
||||
return s.getHashedSet(object).HealObject(ctx, bucket, object, dryRun, remove)
|
||||
func (s *xlSets) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (madmin.HealResultItem, error) {
|
||||
return s.getHashedSet(object).HealObject(ctx, bucket, object, dryRun, remove, scanMode)
|
||||
}
|
||||
|
||||
// Lists all buckets which need healing.
|
||||
|
@ -22,6 +22,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/minio/minio/cmd/logger"
|
||||
"github.com/minio/minio/pkg/madmin"
|
||||
)
|
||||
|
||||
// commonTime returns a maximally occurring time from a list of time.
|
||||
@ -158,7 +159,7 @@ func getLatestXLMeta(ctx context.Context, partsMetadata []xlMetaV1, errs []error
|
||||
// - slice of errors about the state of data files on disk - can have
|
||||
// a not-found error or a hash-mismatch error.
|
||||
func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket,
|
||||
object string) ([]StorageAPI, []error) {
|
||||
object string, scanMode madmin.HealScanMode) ([]StorageAPI, []error) {
|
||||
availableDisks := make([]StorageAPI, len(onlineDisks))
|
||||
dataErrs := make([]error, len(onlineDisks))
|
||||
|
||||
@ -168,27 +169,38 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
|
||||
continue
|
||||
}
|
||||
|
||||
erasureInfo := partsMetadata[i].Erasure
|
||||
erasure, err := NewErasure(ctx, erasureInfo.DataBlocks, erasureInfo.ParityBlocks, erasureInfo.BlockSize)
|
||||
if err != nil {
|
||||
dataErrs[i] = err
|
||||
continue
|
||||
}
|
||||
|
||||
// disk has a valid xl.json but may not have all the
|
||||
// parts. This is considered an outdated disk, since
|
||||
// it needs healing too.
|
||||
for _, part := range partsMetadata[i].Parts {
|
||||
checksumInfo := erasureInfo.GetChecksumInfo(part.Name)
|
||||
tillOffset := erasure.ShardFileTillOffset(0, part.Size, part.Size)
|
||||
err = bitrotCheckFile(onlineDisk, bucket, pathJoin(object, part.Name), tillOffset, checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
|
||||
switch scanMode {
|
||||
case madmin.HealDeepScan:
|
||||
erasureInfo := partsMetadata[i].Erasure
|
||||
erasure, err := NewErasure(ctx, erasureInfo.DataBlocks, erasureInfo.ParityBlocks, erasureInfo.BlockSize)
|
||||
if err != nil {
|
||||
isCorrupt := strings.HasPrefix(err.Error(), "Bitrot verification mismatch - expected ")
|
||||
if !isCorrupt && err != errFileNotFound && err != errVolumeNotFound {
|
||||
logger.LogIf(ctx, err)
|
||||
}
|
||||
dataErrs[i] = err
|
||||
break
|
||||
continue
|
||||
}
|
||||
|
||||
// disk has a valid xl.json but may not have all the
|
||||
// parts. This is considered an outdated disk, since
|
||||
// it needs healing too.
|
||||
for _, part := range partsMetadata[i].Parts {
|
||||
checksumInfo := erasureInfo.GetChecksumInfo(part.Name)
|
||||
tillOffset := erasure.ShardFileTillOffset(0, part.Size, part.Size)
|
||||
err = bitrotCheckFile(onlineDisk, bucket, pathJoin(object, part.Name), tillOffset, checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
|
||||
if err != nil {
|
||||
isCorrupt := strings.HasPrefix(err.Error(), "Bitrot verification mismatch - expected ")
|
||||
if !isCorrupt && err != errFileNotFound && err != errVolumeNotFound {
|
||||
logger.LogIf(ctx, err)
|
||||
}
|
||||
dataErrs[i] = err
|
||||
break
|
||||
}
|
||||
}
|
||||
case madmin.HealNormalScan:
|
||||
for _, part := range partsMetadata[i].Parts {
|
||||
_, err := onlineDisk.StatFile(bucket, pathJoin(object, part.Name))
|
||||
if err != nil {
|
||||
dataErrs[i] = err
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,8 @@ import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/minio/minio/pkg/madmin"
|
||||
)
|
||||
|
||||
// validates functionality provided to find most common
|
||||
@ -239,7 +241,7 @@ func TestListOnlineDisks(t *testing.T) {
|
||||
i+1, test.expectedTime, modTime)
|
||||
}
|
||||
|
||||
availableDisks, newErrs := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object)
|
||||
availableDisks, newErrs := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object, madmin.HealDeepScan)
|
||||
test.errs = newErrs
|
||||
|
||||
if test._tamperBackend != noTamper {
|
||||
@ -291,7 +293,7 @@ func TestDisksWithAllParts(t *testing.T) {
|
||||
t.Fatalf("Failed to read xl meta data %v", err)
|
||||
}
|
||||
|
||||
filteredDisks, errs := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
|
||||
filteredDisks, errs := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan)
|
||||
|
||||
if len(filteredDisks) != len(xlDisks) {
|
||||
t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
|
||||
@ -328,7 +330,7 @@ func TestDisksWithAllParts(t *testing.T) {
|
||||
}
|
||||
|
||||
errs = make([]error, len(xlDisks))
|
||||
filteredDisks, errs = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
|
||||
filteredDisks, errs = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan)
|
||||
|
||||
if len(filteredDisks) != len(xlDisks) {
|
||||
t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
|
||||
|
@ -208,7 +208,8 @@ func shouldHealObjectOnDisk(xlErr, dataErr error, meta xlMetaV1, quorumModTime t
|
||||
|
||||
// Heals an object by re-writing corrupt/missing erasure blocks.
|
||||
func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, object string,
|
||||
quorum int, dryRun bool) (result madmin.HealResultItem, err error) {
|
||||
quorum int, dryRun bool, scanMode madmin.HealScanMode) (result madmin.HealResultItem, err error) {
|
||||
|
||||
partsMetadata, errs := readAllXLMetadata(ctx, storageDisks, bucket, object)
|
||||
|
||||
errCount := 0
|
||||
@ -232,7 +233,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
|
||||
latestDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs)
|
||||
|
||||
// List of disks having all parts as per latest xl.json.
|
||||
availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object)
|
||||
availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object, scanMode)
|
||||
|
||||
// Initialize heal result object
|
||||
result = madmin.HealResultItem{
|
||||
@ -621,7 +622,7 @@ func (xl xlObjects) isObjectDangling(metaArr []xlMetaV1, errs []error) (validMet
|
||||
// FIXME: If an object object was deleted and one disk was down,
|
||||
// and later the disk comes back up again, heal on the object
|
||||
// should delete it.
|
||||
func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRun bool, remove bool) (hr madmin.HealResultItem, err error) {
|
||||
func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRun bool, remove bool, scanMode madmin.HealScanMode) (hr madmin.HealResultItem, err error) {
|
||||
// Create context that also contains information about the object and bucket.
|
||||
// The top level handler might not have this information.
|
||||
reqInfo := logger.GetReqInfo(ctx)
|
||||
@ -670,5 +671,5 @@ func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRu
|
||||
defer objectLock.RUnlock()
|
||||
|
||||
// Heal the object.
|
||||
return healObject(healCtx, xl.getDisks(), bucket, object, latestXLMeta.Erasure.DataBlocks, dryRun)
|
||||
return healObject(healCtx, xl.getDisks(), bucket, object, latestXLMeta.Erasure.DataBlocks, dryRun, scanMode)
|
||||
}
|
||||
|
@ -21,6 +21,8 @@ import (
|
||||
"context"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/minio/minio/pkg/madmin"
|
||||
)
|
||||
|
||||
// Tests undoes and validates if the undoing completes successfully.
|
||||
@ -114,7 +116,7 @@ func TestHealObjectXL(t *testing.T) {
|
||||
t.Fatalf("Failed to delete a file - %v", err)
|
||||
}
|
||||
|
||||
_, err = obj.HealObject(context.Background(), bucket, object, false, false)
|
||||
_, err = obj.HealObject(context.Background(), bucket, object, false, false, madmin.HealNormalScan)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to heal object - %v", err)
|
||||
}
|
||||
@ -130,7 +132,7 @@ func TestHealObjectXL(t *testing.T) {
|
||||
}
|
||||
|
||||
// Try healing now, expect to receive errDiskNotFound.
|
||||
_, err = obj.HealObject(context.Background(), bucket, object, false, false)
|
||||
_, err = obj.HealObject(context.Background(), bucket, object, false, false, madmin.HealDeepScan)
|
||||
// since majority of xl.jsons are not available, object quorum can't be read properly and error will be errXLReadQuorum
|
||||
if _, ok := err.(InsufficientReadQuorum); !ok {
|
||||
t.Errorf("Expected %v but received %v", InsufficientReadQuorum{}, err)
|
||||
|
@ -28,6 +28,7 @@ import (
|
||||
"time"
|
||||
|
||||
humanize "github.com/dustin/go-humanize"
|
||||
"github.com/minio/minio/pkg/madmin"
|
||||
)
|
||||
|
||||
func TestRepeatPutObjectPart(t *testing.T) {
|
||||
@ -308,7 +309,7 @@ func TestHealing(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
_, err = xl.HealObject(context.Background(), bucket, object, false, false)
|
||||
_, err = xl.HealObject(context.Background(), bucket, object, false, false, madmin.HealNormalScan)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -337,7 +338,7 @@ func TestHealing(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
_, err = xl.HealObject(context.Background(), bucket, object, false, false)
|
||||
_, err = xl.HealObject(context.Background(), bucket, object, false, false, madmin.HealDeepScan)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -26,11 +26,22 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// HealScanMode represents the type of healing scan
|
||||
type HealScanMode int
|
||||
|
||||
const (
|
||||
// HealNormalScan checks if parts are present and not outdated
|
||||
HealNormalScan HealScanMode = iota
|
||||
// HealDeepScan checks for parts bitrot checksums
|
||||
HealDeepScan
|
||||
)
|
||||
|
||||
// HealOpts - collection of options for a heal sequence
|
||||
type HealOpts struct {
|
||||
Recursive bool `json:"recursive"`
|
||||
DryRun bool `json:"dryRun"`
|
||||
Remove bool `json:"remove"`
|
||||
Recursive bool `json:"recursive"`
|
||||
DryRun bool `json:"dryRun"`
|
||||
Remove bool `json:"remove"`
|
||||
ScanMode HealScanMode `json:"scanMode"`
|
||||
}
|
||||
|
||||
// HealStartSuccess - holds information about a successfully started
|
||||
|
Loading…
Reference in New Issue
Block a user