mirror of
https://github.com/minio/minio.git
synced 2025-04-04 03:40:30 -04:00
Add normal/deep type of heal scanning (#7251)
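Previously, the healing scan read every part of every object in order to verify its bitrot checksum. This commit adds a quicker "normal" scan mode that only checks whether the parts are actually present on the disks, while keeping the full checksum verification available as the "deep" scan mode.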
This commit is contained in:
parent
233824bf92
commit
facbd653ba
@ -575,7 +575,7 @@ func (h *healSequence) healMinioSysMeta(metaPrefix string) func() error {
|
|||||||
if h.isQuitting() {
|
if h.isQuitting() {
|
||||||
return errHealStopSignalled
|
return errHealStopSignalled
|
||||||
}
|
}
|
||||||
res, herr := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove)
|
res, herr := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove, h.settings.ScanMode)
|
||||||
// Object might have been deleted, by the time heal
|
// Object might have been deleted, by the time heal
|
||||||
// was attempted we ignore this object and move on.
|
// was attempted we ignore this object and move on.
|
||||||
if isErrObjectNotFound(herr) {
|
if isErrObjectNotFound(herr) {
|
||||||
@ -718,7 +718,7 @@ func (h *healSequence) healObject(bucket, object string) error {
|
|||||||
return errServerNotInitialized
|
return errServerNotInitialized
|
||||||
}
|
}
|
||||||
|
|
||||||
hri, err := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove)
|
hri, err := objectAPI.HealObject(h.ctx, bucket, object, h.settings.DryRun, h.settings.Remove, h.settings.ScanMode)
|
||||||
if isErrObjectNotFound(err) {
|
if isErrObjectNotFound(err) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -1240,7 +1240,7 @@ func (fs *FSObjects) HealFormat(ctx context.Context, dryRun bool) (madmin.HealRe
|
|||||||
}
|
}
|
||||||
|
|
||||||
// HealObject - no-op for fs. Valid only for XL.
|
// HealObject - no-op for fs. Valid only for XL.
|
||||||
func (fs *FSObjects) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (
|
func (fs *FSObjects) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (
|
||||||
res madmin.HealResultItem, err error) {
|
res madmin.HealResultItem, err error) {
|
||||||
logger.LogIf(ctx, NotImplemented{})
|
logger.LogIf(ctx, NotImplemented{})
|
||||||
return res, NotImplemented{}
|
return res, NotImplemented{}
|
||||||
|
@ -22,6 +22,8 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/minio/minio/pkg/madmin"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Tests for if parent directory is object
|
// Tests for if parent directory is object
|
||||||
@ -390,7 +392,7 @@ func TestFSHealObject(t *testing.T) {
|
|||||||
defer os.RemoveAll(disk)
|
defer os.RemoveAll(disk)
|
||||||
|
|
||||||
obj := initFSObjects(disk, t)
|
obj := initFSObjects(disk, t)
|
||||||
_, err := obj.HealObject(context.Background(), "bucket", "object", false, false)
|
_, err := obj.HealObject(context.Background(), "bucket", "object", false, false, madmin.HealDeepScan)
|
||||||
if err == nil || !isSameType(err, NotImplemented{}) {
|
if err == nil || !isSameType(err, NotImplemented{}) {
|
||||||
t.Fatalf("Heal Object should return NotImplemented error ")
|
t.Fatalf("Heal Object should return NotImplemented error ")
|
||||||
}
|
}
|
||||||
|
@ -102,7 +102,7 @@ func (a GatewayUnsupported) ListBucketsHeal(ctx context.Context) (buckets []Buck
|
|||||||
}
|
}
|
||||||
|
|
||||||
// HealObject - Not implemented stub
|
// HealObject - Not implemented stub
|
||||||
func (a GatewayUnsupported) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (h madmin.HealResultItem, e error) {
|
func (a GatewayUnsupported) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (h madmin.HealResultItem, e error) {
|
||||||
return h, NotImplemented{}
|
return h, NotImplemented{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,7 +88,7 @@ type ObjectLayer interface {
|
|||||||
ReloadFormat(ctx context.Context, dryRun bool) error
|
ReloadFormat(ctx context.Context, dryRun bool) error
|
||||||
HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error)
|
HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error)
|
||||||
HealBucket(ctx context.Context, bucket string, dryRun, remove bool) (madmin.HealResultItem, error)
|
HealBucket(ctx context.Context, bucket string, dryRun, remove bool) (madmin.HealResultItem, error)
|
||||||
HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (madmin.HealResultItem, error)
|
HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (madmin.HealResultItem, error)
|
||||||
ListBucketsHeal(ctx context.Context) (buckets []BucketInfo, err error)
|
ListBucketsHeal(ctx context.Context) (buckets []BucketInfo, err error)
|
||||||
HealObjects(ctx context.Context, bucket, prefix string, healObjectFn func(string, string) error) error
|
HealObjects(ctx context.Context, bucket, prefix string, healObjectFn func(string, string) error) error
|
||||||
|
|
||||||
|
@ -1296,8 +1296,8 @@ func (s *xlSets) HealBucket(ctx context.Context, bucket string, dryRun, remove b
|
|||||||
}
|
}
|
||||||
|
|
||||||
// HealObject - heals inconsistent object on a hashedSet based on object name.
|
// HealObject - heals inconsistent object on a hashedSet based on object name.
|
||||||
func (s *xlSets) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool) (madmin.HealResultItem, error) {
|
func (s *xlSets) HealObject(ctx context.Context, bucket, object string, dryRun, remove bool, scanMode madmin.HealScanMode) (madmin.HealResultItem, error) {
|
||||||
return s.getHashedSet(object).HealObject(ctx, bucket, object, dryRun, remove)
|
return s.getHashedSet(object).HealObject(ctx, bucket, object, dryRun, remove, scanMode)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Lists all buckets which need healing.
|
// Lists all buckets which need healing.
|
||||||
|
@ -22,6 +22,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/minio/minio/cmd/logger"
|
"github.com/minio/minio/cmd/logger"
|
||||||
|
"github.com/minio/minio/pkg/madmin"
|
||||||
)
|
)
|
||||||
|
|
||||||
// commonTime returns a maximally occurring time from a list of time.
|
// commonTime returns a maximally occurring time from a list of time.
|
||||||
@ -158,7 +159,7 @@ func getLatestXLMeta(ctx context.Context, partsMetadata []xlMetaV1, errs []error
|
|||||||
// - slice of errors about the state of data files on disk - can have
|
// - slice of errors about the state of data files on disk - can have
|
||||||
// a not-found error or a hash-mismatch error.
|
// a not-found error or a hash-mismatch error.
|
||||||
func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket,
|
func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket,
|
||||||
object string) ([]StorageAPI, []error) {
|
object string, scanMode madmin.HealScanMode) ([]StorageAPI, []error) {
|
||||||
availableDisks := make([]StorageAPI, len(onlineDisks))
|
availableDisks := make([]StorageAPI, len(onlineDisks))
|
||||||
dataErrs := make([]error, len(onlineDisks))
|
dataErrs := make([]error, len(onlineDisks))
|
||||||
|
|
||||||
@ -168,27 +169,38 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
erasureInfo := partsMetadata[i].Erasure
|
switch scanMode {
|
||||||
erasure, err := NewErasure(ctx, erasureInfo.DataBlocks, erasureInfo.ParityBlocks, erasureInfo.BlockSize)
|
case madmin.HealDeepScan:
|
||||||
if err != nil {
|
erasureInfo := partsMetadata[i].Erasure
|
||||||
dataErrs[i] = err
|
erasure, err := NewErasure(ctx, erasureInfo.DataBlocks, erasureInfo.ParityBlocks, erasureInfo.BlockSize)
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// disk has a valid xl.json but may not have all the
|
|
||||||
// parts. This is considered an outdated disk, since
|
|
||||||
// it needs healing too.
|
|
||||||
for _, part := range partsMetadata[i].Parts {
|
|
||||||
checksumInfo := erasureInfo.GetChecksumInfo(part.Name)
|
|
||||||
tillOffset := erasure.ShardFileTillOffset(0, part.Size, part.Size)
|
|
||||||
err = bitrotCheckFile(onlineDisk, bucket, pathJoin(object, part.Name), tillOffset, checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
isCorrupt := strings.HasPrefix(err.Error(), "Bitrot verification mismatch - expected ")
|
|
||||||
if !isCorrupt && err != errFileNotFound && err != errVolumeNotFound {
|
|
||||||
logger.LogIf(ctx, err)
|
|
||||||
}
|
|
||||||
dataErrs[i] = err
|
dataErrs[i] = err
|
||||||
break
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// disk has a valid xl.json but may not have all the
|
||||||
|
// parts. This is considered an outdated disk, since
|
||||||
|
// it needs healing too.
|
||||||
|
for _, part := range partsMetadata[i].Parts {
|
||||||
|
checksumInfo := erasureInfo.GetChecksumInfo(part.Name)
|
||||||
|
tillOffset := erasure.ShardFileTillOffset(0, part.Size, part.Size)
|
||||||
|
err = bitrotCheckFile(onlineDisk, bucket, pathJoin(object, part.Name), tillOffset, checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
|
||||||
|
if err != nil {
|
||||||
|
isCorrupt := strings.HasPrefix(err.Error(), "Bitrot verification mismatch - expected ")
|
||||||
|
if !isCorrupt && err != errFileNotFound && err != errVolumeNotFound {
|
||||||
|
logger.LogIf(ctx, err)
|
||||||
|
}
|
||||||
|
dataErrs[i] = err
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case madmin.HealNormalScan:
|
||||||
|
for _, part := range partsMetadata[i].Parts {
|
||||||
|
_, err := onlineDisk.StatFile(bucket, pathJoin(object, part.Name))
|
||||||
|
if err != nil {
|
||||||
|
dataErrs[i] = err
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,6 +23,8 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/minio/minio/pkg/madmin"
|
||||||
)
|
)
|
||||||
|
|
||||||
// validates functionality provided to find most common
|
// validates functionality provided to find most common
|
||||||
@ -239,7 +241,7 @@ func TestListOnlineDisks(t *testing.T) {
|
|||||||
i+1, test.expectedTime, modTime)
|
i+1, test.expectedTime, modTime)
|
||||||
}
|
}
|
||||||
|
|
||||||
availableDisks, newErrs := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object)
|
availableDisks, newErrs := disksWithAllParts(context.Background(), onlineDisks, partsMetadata, test.errs, bucket, object, madmin.HealDeepScan)
|
||||||
test.errs = newErrs
|
test.errs = newErrs
|
||||||
|
|
||||||
if test._tamperBackend != noTamper {
|
if test._tamperBackend != noTamper {
|
||||||
@ -291,7 +293,7 @@ func TestDisksWithAllParts(t *testing.T) {
|
|||||||
t.Fatalf("Failed to read xl meta data %v", err)
|
t.Fatalf("Failed to read xl meta data %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
filteredDisks, errs := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
|
filteredDisks, errs := disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan)
|
||||||
|
|
||||||
if len(filteredDisks) != len(xlDisks) {
|
if len(filteredDisks) != len(xlDisks) {
|
||||||
t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
|
t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
|
||||||
@ -328,7 +330,7 @@ func TestDisksWithAllParts(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
errs = make([]error, len(xlDisks))
|
errs = make([]error, len(xlDisks))
|
||||||
filteredDisks, errs = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object)
|
filteredDisks, errs = disksWithAllParts(ctx, xlDisks, partsMetadata, errs, bucket, object, madmin.HealDeepScan)
|
||||||
|
|
||||||
if len(filteredDisks) != len(xlDisks) {
|
if len(filteredDisks) != len(xlDisks) {
|
||||||
t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
|
t.Errorf("Unexpected number of disks: %d", len(filteredDisks))
|
||||||
|
@ -208,7 +208,8 @@ func shouldHealObjectOnDisk(xlErr, dataErr error, meta xlMetaV1, quorumModTime t
|
|||||||
|
|
||||||
// Heals an object by re-writing corrupt/missing erasure blocks.
|
// Heals an object by re-writing corrupt/missing erasure blocks.
|
||||||
func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, object string,
|
func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, object string,
|
||||||
quorum int, dryRun bool) (result madmin.HealResultItem, err error) {
|
quorum int, dryRun bool, scanMode madmin.HealScanMode) (result madmin.HealResultItem, err error) {
|
||||||
|
|
||||||
partsMetadata, errs := readAllXLMetadata(ctx, storageDisks, bucket, object)
|
partsMetadata, errs := readAllXLMetadata(ctx, storageDisks, bucket, object)
|
||||||
|
|
||||||
errCount := 0
|
errCount := 0
|
||||||
@ -232,7 +233,7 @@ func healObject(ctx context.Context, storageDisks []StorageAPI, bucket string, o
|
|||||||
latestDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs)
|
latestDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs)
|
||||||
|
|
||||||
// List of disks having all parts as per latest xl.json.
|
// List of disks having all parts as per latest xl.json.
|
||||||
availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object)
|
availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object, scanMode)
|
||||||
|
|
||||||
// Initialize heal result object
|
// Initialize heal result object
|
||||||
result = madmin.HealResultItem{
|
result = madmin.HealResultItem{
|
||||||
@ -621,7 +622,7 @@ func (xl xlObjects) isObjectDangling(metaArr []xlMetaV1, errs []error) (validMet
|
|||||||
// FIXME: If an object was deleted and one disk was down,
|
// FIXME: If an object was deleted and one disk was down,
|
||||||
// and later the disk comes back up again, heal on the object
|
// and later the disk comes back up again, heal on the object
|
||||||
// should delete it.
|
// should delete it.
|
||||||
func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRun bool, remove bool) (hr madmin.HealResultItem, err error) {
|
func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRun bool, remove bool, scanMode madmin.HealScanMode) (hr madmin.HealResultItem, err error) {
|
||||||
// Create context that also contains information about the object and bucket.
|
// Create context that also contains information about the object and bucket.
|
||||||
// The top level handler might not have this information.
|
// The top level handler might not have this information.
|
||||||
reqInfo := logger.GetReqInfo(ctx)
|
reqInfo := logger.GetReqInfo(ctx)
|
||||||
@ -670,5 +671,5 @@ func (xl xlObjects) HealObject(ctx context.Context, bucket, object string, dryRu
|
|||||||
defer objectLock.RUnlock()
|
defer objectLock.RUnlock()
|
||||||
|
|
||||||
// Heal the object.
|
// Heal the object.
|
||||||
return healObject(healCtx, xl.getDisks(), bucket, object, latestXLMeta.Erasure.DataBlocks, dryRun)
|
return healObject(healCtx, xl.getDisks(), bucket, object, latestXLMeta.Erasure.DataBlocks, dryRun, scanMode)
|
||||||
}
|
}
|
||||||
|
@ -21,6 +21,8 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/minio/minio/pkg/madmin"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Tests undoes and validates if the undoing completes successfully.
|
// Tests undoes and validates if the undoing completes successfully.
|
||||||
@ -114,7 +116,7 @@ func TestHealObjectXL(t *testing.T) {
|
|||||||
t.Fatalf("Failed to delete a file - %v", err)
|
t.Fatalf("Failed to delete a file - %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err = obj.HealObject(context.Background(), bucket, object, false, false)
|
_, err = obj.HealObject(context.Background(), bucket, object, false, false, madmin.HealNormalScan)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to heal object - %v", err)
|
t.Fatalf("Failed to heal object - %v", err)
|
||||||
}
|
}
|
||||||
@ -130,7 +132,7 @@ func TestHealObjectXL(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Try healing now, expect to receive InsufficientReadQuorum.
|
// Try healing now, expect to receive InsufficientReadQuorum.
|
||||||
_, err = obj.HealObject(context.Background(), bucket, object, false, false)
|
_, err = obj.HealObject(context.Background(), bucket, object, false, false, madmin.HealDeepScan)
|
||||||
// since majority of xl.jsons are not available, object quorum can't be read properly and error will be errXLReadQuorum
|
// since majority of xl.jsons are not available, object quorum can't be read properly and error will be errXLReadQuorum
|
||||||
if _, ok := err.(InsufficientReadQuorum); !ok {
|
if _, ok := err.(InsufficientReadQuorum); !ok {
|
||||||
t.Errorf("Expected %v but received %v", InsufficientReadQuorum{}, err)
|
t.Errorf("Expected %v but received %v", InsufficientReadQuorum{}, err)
|
||||||
|
@ -28,6 +28,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
humanize "github.com/dustin/go-humanize"
|
humanize "github.com/dustin/go-humanize"
|
||||||
|
"github.com/minio/minio/pkg/madmin"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestRepeatPutObjectPart(t *testing.T) {
|
func TestRepeatPutObjectPart(t *testing.T) {
|
||||||
@ -308,7 +309,7 @@ func TestHealing(t *testing.T) {
|
|||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err = xl.HealObject(context.Background(), bucket, object, false, false)
|
_, err = xl.HealObject(context.Background(), bucket, object, false, false, madmin.HealNormalScan)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@ -337,7 +338,7 @@ func TestHealing(t *testing.T) {
|
|||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err = xl.HealObject(context.Background(), bucket, object, false, false)
|
_, err = xl.HealObject(context.Background(), bucket, object, false, false, madmin.HealDeepScan)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
@ -26,11 +26,22 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// HealScanMode represents the type of healing scan
|
||||||
|
type HealScanMode int
|
||||||
|
|
||||||
|
const (
|
||||||
|
// HealNormalScan checks if parts are present and not outdated
|
||||||
|
HealNormalScan HealScanMode = iota
|
||||||
|
// HealDeepScan checks for parts bitrot checksums
|
||||||
|
HealDeepScan
|
||||||
|
)
|
||||||
|
|
||||||
// HealOpts - collection of options for a heal sequence
|
// HealOpts - collection of options for a heal sequence
|
||||||
type HealOpts struct {
|
type HealOpts struct {
|
||||||
Recursive bool `json:"recursive"`
|
Recursive bool `json:"recursive"`
|
||||||
DryRun bool `json:"dryRun"`
|
DryRun bool `json:"dryRun"`
|
||||||
Remove bool `json:"remove"`
|
Remove bool `json:"remove"`
|
||||||
|
ScanMode HealScanMode `json:"scanMode"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// HealStartSuccess - holds information about a successfully started
|
// HealStartSuccess - holds information about a successfully started
|
||||||
|
Loading…
x
Reference in New Issue
Block a user