mirror of
https://github.com/minio/minio.git
synced 2025-01-11 23:13:23 -05:00
fix: heal only once per disk per set among multiple disks (#16358)
This commit is contained in:
parent
1c85652cff
commit
0333412148
@ -79,6 +79,10 @@ type healingTracker struct {
|
|||||||
|
|
||||||
// Filled during heal.
|
// Filled during heal.
|
||||||
HealedBuckets []string
|
HealedBuckets []string
|
||||||
|
|
||||||
|
// ID of the current healing operation
|
||||||
|
HealID string
|
||||||
|
|
||||||
// Add future tracking capabilities
|
// Add future tracking capabilities
|
||||||
// Be sure that they are included in toHealingDisk
|
// Be sure that they are included in toHealingDisk
|
||||||
}
|
}
|
||||||
@ -112,11 +116,12 @@ func loadHealingTracker(ctx context.Context, disk StorageAPI) (*healingTracker,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// newHealingTracker will create a new healing tracker for the disk.
|
// newHealingTracker will create a new healing tracker for the disk.
|
||||||
func newHealingTracker(disk StorageAPI) *healingTracker {
|
func newHealingTracker(disk StorageAPI, healID string) *healingTracker {
|
||||||
diskID, _ := disk.GetDiskID()
|
diskID, _ := disk.GetDiskID()
|
||||||
h := healingTracker{
|
h := healingTracker{
|
||||||
disk: disk,
|
disk: disk,
|
||||||
ID: diskID,
|
ID: diskID,
|
||||||
|
HealID: healID,
|
||||||
Path: disk.String(),
|
Path: disk.String(),
|
||||||
Endpoint: disk.Endpoint().String(),
|
Endpoint: disk.Endpoint().String(),
|
||||||
Started: time.Now().UTC(),
|
Started: time.Now().UTC(),
|
||||||
@ -227,6 +232,7 @@ func (h *healingTracker) printTo(writer io.Writer) {
|
|||||||
func (h *healingTracker) toHealingDisk() madmin.HealingDisk {
|
func (h *healingTracker) toHealingDisk() madmin.HealingDisk {
|
||||||
return madmin.HealingDisk{
|
return madmin.HealingDisk{
|
||||||
ID: h.ID,
|
ID: h.ID,
|
||||||
|
HealID: h.HealID,
|
||||||
Endpoint: h.Endpoint,
|
Endpoint: h.Endpoint,
|
||||||
PoolIndex: h.PoolIndex,
|
PoolIndex: h.PoolIndex,
|
||||||
SetIndex: h.SetIndex,
|
SetIndex: h.SetIndex,
|
||||||
@ -286,8 +292,6 @@ func getLocalDisksToHeal() (disksToHeal Endpoints) {
|
|||||||
var newDiskHealingTimeout = newDynamicTimeout(30*time.Second, 10*time.Second)
|
var newDiskHealingTimeout = newDynamicTimeout(30*time.Second, 10*time.Second)
|
||||||
|
|
||||||
func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint) error {
|
func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint) error {
|
||||||
logger.Info(fmt.Sprintf("Proceeding to heal '%s' - 'mc admin heal alias/ --verbose' to check the status.", endpoint))
|
|
||||||
|
|
||||||
disk, format, err := connectEndpoint(endpoint)
|
disk, format, err := connectEndpoint(endpoint)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Error: %w, %s", err, endpoint)
|
return fmt.Errorf("Error: %w, %s", err, endpoint)
|
||||||
@ -318,6 +322,20 @@ func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint
|
|||||||
ctx = lkctx.Context()
|
ctx = lkctx.Context()
|
||||||
defer locker.Unlock(lkctx)
|
defer locker.Unlock(lkctx)
|
||||||
|
|
||||||
|
// Load healing tracker in this disk
|
||||||
|
tracker, err := loadHealingTracker(ctx, disk)
|
||||||
|
if err != nil {
|
||||||
|
// A healing tracker may be missing when another disk in the same
|
||||||
|
// erasure set and same healing-id successfully finished healing.
|
||||||
|
if err == errFileNotFound {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
logger.LogIf(ctx, fmt.Errorf("Unable to load a healing tracker on '%s': %w", disk, err))
|
||||||
|
tracker = newHealingTracker(disk, mustGetUUID())
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.Info(fmt.Sprintf("Proceeding to heal '%s' - 'mc admin heal alias/ --verbose' to check the status.", endpoint))
|
||||||
|
|
||||||
buckets, _ := z.ListBuckets(ctx, BucketOptions{})
|
buckets, _ := z.ListBuckets(ctx, BucketOptions{})
|
||||||
// Buckets data are dispersed in multiple zones/sets, make
|
// Buckets data are dispersed in multiple zones/sets, make
|
||||||
// sure to heal all bucket metadata configuration.
|
// sure to heal all bucket metadata configuration.
|
||||||
@ -340,15 +358,6 @@ func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint
|
|||||||
logger.Info("Healing drive '%v' on %s pool", disk, humanize.Ordinal(poolIdx+1))
|
logger.Info("Healing drive '%v' on %s pool", disk, humanize.Ordinal(poolIdx+1))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load healing tracker in this disk
|
|
||||||
tracker, err := loadHealingTracker(ctx, disk)
|
|
||||||
if err != nil {
|
|
||||||
// So someone changed the drives underneath, healing tracker missing.
|
|
||||||
logger.LogIf(ctx, fmt.Errorf("Healing tracker missing on '%s', drive was swapped again on %s pool: %w",
|
|
||||||
disk, humanize.Ordinal(poolIdx+1), err))
|
|
||||||
tracker = newHealingTracker(disk)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load bucket totals
|
// Load bucket totals
|
||||||
cache := dataUsageCache{}
|
cache := dataUsageCache{}
|
||||||
if err := cache.load(ctx, z.serverPools[poolIdx].sets[setIdx], dataUsageCacheName); err == nil {
|
if err := cache.load(ctx, z.serverPools[poolIdx].sets[setIdx], dataUsageCacheName); err == nil {
|
||||||
@ -379,7 +388,24 @@ func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint
|
|||||||
logger.Info("\n")
|
logger.Info("\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if tracker.HealID == "" { // HealID is empty only before Feb 2023
|
||||||
logger.LogIf(ctx, tracker.delete(ctx))
|
logger.LogIf(ctx, tracker.delete(ctx))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove .healing.bin from all disks with the same heal-id
|
||||||
|
for _, disk := range z.serverPools[poolIdx].sets[setIdx].getDisks() {
|
||||||
|
t, err := loadHealingTracker(ctx, disk)
|
||||||
|
if err != nil {
|
||||||
|
if err != errFileNotFound {
|
||||||
|
logger.LogIf(ctx, err)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if t.HealID == tracker.HealID {
|
||||||
|
t.delete(ctx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -182,6 +182,12 @@ func (z *healingTracker) DecodeMsg(dc *msgp.Reader) (err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
case "HealID":
|
||||||
|
z.HealID, err = dc.ReadString()
|
||||||
|
if err != nil {
|
||||||
|
err = msgp.WrapError(err, "HealID")
|
||||||
|
return
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
err = dc.Skip()
|
err = dc.Skip()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -195,9 +201,9 @@ func (z *healingTracker) DecodeMsg(dc *msgp.Reader) (err error) {
|
|||||||
|
|
||||||
// EncodeMsg implements msgp.Encodable
|
// EncodeMsg implements msgp.Encodable
|
||||||
func (z *healingTracker) EncodeMsg(en *msgp.Writer) (err error) {
|
func (z *healingTracker) EncodeMsg(en *msgp.Writer) (err error) {
|
||||||
// map header, size 22
|
// map header, size 23
|
||||||
// write "ID"
|
// write "ID"
|
||||||
err = en.Append(0xde, 0x0, 0x16, 0xa2, 0x49, 0x44)
|
err = en.Append(0xde, 0x0, 0x17, 0xa2, 0x49, 0x44)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -430,15 +436,25 @@ func (z *healingTracker) EncodeMsg(en *msgp.Writer) (err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// write "HealID"
|
||||||
|
err = en.Append(0xa6, 0x48, 0x65, 0x61, 0x6c, 0x49, 0x44)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
err = en.WriteString(z.HealID)
|
||||||
|
if err != nil {
|
||||||
|
err = msgp.WrapError(err, "HealID")
|
||||||
|
return
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// MarshalMsg implements msgp.Marshaler
|
// MarshalMsg implements msgp.Marshaler
|
||||||
func (z *healingTracker) MarshalMsg(b []byte) (o []byte, err error) {
|
func (z *healingTracker) MarshalMsg(b []byte) (o []byte, err error) {
|
||||||
o = msgp.Require(b, z.Msgsize())
|
o = msgp.Require(b, z.Msgsize())
|
||||||
// map header, size 22
|
// map header, size 23
|
||||||
// string "ID"
|
// string "ID"
|
||||||
o = append(o, 0xde, 0x0, 0x16, 0xa2, 0x49, 0x44)
|
o = append(o, 0xde, 0x0, 0x17, 0xa2, 0x49, 0x44)
|
||||||
o = msgp.AppendString(o, z.ID)
|
o = msgp.AppendString(o, z.ID)
|
||||||
// string "PoolIndex"
|
// string "PoolIndex"
|
||||||
o = append(o, 0xa9, 0x50, 0x6f, 0x6f, 0x6c, 0x49, 0x6e, 0x64, 0x65, 0x78)
|
o = append(o, 0xa9, 0x50, 0x6f, 0x6f, 0x6c, 0x49, 0x6e, 0x64, 0x65, 0x78)
|
||||||
@ -509,6 +525,9 @@ func (z *healingTracker) MarshalMsg(b []byte) (o []byte, err error) {
|
|||||||
for za0002 := range z.HealedBuckets {
|
for za0002 := range z.HealedBuckets {
|
||||||
o = msgp.AppendString(o, z.HealedBuckets[za0002])
|
o = msgp.AppendString(o, z.HealedBuckets[za0002])
|
||||||
}
|
}
|
||||||
|
// string "HealID"
|
||||||
|
o = append(o, 0xa6, 0x48, 0x65, 0x61, 0x6c, 0x49, 0x44)
|
||||||
|
o = msgp.AppendString(o, z.HealID)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -688,6 +707,12 @@ func (z *healingTracker) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
case "HealID":
|
||||||
|
z.HealID, bts, err = msgp.ReadStringBytes(bts)
|
||||||
|
if err != nil {
|
||||||
|
err = msgp.WrapError(err, "HealID")
|
||||||
|
return
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
bts, err = msgp.Skip(bts)
|
bts, err = msgp.Skip(bts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -710,5 +735,6 @@ func (z *healingTracker) Msgsize() (s int) {
|
|||||||
for za0002 := range z.HealedBuckets {
|
for za0002 := range z.HealedBuckets {
|
||||||
s += msgp.StringPrefixSize + len(z.HealedBuckets[za0002])
|
s += msgp.StringPrefixSize + len(z.HealedBuckets[za0002])
|
||||||
}
|
}
|
||||||
|
s += 7 + msgp.StringPrefixSize + len(z.HealID)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -1149,6 +1149,8 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
|
|||||||
return res, errNoHealRequired
|
return res, errNoHealRequired
|
||||||
}
|
}
|
||||||
|
|
||||||
|
formatOpID := mustGetUUID()
|
||||||
|
|
||||||
// Initialize a new set of set formats which will be written to disk.
|
// Initialize a new set of set formats which will be written to disk.
|
||||||
newFormatSets := newHealFormatSets(refFormat, s.setCount, s.setDriveCount, formats, sErrs)
|
newFormatSets := newHealFormatSets(refFormat, s.setCount, s.setDriveCount, formats, sErrs)
|
||||||
|
|
||||||
@ -1170,7 +1172,7 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
|
|||||||
if storageDisks[index] == nil || format == nil {
|
if storageDisks[index] == nil || format == nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if err := saveFormatErasure(storageDisks[index], format, true); err != nil {
|
if err := saveFormatErasure(storageDisks[index], format, formatOpID); err != nil {
|
||||||
logger.LogIf(ctx, fmt.Errorf("Drive %s failed to write updated 'format.json': %v", storageDisks[index], err))
|
logger.LogIf(ctx, fmt.Errorf("Drive %s failed to write updated 'format.json': %v", storageDisks[index], err))
|
||||||
tmpNewFormats[index] = nil // this disk failed to write new format
|
tmpNewFormats[index] = nil // this disk failed to write new format
|
||||||
}
|
}
|
||||||
|
@ -347,7 +347,7 @@ func loadFormatErasureAll(storageDisks []StorageAPI, heal bool) ([]*formatErasur
|
|||||||
return formats, g.Wait()
|
return formats, g.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func saveFormatErasure(disk StorageAPI, format *formatErasureV3, heal bool) error {
|
func saveFormatErasure(disk StorageAPI, format *formatErasureV3, healID string) error {
|
||||||
if disk == nil || format == nil {
|
if disk == nil || format == nil {
|
||||||
return errDiskNotFound
|
return errDiskNotFound
|
||||||
}
|
}
|
||||||
@ -383,9 +383,9 @@ func saveFormatErasure(disk StorageAPI, format *formatErasureV3, heal bool) erro
|
|||||||
}
|
}
|
||||||
|
|
||||||
disk.SetDiskID(diskID)
|
disk.SetDiskID(diskID)
|
||||||
if heal {
|
if healID != "" {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
ht := newHealingTracker(disk)
|
ht := newHealingTracker(disk, healID)
|
||||||
return ht.save(ctx)
|
return ht.save(ctx)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
@ -541,7 +541,7 @@ func formatErasureFixLocalDeploymentID(endpoints Endpoints, storageDisks []Stora
|
|||||||
}
|
}
|
||||||
format.ID = refFormat.ID
|
format.ID = refFormat.ID
|
||||||
// Heal the drive if we fixed its deployment ID.
|
// Heal the drive if we fixed its deployment ID.
|
||||||
if err := saveFormatErasure(storageDisks[index], format, true); err != nil {
|
if err := saveFormatErasure(storageDisks[index], format, mustGetUUID()); err != nil {
|
||||||
logger.LogIf(GlobalContext, err)
|
logger.LogIf(GlobalContext, err)
|
||||||
return fmt.Errorf("Unable to save format.json, %w", err)
|
return fmt.Errorf("Unable to save format.json, %w", err)
|
||||||
}
|
}
|
||||||
@ -642,7 +642,7 @@ func saveFormatErasureAll(ctx context.Context, storageDisks []StorageAPI, format
|
|||||||
if formats[index] == nil {
|
if formats[index] == nil {
|
||||||
return errDiskNotFound
|
return errDiskNotFound
|
||||||
}
|
}
|
||||||
return saveFormatErasure(storageDisks[index], formats[index], false)
|
return saveFormatErasure(storageDisks[index], formats[index], "")
|
||||||
}, index)
|
}, index)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -722,7 +722,7 @@ func fixFormatErasureV3(storageDisks []StorageAPI, endpoints Endpoints, formats
|
|||||||
if formats[i].Erasure.This == "" {
|
if formats[i].Erasure.This == "" {
|
||||||
formats[i].Erasure.This = formats[i].Erasure.Sets[0][i]
|
formats[i].Erasure.This = formats[i].Erasure.Sets[0][i]
|
||||||
// Heal the drive if drive has .This empty.
|
// Heal the drive if drive has .This empty.
|
||||||
if err := saveFormatErasure(storageDisks[i], formats[i], true); err != nil {
|
if err := saveFormatErasure(storageDisks[i], formats[i], mustGetUUID()); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
2
go.mod
2
go.mod
@ -49,7 +49,7 @@ require (
|
|||||||
github.com/minio/dperf v0.4.2
|
github.com/minio/dperf v0.4.2
|
||||||
github.com/minio/highwayhash v1.0.2
|
github.com/minio/highwayhash v1.0.2
|
||||||
github.com/minio/kes v0.22.2
|
github.com/minio/kes v0.22.2
|
||||||
github.com/minio/madmin-go/v2 v2.0.3
|
github.com/minio/madmin-go/v2 v2.0.5
|
||||||
github.com/minio/minio-go/v7 v7.0.45
|
github.com/minio/minio-go/v7 v7.0.45
|
||||||
github.com/minio/pkg v1.5.8
|
github.com/minio/pkg v1.5.8
|
||||||
github.com/minio/selfupdate v0.5.0
|
github.com/minio/selfupdate v0.5.0
|
||||||
|
4
go.sum
4
go.sum
@ -770,8 +770,8 @@ github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLT
|
|||||||
github.com/minio/kes v0.22.2 h1:9NdgTx+TFJco0Pqdrq8WZbrTZVv0ichg+sbPRQiJ2HU=
|
github.com/minio/kes v0.22.2 h1:9NdgTx+TFJco0Pqdrq8WZbrTZVv0ichg+sbPRQiJ2HU=
|
||||||
github.com/minio/kes v0.22.2/go.mod h1:J9sD6Pe8obPt7+JXFcznkWaYaj9pBWCfN9U9j//NsNw=
|
github.com/minio/kes v0.22.2/go.mod h1:J9sD6Pe8obPt7+JXFcznkWaYaj9pBWCfN9U9j//NsNw=
|
||||||
github.com/minio/madmin-go v1.6.6/go.mod h1:ATvkBOLiP3av4D++2v1UEHC/QzsGtgXD5kYvvRYzdKs=
|
github.com/minio/madmin-go v1.6.6/go.mod h1:ATvkBOLiP3av4D++2v1UEHC/QzsGtgXD5kYvvRYzdKs=
|
||||||
github.com/minio/madmin-go/v2 v2.0.3 h1:Q8qco+JrbRIim25tGrs0enVRJGoIMUHfULa5nJoSiqM=
|
github.com/minio/madmin-go/v2 v2.0.5 h1:W0dY4enDYdIegTcIQSkdtzvvyQpZtEn6bft5JMb/wYA=
|
||||||
github.com/minio/madmin-go/v2 v2.0.3/go.mod h1:5aFi/VLWBHC2DEFfGIlUmAeJhaF4ZAjuYpEWZFU14Zw=
|
github.com/minio/madmin-go/v2 v2.0.5/go.mod h1:5aFi/VLWBHC2DEFfGIlUmAeJhaF4ZAjuYpEWZFU14Zw=
|
||||||
github.com/minio/mc v0.0.0-20221224152138-176072dee43d h1:etzZIWQ3NFrxzwnvjczETWMcgoja9ZKLFLIfQzvpqP8=
|
github.com/minio/mc v0.0.0-20221224152138-176072dee43d h1:etzZIWQ3NFrxzwnvjczETWMcgoja9ZKLFLIfQzvpqP8=
|
||||||
github.com/minio/mc v0.0.0-20221224152138-176072dee43d/go.mod h1:af4hDQUHwu8az+6TyEKXa2Yd+lvMDVgnc9/kstHPZY8=
|
github.com/minio/mc v0.0.0-20221224152138-176072dee43d/go.mod h1:af4hDQUHwu8az+6TyEKXa2Yd+lvMDVgnc9/kstHPZY8=
|
||||||
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
|
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
|
||||||
|
Loading…
Reference in New Issue
Block a user