add healing retries when there are failed heal attempts (#19986)

transient errors for long running tasks are normal, allow for
drive to retry again upto 3 times before giving up on healing
the drive.
This commit is contained in:
Harshavardhana
2024-06-25 10:32:56 -07:00
committed by GitHub
parent 41f508765d
commit 22c5a5b91b
2 changed files with 55 additions and 7 deletions

View File

@@ -200,6 +200,12 @@ func (z *healingTracker) DecodeMsg(dc *msgp.Reader) (err error) {
err = msgp.WrapError(err, "BytesSkipped")
return
}
case "RetryAttempts":
z.RetryAttempts, err = dc.ReadUint64()
if err != nil {
err = msgp.WrapError(err, "RetryAttempts")
return
}
default:
err = dc.Skip()
if err != nil {
@@ -213,9 +219,9 @@ func (z *healingTracker) DecodeMsg(dc *msgp.Reader) (err error) {
// EncodeMsg implements msgp.Encodable
func (z *healingTracker) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 25
// map header, size 26
// write "ID"
err = en.Append(0xde, 0x0, 0x19, 0xa2, 0x49, 0x44)
err = en.Append(0xde, 0x0, 0x1a, 0xa2, 0x49, 0x44)
if err != nil {
return
}
@@ -478,15 +484,25 @@ func (z *healingTracker) EncodeMsg(en *msgp.Writer) (err error) {
err = msgp.WrapError(err, "BytesSkipped")
return
}
// write "RetryAttempts"
err = en.Append(0xad, 0x52, 0x65, 0x74, 0x72, 0x79, 0x41, 0x74, 0x74, 0x65, 0x6d, 0x70, 0x74, 0x73)
if err != nil {
return
}
err = en.WriteUint64(z.RetryAttempts)
if err != nil {
err = msgp.WrapError(err, "RetryAttempts")
return
}
return
}
// MarshalMsg implements msgp.Marshaler
func (z *healingTracker) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 25
// map header, size 26
// string "ID"
o = append(o, 0xde, 0x0, 0x19, 0xa2, 0x49, 0x44)
o = append(o, 0xde, 0x0, 0x1a, 0xa2, 0x49, 0x44)
o = msgp.AppendString(o, z.ID)
// string "PoolIndex"
o = append(o, 0xa9, 0x50, 0x6f, 0x6f, 0x6c, 0x49, 0x6e, 0x64, 0x65, 0x78)
@@ -566,6 +582,9 @@ func (z *healingTracker) MarshalMsg(b []byte) (o []byte, err error) {
// string "BytesSkipped"
o = append(o, 0xac, 0x42, 0x79, 0x74, 0x65, 0x73, 0x53, 0x6b, 0x69, 0x70, 0x70, 0x65, 0x64)
o = msgp.AppendUint64(o, z.BytesSkipped)
// string "RetryAttempts"
o = append(o, 0xad, 0x52, 0x65, 0x74, 0x72, 0x79, 0x41, 0x74, 0x74, 0x65, 0x6d, 0x70, 0x74, 0x73)
o = msgp.AppendUint64(o, z.RetryAttempts)
return
}
@@ -763,6 +782,12 @@ func (z *healingTracker) UnmarshalMsg(bts []byte) (o []byte, err error) {
err = msgp.WrapError(err, "BytesSkipped")
return
}
case "RetryAttempts":
z.RetryAttempts, bts, err = msgp.ReadUint64Bytes(bts)
if err != nil {
err = msgp.WrapError(err, "RetryAttempts")
return
}
default:
bts, err = msgp.Skip(bts)
if err != nil {
@@ -785,6 +810,6 @@ func (z *healingTracker) Msgsize() (s int) {
for za0002 := range z.HealedBuckets {
s += msgp.StringPrefixSize + len(z.HealedBuckets[za0002])
}
s += 7 + msgp.StringPrefixSize + len(z.HealID) + 13 + msgp.Uint64Size + 13 + msgp.Uint64Size
s += 7 + msgp.StringPrefixSize + len(z.HealID) + 13 + msgp.Uint64Size + 13 + msgp.Uint64Size + 14 + msgp.Uint64Size
return
}