mirror of
https://github.com/minio/minio.git
synced 2025-03-30 17:23:42 -04:00
heal: Reset healing params when a retry is decided (#20285)
Currently, retrying the healing of a new drive does not reset HealedBuckets, which means that the next healing retry will skip those buckets. This commit fixes that behavior. Also, the skipped objects counter will now include objects that were uploaded after the healing was started.
This commit is contained in:
parent
2d44c161c7
commit
a8f143298f
@ -148,6 +148,26 @@ func initHealingTracker(disk StorageAPI, healID string) *healingTracker {
|
|||||||
return h
|
return h
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *healingTracker) resetHealing() {
|
||||||
|
h.mu.Lock()
|
||||||
|
defer h.mu.Unlock()
|
||||||
|
|
||||||
|
h.ItemsHealed = 0
|
||||||
|
h.ItemsFailed = 0
|
||||||
|
h.BytesDone = 0
|
||||||
|
h.BytesFailed = 0
|
||||||
|
h.ResumeItemsHealed = 0
|
||||||
|
h.ResumeItemsFailed = 0
|
||||||
|
h.ResumeBytesDone = 0
|
||||||
|
h.ResumeBytesFailed = 0
|
||||||
|
h.ItemsSkipped = 0
|
||||||
|
h.BytesSkipped = 0
|
||||||
|
|
||||||
|
h.HealedBuckets = nil
|
||||||
|
h.Object = ""
|
||||||
|
h.Bucket = ""
|
||||||
|
}
|
||||||
|
|
||||||
func (h *healingTracker) getLastUpdate() time.Time {
|
func (h *healingTracker) getLastUpdate() time.Time {
|
||||||
h.mu.RLock()
|
h.mu.RLock()
|
||||||
defer h.mu.RUnlock()
|
defer h.mu.RUnlock()
|
||||||
@ -349,6 +369,7 @@ func (h *healingTracker) toHealingDisk() madmin.HealingDisk {
|
|||||||
Object: h.Object,
|
Object: h.Object,
|
||||||
QueuedBuckets: h.QueuedBuckets,
|
QueuedBuckets: h.QueuedBuckets,
|
||||||
HealedBuckets: h.HealedBuckets,
|
HealedBuckets: h.HealedBuckets,
|
||||||
|
RetryAttempts: h.RetryAttempts,
|
||||||
|
|
||||||
ObjectsHealed: h.ItemsHealed, // Deprecated July 2021
|
ObjectsHealed: h.ItemsHealed, // Deprecated July 2021
|
||||||
ObjectsFailed: h.ItemsFailed, // Deprecated July 2021
|
ObjectsFailed: h.ItemsFailed, // Deprecated July 2021
|
||||||
@ -482,16 +503,19 @@ func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint
|
|||||||
// if objects have failed healing, we attempt a retry to heal the drive upto 3 times before giving up.
|
// if objects have failed healing, we attempt a retry to heal the drive upto 3 times before giving up.
|
||||||
if tracker.ItemsFailed > 0 && tracker.RetryAttempts < 4 {
|
if tracker.ItemsFailed > 0 && tracker.RetryAttempts < 4 {
|
||||||
tracker.RetryAttempts++
|
tracker.RetryAttempts++
|
||||||
bugLogIf(ctx, tracker.update(ctx))
|
|
||||||
|
|
||||||
healingLogEvent(ctx, "Healing of drive '%s' is incomplete, retrying %s time (healed: %d, skipped: %d, failed: %d).", disk,
|
healingLogEvent(ctx, "Healing of drive '%s' is incomplete, retrying %s time (healed: %d, skipped: %d, failed: %d).", disk,
|
||||||
humanize.Ordinal(int(tracker.RetryAttempts)), tracker.ItemsHealed, tracker.ItemsSkipped, tracker.ItemsFailed)
|
humanize.Ordinal(int(tracker.RetryAttempts)), tracker.ItemsHealed, tracker.ItemsSkipped, tracker.ItemsFailed)
|
||||||
|
|
||||||
|
tracker.resetHealing()
|
||||||
|
bugLogIf(ctx, tracker.update(ctx))
|
||||||
|
|
||||||
return errRetryHealing
|
return errRetryHealing
|
||||||
}
|
}
|
||||||
|
|
||||||
if tracker.ItemsFailed > 0 {
|
if tracker.ItemsFailed > 0 {
|
||||||
healingLogEvent(ctx, "Healing of drive '%s' is incomplete, retried %d times (healed: %d, skipped: %d, failed: %d).", disk,
|
healingLogEvent(ctx, "Healing of drive '%s' is incomplete, retried %d times (healed: %d, skipped: %d, failed: %d).", disk,
|
||||||
tracker.RetryAttempts-1, tracker.ItemsHealed, tracker.ItemsSkipped, tracker.ItemsFailed)
|
tracker.RetryAttempts, tracker.ItemsHealed, tracker.ItemsSkipped, tracker.ItemsFailed)
|
||||||
} else {
|
} else {
|
||||||
if tracker.RetryAttempts > 0 {
|
if tracker.RetryAttempts > 0 {
|
||||||
healingLogEvent(ctx, "Healing of drive '%s' is complete, retried %d times (healed: %d, skipped: %d).", disk,
|
healingLogEvent(ctx, "Healing of drive '%s' is complete, retried %d times (healed: %d, skipped: %d).", disk,
|
||||||
|
@ -167,6 +167,19 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
|||||||
return errServerNotInitialized
|
return errServerNotInitialized
|
||||||
}
|
}
|
||||||
|
|
||||||
|
started := tracker.Started
|
||||||
|
if started.IsZero() || started.Equal(timeSentinel) {
|
||||||
|
healingLogIf(ctx, fmt.Errorf("unexpected tracker healing start time found: %v", started))
|
||||||
|
started = time.Time{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Final tracker update before quitting
|
||||||
|
defer func() {
|
||||||
|
tracker.setObject("")
|
||||||
|
tracker.setBucket("")
|
||||||
|
healingLogIf(ctx, tracker.update(ctx))
|
||||||
|
}()
|
||||||
|
|
||||||
for _, bucket := range healBuckets {
|
for _, bucket := range healBuckets {
|
||||||
if err := bgSeq.healBucket(objAPI, bucket, true); err != nil {
|
if err := bgSeq.healBucket(objAPI, bucket, true); err != nil {
|
||||||
// Log bucket healing error if any, we shall retry again.
|
// Log bucket healing error if any, we shall retry again.
|
||||||
@ -435,13 +448,10 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
|||||||
|
|
||||||
var versionNotFound int
|
var versionNotFound int
|
||||||
for _, version := range fivs.Versions {
|
for _, version := range fivs.Versions {
|
||||||
// Ignore a version with a modtime newer than healing start time.
|
// Ignore healing a version if:
|
||||||
if version.ModTime.After(tracker.Started) {
|
// - It is uploaded after the drive healing is started
|
||||||
continue
|
// - An object that is already expired by ILM rule.
|
||||||
}
|
if !started.IsZero() && version.ModTime.After(started) || filterLifecycle(bucket, version.Name, version) {
|
||||||
|
|
||||||
// Apply lifecycle rules on the objects that are expired.
|
|
||||||
if filterLifecycle(bucket, version.Name, version) {
|
|
||||||
versionNotFound++
|
versionNotFound++
|
||||||
if !send(healEntrySkipped(uint64(version.Size))) {
|
if !send(healEntrySkipped(uint64(version.Size))) {
|
||||||
return
|
return
|
||||||
@ -556,10 +566,6 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
|||||||
healingLogIf(ctx, tracker.update(ctx))
|
healingLogIf(ctx, tracker.update(ctx))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tracker.setObject("")
|
|
||||||
tracker.setBucket("")
|
|
||||||
|
|
||||||
if retErr != nil {
|
if retErr != nil {
|
||||||
return retErr
|
return retErr
|
||||||
}
|
}
|
||||||
|
2
go.mod
2
go.mod
@ -51,7 +51,7 @@ require (
|
|||||||
github.com/minio/highwayhash v1.0.3
|
github.com/minio/highwayhash v1.0.3
|
||||||
github.com/minio/kms-go/kes v0.3.0
|
github.com/minio/kms-go/kes v0.3.0
|
||||||
github.com/minio/kms-go/kms v0.4.0
|
github.com/minio/kms-go/kms v0.4.0
|
||||||
github.com/minio/madmin-go/v3 v3.0.63
|
github.com/minio/madmin-go/v3 v3.0.64-0.20240822003756-fe52a32e526d
|
||||||
github.com/minio/minio-go/v7 v7.0.75
|
github.com/minio/minio-go/v7 v7.0.75
|
||||||
github.com/minio/mux v1.9.0
|
github.com/minio/mux v1.9.0
|
||||||
github.com/minio/pkg/v3 v3.0.11
|
github.com/minio/pkg/v3 v3.0.11
|
||||||
|
4
go.sum
4
go.sum
@ -426,8 +426,8 @@ github.com/minio/kms-go/kes v0.3.0 h1:SU8VGVM/Hk9w1OiSby3OatkcojooUqIdDHl6dtM6Nk
|
|||||||
github.com/minio/kms-go/kes v0.3.0/go.mod h1:w6DeVT878qEOU3nUrYVy1WOT5H1Ig9hbDIh698NYJKY=
|
github.com/minio/kms-go/kes v0.3.0/go.mod h1:w6DeVT878qEOU3nUrYVy1WOT5H1Ig9hbDIh698NYJKY=
|
||||||
github.com/minio/kms-go/kms v0.4.0 h1:cLPZceEp+05xHotVBaeFJrgL7JcXM4lBy6PU0idkE7I=
|
github.com/minio/kms-go/kms v0.4.0 h1:cLPZceEp+05xHotVBaeFJrgL7JcXM4lBy6PU0idkE7I=
|
||||||
github.com/minio/kms-go/kms v0.4.0/go.mod h1:q12CehiIy2qgBnDKq6Q7wmPi2PHSyRVug5DKp0HAVeE=
|
github.com/minio/kms-go/kms v0.4.0/go.mod h1:q12CehiIy2qgBnDKq6Q7wmPi2PHSyRVug5DKp0HAVeE=
|
||||||
github.com/minio/madmin-go/v3 v3.0.63 h1:ERJRxEI/FFRh8MDi4Z+3DKe4sONkQ0g+OkNzRpk7qxk=
|
github.com/minio/madmin-go/v3 v3.0.64-0.20240822003756-fe52a32e526d h1:ma9PAmbEs+TP9BdsbQLO3gUa2nHSzeuQobOCT8BWUpg=
|
||||||
github.com/minio/madmin-go/v3 v3.0.63/go.mod h1:IFAwr0XMrdsLovxAdCcuq/eoL4nRuMVQQv0iubJANQw=
|
github.com/minio/madmin-go/v3 v3.0.64-0.20240822003756-fe52a32e526d/go.mod h1:IFAwr0XMrdsLovxAdCcuq/eoL4nRuMVQQv0iubJANQw=
|
||||||
github.com/minio/mc v0.0.0-20240815155011-479171e7be9c h1:0tzuJ1nV6oZstqKQ/CwK1dzxNJ/cE38ym4SPi2HsWoY=
|
github.com/minio/mc v0.0.0-20240815155011-479171e7be9c h1:0tzuJ1nV6oZstqKQ/CwK1dzxNJ/cE38ym4SPi2HsWoY=
|
||||||
github.com/minio/mc v0.0.0-20240815155011-479171e7be9c/go.mod h1:Cr4x7eiMJfOTWwg40Rk3EaOI7i+DUyOAtqLO7x+heiA=
|
github.com/minio/mc v0.0.0-20240815155011-479171e7be9c/go.mod h1:Cr4x7eiMJfOTWwg40Rk3EaOI7i+DUyOAtqLO7x+heiA=
|
||||||
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
|
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
|
||||||
|
Loading…
x
Reference in New Issue
Block a user