mirror of
https://github.com/minio/minio.git
synced 2025-01-25 21:53:16 -05:00
serialize updates to healing tracker (#15647)
When healing is parallelized by setting the ` _MINIO_HEAL_WORKERS` environment variable, multiple goroutines may race while updating the disk's healing tracker. This change serializes only these concurrent updates using a channel. Note, the healing tracker is still not concurrency safe in other contexts.
This commit is contained in:
parent
8e997eba4a
commit
96bfa77856
@ -243,6 +243,53 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
|||||||
disks = disks[:3]
|
disks = disks[:3]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type healEntryResult struct {
|
||||||
|
bytes uint64
|
||||||
|
success bool
|
||||||
|
entryDone bool
|
||||||
|
name string
|
||||||
|
}
|
||||||
|
healEntryDone := func(name string) healEntryResult {
|
||||||
|
return healEntryResult{
|
||||||
|
entryDone: true,
|
||||||
|
name: name,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
healEntrySuccess := func(sz uint64) healEntryResult {
|
||||||
|
return healEntryResult{
|
||||||
|
bytes: sz,
|
||||||
|
success: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
healEntryFailure := func(sz uint64) healEntryResult {
|
||||||
|
return healEntryResult{
|
||||||
|
bytes: sz,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect updates to tracker from concurrent healEntry calls
|
||||||
|
results := make(chan healEntryResult)
|
||||||
|
go func() {
|
||||||
|
for res := range results {
|
||||||
|
if res.entryDone {
|
||||||
|
tracker.Object = res.name
|
||||||
|
if time.Since(tracker.LastUpdate) > time.Minute {
|
||||||
|
logger.LogIf(ctx, tracker.update(ctx))
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if res.success {
|
||||||
|
tracker.ItemsHealed++
|
||||||
|
tracker.BytesDone += res.bytes
|
||||||
|
} else {
|
||||||
|
tracker.ItemsFailed++
|
||||||
|
tracker.BytesFailed += res.bytes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Note: updates from healEntry to tracker must be sent on results channel.
|
||||||
healEntry := func(entry metaCacheEntry) {
|
healEntry := func(entry metaCacheEntry) {
|
||||||
defer jt.Give()
|
defer jt.Give()
|
||||||
|
|
||||||
@ -269,6 +316,7 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var result healEntryResult
|
||||||
fivs, err := entry.fileInfoVersions(bucket)
|
fivs, err := entry.fileInfoVersions(bucket)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err := bgSeq.queueHealTask(healSource{
|
err := bgSeq.queueHealTask(healSource{
|
||||||
@ -277,12 +325,18 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
|||||||
versionID: "",
|
versionID: "",
|
||||||
}, madmin.HealItemObject)
|
}, madmin.HealItemObject)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
tracker.ItemsFailed++
|
result = healEntryFailure(0)
|
||||||
logger.LogIf(ctx, fmt.Errorf("unable to heal object %s/%s: %w", bucket, entry.name, err))
|
logger.LogIf(ctx, fmt.Errorf("unable to heal object %s/%s: %w", bucket, entry.name, err))
|
||||||
} else {
|
} else {
|
||||||
tracker.ItemsHealed++
|
result = healEntrySuccess(0)
|
||||||
}
|
}
|
||||||
bgSeq.logHeal(madmin.HealItemObject)
|
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case results <- result:
|
||||||
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -296,22 +350,27 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
|||||||
Remove: healDeleteDangling,
|
Remove: healDeleteDangling,
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
// If not deleted, assume they failed.
|
// If not deleted, assume they failed.
|
||||||
tracker.ItemsFailed++
|
result = healEntryFailure(uint64(version.Size))
|
||||||
tracker.BytesFailed += uint64(version.Size)
|
|
||||||
if version.VersionID != "" {
|
if version.VersionID != "" {
|
||||||
logger.LogIf(ctx, fmt.Errorf("unable to heal object %s/%s-v(%s): %w", bucket, version.Name, version.VersionID, err))
|
logger.LogIf(ctx, fmt.Errorf("unable to heal object %s/%s-v(%s): %w", bucket, version.Name, version.VersionID, err))
|
||||||
} else {
|
} else {
|
||||||
logger.LogIf(ctx, fmt.Errorf("unable to heal object %s/%s: %w", bucket, version.Name, err))
|
logger.LogIf(ctx, fmt.Errorf("unable to heal object %s/%s: %w", bucket, version.Name, err))
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
tracker.ItemsHealed++
|
result = healEntrySuccess(uint64(version.Size))
|
||||||
tracker.BytesDone += uint64(version.Size)
|
|
||||||
}
|
}
|
||||||
bgSeq.logHeal(madmin.HealItemObject)
|
bgSeq.logHeal(madmin.HealItemObject)
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case results <- result:
|
||||||
|
}
|
||||||
}
|
}
|
||||||
tracker.Object = entry.name
|
select {
|
||||||
if time.Since(tracker.LastUpdate) > time.Minute {
|
case <-ctx.Done():
|
||||||
logger.LogIf(ctx, tracker.update(ctx))
|
return
|
||||||
|
case results <- healEntryDone(entry.name):
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait and proceed if there are active requests
|
// Wait and proceed if there are active requests
|
||||||
@ -349,6 +408,7 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
|||||||
finished: nil,
|
finished: nil,
|
||||||
})
|
})
|
||||||
jt.Wait() // synchronize all the concurrent heal jobs
|
jt.Wait() // synchronize all the concurrent heal jobs
|
||||||
|
close(results)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Set this such that when we return this function
|
// Set this such that when we return this function
|
||||||
// we let the caller retry this disk again for the
|
// we let the caller retry this disk again for the
|
||||||
|
Loading…
x
Reference in New Issue
Block a user