mirror of
https://github.com/minio/minio.git
synced 2025-01-12 15:33:22 -05:00
fix: the race in healing tracker code (#17048)
This commit is contained in:
parent
0db34e4b85
commit
224d9a752f
@ -26,6 +26,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/dustin/go-humanize"
|
"github.com/dustin/go-humanize"
|
||||||
@ -44,6 +45,7 @@ const (
|
|||||||
// healingTracker is used to persist healing information during a heal.
|
// healingTracker is used to persist healing information during a heal.
|
||||||
type healingTracker struct {
|
type healingTracker struct {
|
||||||
disk StorageAPI `msg:"-"`
|
disk StorageAPI `msg:"-"`
|
||||||
|
mu *sync.RWMutex `msg:"-"`
|
||||||
|
|
||||||
ID string
|
ID string
|
||||||
PoolIndex int
|
PoolIndex int
|
||||||
@ -112,22 +114,76 @@ func loadHealingTracker(ctx context.Context, disk StorageAPI) (*healingTracker,
|
|||||||
}
|
}
|
||||||
h.disk = disk
|
h.disk = disk
|
||||||
h.ID = diskID
|
h.ID = diskID
|
||||||
|
h.mu = &sync.RWMutex{}
|
||||||
return &h, nil
|
return &h, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// newHealingTracker will create a new healing tracker for the disk.
|
// newHealingTracker will create a new healing tracker for the disk.
|
||||||
func newHealingTracker(disk StorageAPI, healID string) *healingTracker {
|
func newHealingTracker() *healingTracker {
|
||||||
diskID, _ := disk.GetDiskID()
|
return &healingTracker{
|
||||||
h := healingTracker{
|
mu: &sync.RWMutex{},
|
||||||
disk: disk,
|
|
||||||
ID: diskID,
|
|
||||||
HealID: healID,
|
|
||||||
Path: disk.String(),
|
|
||||||
Endpoint: disk.Endpoint().String(),
|
|
||||||
Started: time.Now().UTC(),
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func initHealingTracker(disk StorageAPI, healID string) *healingTracker {
|
||||||
|
h := newHealingTracker()
|
||||||
|
diskID, _ := disk.GetDiskID()
|
||||||
|
h.disk = disk
|
||||||
|
h.ID = diskID
|
||||||
|
h.HealID = healID
|
||||||
|
h.Path = disk.String()
|
||||||
|
h.Endpoint = disk.Endpoint().String()
|
||||||
|
h.Started = time.Now().UTC()
|
||||||
h.PoolIndex, h.SetIndex, h.DiskIndex = disk.GetDiskLoc()
|
h.PoolIndex, h.SetIndex, h.DiskIndex = disk.GetDiskLoc()
|
||||||
return &h
|
return h
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h healingTracker) getLastUpdate() time.Time {
|
||||||
|
h.mu.RLock()
|
||||||
|
defer h.mu.RUnlock()
|
||||||
|
|
||||||
|
return h.LastUpdate
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h healingTracker) getBucket() string {
|
||||||
|
h.mu.RLock()
|
||||||
|
defer h.mu.RUnlock()
|
||||||
|
|
||||||
|
return h.Bucket
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *healingTracker) setBucket(bucket string) {
|
||||||
|
h.mu.Lock()
|
||||||
|
defer h.mu.Unlock()
|
||||||
|
|
||||||
|
h.Bucket = bucket
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h healingTracker) getObject() string {
|
||||||
|
h.mu.RLock()
|
||||||
|
defer h.mu.RUnlock()
|
||||||
|
|
||||||
|
return h.Object
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *healingTracker) setObject(object string) {
|
||||||
|
h.mu.Lock()
|
||||||
|
defer h.mu.Unlock()
|
||||||
|
|
||||||
|
h.Object = object
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *healingTracker) updateProgress(success bool, bytes uint64) {
|
||||||
|
h.mu.Lock()
|
||||||
|
defer h.mu.Unlock()
|
||||||
|
|
||||||
|
if success {
|
||||||
|
h.ItemsHealed++
|
||||||
|
h.BytesDone += bytes
|
||||||
|
} else {
|
||||||
|
h.ItemsFailed++
|
||||||
|
h.BytesFailed += bytes
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// update will update the tracker on the disk.
|
// update will update the tracker on the disk.
|
||||||
@ -136,15 +192,18 @@ func (h *healingTracker) update(ctx context.Context) error {
|
|||||||
if h.disk.Healing() == nil {
|
if h.disk.Healing() == nil {
|
||||||
return fmt.Errorf("healingTracker: drive %q is not marked as healing", h.ID)
|
return fmt.Errorf("healingTracker: drive %q is not marked as healing", h.ID)
|
||||||
}
|
}
|
||||||
|
h.mu.Lock()
|
||||||
if h.ID == "" || h.PoolIndex < 0 || h.SetIndex < 0 || h.DiskIndex < 0 {
|
if h.ID == "" || h.PoolIndex < 0 || h.SetIndex < 0 || h.DiskIndex < 0 {
|
||||||
h.ID, _ = h.disk.GetDiskID()
|
h.ID, _ = h.disk.GetDiskID()
|
||||||
h.PoolIndex, h.SetIndex, h.DiskIndex = h.disk.GetDiskLoc()
|
h.PoolIndex, h.SetIndex, h.DiskIndex = h.disk.GetDiskLoc()
|
||||||
}
|
}
|
||||||
|
h.mu.Unlock()
|
||||||
return h.save(ctx)
|
return h.save(ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
// save will unconditionally save the tracker and will be created if not existing.
|
// save will unconditionally save the tracker and will be created if not existing.
|
||||||
func (h *healingTracker) save(ctx context.Context) error {
|
func (h *healingTracker) save(ctx context.Context) error {
|
||||||
|
h.mu.Lock()
|
||||||
if h.PoolIndex < 0 || h.SetIndex < 0 || h.DiskIndex < 0 {
|
if h.PoolIndex < 0 || h.SetIndex < 0 || h.DiskIndex < 0 {
|
||||||
// Attempt to get location.
|
// Attempt to get location.
|
||||||
if api := newObjectLayerFn(); api != nil {
|
if api := newObjectLayerFn(); api != nil {
|
||||||
@ -155,6 +214,7 @@ func (h *healingTracker) save(ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
h.LastUpdate = time.Now().UTC()
|
h.LastUpdate = time.Now().UTC()
|
||||||
htrackerBytes, err := h.MarshalMsg(nil)
|
htrackerBytes, err := h.MarshalMsg(nil)
|
||||||
|
h.mu.Unlock()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -176,6 +236,8 @@ func (h *healingTracker) delete(ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (h *healingTracker) isHealed(bucket string) bool {
|
func (h *healingTracker) isHealed(bucket string) bool {
|
||||||
|
h.mu.RLock()
|
||||||
|
defer h.mu.RUnlock()
|
||||||
for _, v := range h.HealedBuckets {
|
for _, v := range h.HealedBuckets {
|
||||||
if v == bucket {
|
if v == bucket {
|
||||||
return true
|
return true
|
||||||
@ -186,6 +248,9 @@ func (h *healingTracker) isHealed(bucket string) bool {
|
|||||||
|
|
||||||
// resume will reset progress to the numbers at the start of the bucket.
|
// resume will reset progress to the numbers at the start of the bucket.
|
||||||
func (h *healingTracker) resume() {
|
func (h *healingTracker) resume() {
|
||||||
|
h.mu.Lock()
|
||||||
|
defer h.mu.Unlock()
|
||||||
|
|
||||||
h.ItemsHealed = h.ResumeItemsHealed
|
h.ItemsHealed = h.ResumeItemsHealed
|
||||||
h.ItemsFailed = h.ResumeItemsFailed
|
h.ItemsFailed = h.ResumeItemsFailed
|
||||||
h.BytesDone = h.ResumeBytesDone
|
h.BytesDone = h.ResumeBytesDone
|
||||||
@ -195,6 +260,9 @@ func (h *healingTracker) resume() {
|
|||||||
// bucketDone should be called when a bucket is done healing.
|
// bucketDone should be called when a bucket is done healing.
|
||||||
// Adds the bucket to the list of healed buckets and updates resume numbers.
|
// Adds the bucket to the list of healed buckets and updates resume numbers.
|
||||||
func (h *healingTracker) bucketDone(bucket string) {
|
func (h *healingTracker) bucketDone(bucket string) {
|
||||||
|
h.mu.Lock()
|
||||||
|
defer h.mu.Unlock()
|
||||||
|
|
||||||
h.ResumeItemsHealed = h.ItemsHealed
|
h.ResumeItemsHealed = h.ItemsHealed
|
||||||
h.ResumeItemsFailed = h.ItemsFailed
|
h.ResumeItemsFailed = h.ItemsFailed
|
||||||
h.ResumeBytesDone = h.BytesDone
|
h.ResumeBytesDone = h.BytesDone
|
||||||
@ -211,6 +279,9 @@ func (h *healingTracker) bucketDone(bucket string) {
|
|||||||
// setQueuedBuckets will add buckets, but exclude any that is already in h.HealedBuckets.
|
// setQueuedBuckets will add buckets, but exclude any that is already in h.HealedBuckets.
|
||||||
// Order is preserved.
|
// Order is preserved.
|
||||||
func (h *healingTracker) setQueuedBuckets(buckets []BucketInfo) {
|
func (h *healingTracker) setQueuedBuckets(buckets []BucketInfo) {
|
||||||
|
h.mu.Lock()
|
||||||
|
defer h.mu.Unlock()
|
||||||
|
|
||||||
s := set.CreateStringSet(h.HealedBuckets...)
|
s := set.CreateStringSet(h.HealedBuckets...)
|
||||||
h.QueuedBuckets = make([]string, 0, len(buckets))
|
h.QueuedBuckets = make([]string, 0, len(buckets))
|
||||||
for _, b := range buckets {
|
for _, b := range buckets {
|
||||||
@ -221,6 +292,9 @@ func (h *healingTracker) setQueuedBuckets(buckets []BucketInfo) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (h *healingTracker) printTo(writer io.Writer) {
|
func (h *healingTracker) printTo(writer io.Writer) {
|
||||||
|
h.mu.RLock()
|
||||||
|
defer h.mu.RUnlock()
|
||||||
|
|
||||||
b, err := json.MarshalIndent(h, "", " ")
|
b, err := json.MarshalIndent(h, "", " ")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
writer.Write([]byte(err.Error()))
|
writer.Write([]byte(err.Error()))
|
||||||
@ -230,6 +304,9 @@ func (h *healingTracker) printTo(writer io.Writer) {
|
|||||||
|
|
||||||
// toHealingDisk converts the information to madmin.HealingDisk
|
// toHealingDisk converts the information to madmin.HealingDisk
|
||||||
func (h *healingTracker) toHealingDisk() madmin.HealingDisk {
|
func (h *healingTracker) toHealingDisk() madmin.HealingDisk {
|
||||||
|
h.mu.RLock()
|
||||||
|
defer h.mu.RUnlock()
|
||||||
|
|
||||||
return madmin.HealingDisk{
|
return madmin.HealingDisk{
|
||||||
ID: h.ID,
|
ID: h.ID,
|
||||||
HealID: h.HealID,
|
HealID: h.HealID,
|
||||||
@ -336,7 +413,7 @@ func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
logger.LogIf(ctx, fmt.Errorf("Unable to load healing tracker on '%s': %w, re-initializing..", disk, err))
|
logger.LogIf(ctx, fmt.Errorf("Unable to load healing tracker on '%s': %w, re-initializing..", disk, err))
|
||||||
tracker = newHealingTracker(disk, mustGetUUID())
|
tracker = initHealingTracker(disk, mustGetUUID())
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.Info(fmt.Sprintf("Healing drive '%s' - 'mc admin heal alias/ --verbose' to check the current status.", endpoint))
|
logger.Info(fmt.Sprintf("Healing drive '%s' - 'mc admin heal alias/ --verbose' to check the current status.", endpoint))
|
||||||
|
@ -380,7 +380,7 @@ func saveFormatErasure(disk StorageAPI, format *formatErasureV3, healID string)
|
|||||||
disk.SetDiskID(diskID)
|
disk.SetDiskID(diskID)
|
||||||
if healID != "" {
|
if healID != "" {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
ht := newHealingTracker(disk, healID)
|
ht := initHealingTracker(disk, healID)
|
||||||
return ht.save(ctx)
|
return ht.save(ctx)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
@ -187,16 +187,16 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
|||||||
}
|
}
|
||||||
var forwardTo string
|
var forwardTo string
|
||||||
// If we resume to the same bucket, forward to last known item.
|
// If we resume to the same bucket, forward to last known item.
|
||||||
if tracker.Bucket != "" {
|
if b := tracker.getBucket(); b != "" {
|
||||||
if tracker.Bucket == bucket {
|
if b == bucket {
|
||||||
forwardTo = tracker.Object
|
forwardTo = tracker.getObject()
|
||||||
} else {
|
} else {
|
||||||
// Reset to where last bucket ended if resuming.
|
// Reset to where last bucket ended if resuming.
|
||||||
tracker.resume()
|
tracker.resume()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tracker.Object = ""
|
tracker.setObject("")
|
||||||
tracker.Bucket = bucket
|
tracker.setBucket(bucket)
|
||||||
// Heal current bucket again in case if it is failed
|
// Heal current bucket again in case if it is failed
|
||||||
// in the being of erasure set healing
|
// in the being of erasure set healing
|
||||||
if _, err := er.HealBucket(ctx, bucket, madmin.HealOpts{
|
if _, err := er.HealBucket(ctx, bucket, madmin.HealOpts{
|
||||||
@ -208,7 +208,7 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
|||||||
|
|
||||||
if serverDebugLog {
|
if serverDebugLog {
|
||||||
console.Debugf(color.Green("healDrive:")+" healing bucket %s content on %s erasure set\n",
|
console.Debugf(color.Green("healDrive:")+" healing bucket %s content on %s erasure set\n",
|
||||||
bucket, humanize.Ordinal(tracker.SetIndex+1))
|
bucket, humanize.Ordinal(er.setIndex+1))
|
||||||
}
|
}
|
||||||
|
|
||||||
disks, _ := er.getOnlineDisksWithHealing()
|
disks, _ := er.getOnlineDisksWithHealing()
|
||||||
@ -251,20 +251,14 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
|||||||
go func() {
|
go func() {
|
||||||
for res := range results {
|
for res := range results {
|
||||||
if res.entryDone {
|
if res.entryDone {
|
||||||
tracker.Object = res.name
|
tracker.setObject(res.name)
|
||||||
if time.Since(tracker.LastUpdate) > time.Minute {
|
if time.Since(tracker.getLastUpdate()) > time.Minute {
|
||||||
logger.LogIf(ctx, tracker.update(ctx))
|
logger.LogIf(ctx, tracker.update(ctx))
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if res.success {
|
tracker.updateProgress(res.success, res.bytes)
|
||||||
tracker.ItemsHealed++
|
|
||||||
tracker.BytesDone += res.bytes
|
|
||||||
} else {
|
|
||||||
tracker.ItemsFailed++
|
|
||||||
tracker.BytesFailed += res.bytes
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
@ -420,8 +414,9 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
|||||||
logger.LogIf(ctx, tracker.update(ctx))
|
logger.LogIf(ctx, tracker.update(ctx))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tracker.Object = ""
|
|
||||||
tracker.Bucket = ""
|
tracker.setObject("")
|
||||||
|
tracker.setBucket("")
|
||||||
|
|
||||||
return retErr
|
return retErr
|
||||||
}
|
}
|
||||||
|
@ -365,10 +365,10 @@ func (s *xlStorage) Healing() *healingTracker {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
var h healingTracker
|
h := newHealingTracker()
|
||||||
_, err = h.UnmarshalMsg(b)
|
_, err = h.UnmarshalMsg(b)
|
||||||
logger.LogIf(GlobalContext, err)
|
logger.LogIf(GlobalContext, err)
|
||||||
return &h
|
return h
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkODirectDiskSupport asks the disk to write some data
|
// checkODirectDiskSupport asks the disk to write some data
|
||||||
|
Loading…
Reference in New Issue
Block a user