fix: handle missing config file when saving rebalanceStats (#21547)

jiuker 2025-09-05 04:47:24 +08:00 committed by GitHub
parent f0b91e5504
commit 5c87d4ae87
2 changed files with 18 additions and 4 deletions


@@ -61,7 +61,7 @@ func (a adminAPIHandlers) StartDecommission(w http.ResponseWriter, r *http.Request)
 		return
 	}
 
-	if z.IsRebalanceStarted() {
+	if z.IsRebalanceStarted(ctx) {
 		writeErrorResponseJSON(ctx, w, errorCodes.ToAPIErr(ErrAdminRebalanceAlreadyStarted), r.URL)
 		return
 	}
@@ -277,7 +277,7 @@ func (a adminAPIHandlers) RebalanceStart(w http.ResponseWriter, r *http.Request)
 		return
 	}
 
-	if pools.IsRebalanceStarted() {
+	if pools.IsRebalanceStarted(ctx) {
 		writeErrorResponseJSON(ctx, w, errorCodes.ToAPIErr(ErrAdminRebalanceAlreadyStarted), r.URL)
 		return
 	}


@@ -341,7 +341,8 @@ func (r *rebalanceMeta) save(ctx context.Context, store objectIO) error {
 	return r.saveWithOpts(ctx, store, ObjectOptions{})
 }
 
-func (z *erasureServerPools) IsRebalanceStarted() bool {
+func (z *erasureServerPools) IsRebalanceStarted(ctx context.Context) bool {
+	_ = z.loadRebalanceMeta(ctx)
 	z.rebalMu.RLock()
 	defer z.rebalMu.RUnlock()
 
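The hunk above makes IsRebalanceStarted take a context and reload the rebalance metadata from the backend before answering, so callers (the decommission and rebalance-start handlers in the first file) no longer act on a stale in-memory view. Below is a minimal, standalone sketch of that pattern; the pools/rebalanceMeta types, the StoppedAt check, and the loadRebalanceMeta body are simplified stand-ins rather than MinIO's actual implementation.

package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// rebalanceMeta and pools are simplified stand-ins for MinIO's types.
type rebalanceMeta struct {
	StoppedAt time.Time // zero value: the rebalance has not been stopped
}

type pools struct {
	mu   sync.RWMutex
	meta *rebalanceMeta
}

// loadRebalanceMeta stands in for reading the persisted metadata from the
// object store; the caller ignores its error, matching the
// `_ = z.loadRebalanceMeta(ctx)` line in the diff above.
func (p *pools) loadRebalanceMeta(ctx context.Context) error {
	// ... fetch the latest metadata from the backend and update p.meta ...
	return nil
}

func (p *pools) IsRebalanceStarted(ctx context.Context) bool {
	_ = p.loadRebalanceMeta(ctx) // best-effort refresh before answering

	p.mu.RLock()
	defer p.mu.RUnlock()
	return p.meta != nil && p.meta.StoppedAt.IsZero()
}

func main() {
	p := &pools{meta: &rebalanceMeta{}}
	fmt.Println(p.IsRebalanceStarted(context.Background())) // prints true
}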
@@ -394,12 +395,14 @@ func (z *erasureServerPools) rebalanceBuckets(ctx context.Context, poolIdx int)
 	var (
 		quit     bool
 		traceMsg string
+		notify   bool // if status changed, notify nodes to reload rebalance metadata
 	)
 
 	for {
 		select {
 		case rebalErr := <-doneCh:
			quit = true
+			notify = true
 
 			now := time.Now()
 			var status rebalStatus
@@ -421,12 +424,16 @@ func (z *erasureServerPools) rebalanceBuckets(ctx context.Context, poolIdx int)
 			z.rebalMu.Unlock()
 
 		case <-timer.C:
+			notify = false
 			traceMsg = fmt.Sprintf("saved at %s", time.Now())
 		}
 
 		stopFn := globalRebalanceMetrics.log(rebalanceMetricSaveMetadata, poolIdx, traceMsg)
 		err := z.saveRebalanceStats(GlobalContext, poolIdx, rebalSaveStats)
 		stopFn(0, err)
+		if err == nil && notify {
+			globalNotificationSys.LoadRebalanceMeta(GlobalContext, false)
+		}
 		rebalanceLogIf(GlobalContext, err)
 
 		if quit {
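The two hunks above introduce a notify flag in the rebalanceBuckets save loop: a routine timer-driven save stays quiet, while the save triggered by rebalance completion (or failure) additionally calls globalNotificationSys.LoadRebalanceMeta so peer nodes re-read the metadata, and only when the save succeeded. A compressed, self-contained sketch of that control flow follows; saveLoop and notifyPeers are illustrative names, not MinIO code.

package main

import (
	"context"
	"fmt"
	"time"
)

// notifyPeers stands in for globalNotificationSys.LoadRebalanceMeta: it asks
// peer nodes to re-read the rebalance metadata from the backend.
func notifyPeers(ctx context.Context) {
	fmt.Println("peers asked to reload rebalance metadata")
}

// saveLoop mirrors the shape of the rebalanceBuckets save loop above: periodic
// saves stay silent, while the final save on completion also notifies peers,
// but only if persisting the stats succeeded.
func saveLoop(ctx context.Context, doneCh <-chan error, save func(context.Context) error) {
	timer := time.NewTimer(time.Second)
	defer timer.Stop()

	for {
		var quit, notify bool
		select {
		case <-doneCh:
			quit, notify = true, true // status changed: broadcast after saving
		case <-timer.C:
			notify = false // routine periodic save: no broadcast
		}

		if err := save(ctx); err == nil && notify {
			notifyPeers(ctx)
		}
		if quit {
			return
		}
		timer.Reset(time.Second)
	}
}

func main() {
	doneCh := make(chan error, 1)
	doneCh <- nil // pretend the rebalance just finished
	saveLoop(context.Background(), doneCh, func(context.Context) error { return nil })
}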
@@ -800,13 +807,20 @@ func (z *erasureServerPools) saveRebalanceStats(ctx context.Context, poolIdx int
 	ctx = lkCtx.Context()
 	noLockOpts := ObjectOptions{NoLock: true}
 	r := &rebalanceMeta{}
-	if err := r.loadWithOpts(ctx, z.serverPools[0], noLockOpts); err != nil {
+	err = r.loadWithOpts(ctx, z.serverPools[0], noLockOpts)
+	if err != nil && !errors.Is(err, errConfigNotFound) {
 		return err
 	}
 
 	z.rebalMu.Lock()
 	defer z.rebalMu.Unlock()
 
+	// If the config was not found, fall back to the in-memory metadata: every
+	// status change notifies all nodes to refresh it, so it is the latest state.
+	if errors.Is(err, errConfigNotFound) {
+		r = z.rebalMeta
+	}
+
 	switch opts {
 	case rebalSaveStoppedAt:
 		r.StoppedAt = time.Now()