diff --git a/cmd/admin-handlers-pools.go b/cmd/admin-handlers-pools.go
index 40d655934..33317d4d7 100644
--- a/cmd/admin-handlers-pools.go
+++ b/cmd/admin-handlers-pools.go
@@ -61,7 +61,7 @@ func (a adminAPIHandlers) StartDecommission(w http.ResponseWriter, r *http.Reque
 		return
 	}
 
-	if z.IsRebalanceStarted() {
+	if z.IsRebalanceStarted(ctx) {
 		writeErrorResponseJSON(ctx, w, errorCodes.ToAPIErr(ErrAdminRebalanceAlreadyStarted), r.URL)
 		return
 	}
@@ -277,7 +277,7 @@ func (a adminAPIHandlers) RebalanceStart(w http.ResponseWriter, r *http.Request)
 		return
 	}
 
-	if pools.IsRebalanceStarted() {
+	if pools.IsRebalanceStarted(ctx) {
 		writeErrorResponseJSON(ctx, w, errorCodes.ToAPIErr(ErrAdminRebalanceAlreadyStarted), r.URL)
 		return
 	}
diff --git a/cmd/erasure-server-pool-rebalance.go b/cmd/erasure-server-pool-rebalance.go
index f8078c3f2..031f3e3f3 100644
--- a/cmd/erasure-server-pool-rebalance.go
+++ b/cmd/erasure-server-pool-rebalance.go
@@ -341,7 +341,8 @@ func (r *rebalanceMeta) save(ctx context.Context, store objectIO) error {
 	return r.saveWithOpts(ctx, store, ObjectOptions{})
 }
 
-func (z *erasureServerPools) IsRebalanceStarted() bool {
+func (z *erasureServerPools) IsRebalanceStarted(ctx context.Context) bool {
+	_ = z.loadRebalanceMeta(ctx)
 	z.rebalMu.RLock()
 	defer z.rebalMu.RUnlock()
 
@@ -394,12 +395,14 @@ func (z *erasureServerPools) rebalanceBuckets(ctx context.Context, poolIdx int)
 	var (
 		quit     bool
 		traceMsg string
+		notify   bool // if the status changed, notify nodes to reload rebalance metadata
 	)
 
 	for {
 		select {
 		case rebalErr := <-doneCh:
 			quit = true
+			notify = true
 			now := time.Now()
 			var status rebalStatus
 
@@ -421,12 +424,16 @@ func (z *erasureServerPools) rebalanceBuckets(ctx context.Context, poolIdx int)
 			z.rebalMu.Unlock()
 
 		case <-timer.C:
+			notify = false
 			traceMsg = fmt.Sprintf("saved at %s", time.Now())
 		}
 
 		stopFn := globalRebalanceMetrics.log(rebalanceMetricSaveMetadata, poolIdx, traceMsg)
 		err := z.saveRebalanceStats(GlobalContext, poolIdx, rebalSaveStats)
 		stopFn(0, err)
+		if err == nil && notify {
+			globalNotificationSys.LoadRebalanceMeta(GlobalContext, false)
+		}
 		rebalanceLogIf(GlobalContext, err)
 
 		if quit {
@@ -800,13 +807,20 @@ func (z *erasureServerPools) saveRebalanceStats(ctx context.Context, poolIdx int
 	ctx = lkCtx.Context()
 	noLockOpts := ObjectOptions{NoLock: true}
 	r := &rebalanceMeta{}
-	if err := r.loadWithOpts(ctx, z.serverPools[0], noLockOpts); err != nil {
+	err = r.loadWithOpts(ctx, z.serverPools[0], noLockOpts)
+	if err != nil && !errors.Is(err, errConfigNotFound) {
 		return err
 	}
 
 	z.rebalMu.Lock()
 	defer z.rebalMu.Unlock()
 
+	// If the on-disk metadata is not found, fall back to the in-memory copy: every status
+	// change notifies all nodes to reload, so the in-memory metadata reflects the latest state.
+	if errors.Is(err, errConfigNotFound) {
+		r = z.rebalMeta
+	}
+
 	switch opts {
 	case rebalSaveStoppedAt:
 		r.StoppedAt = time.Now()
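
For context, a minimal standalone Go sketch of the save-loop pattern the `rebalanceBuckets` hunks implement: metadata is saved on every timer tick, but peers are only asked to reload when the rebalance status actually changed. The helper names (`saveMeta`, `notifyPeers`, `run`) and the intervals are illustrative assumptions, not MinIO's API.

```go
package main

import (
	"fmt"
	"time"
)

// saveMeta and notifyPeers stand in for saveRebalanceStats and the peer
// notification call in the patch; they are illustrative only.
func saveMeta() error { fmt.Println("metadata saved"); return nil }
func notifyPeers()    { fmt.Println("peers reload rebalance metadata") }

// run mirrors the save loop: metadata is persisted on every timer tick,
// but peers are only notified when the rebalance status changed.
func run(doneCh <-chan error) {
	timer := time.NewTimer(100 * time.Millisecond)
	defer timer.Stop()

	var quit, notify bool
	for {
		select {
		case <-doneCh:
			// Rebalance finished, failed, or was stopped: the status changed,
			// so peers must be told to reload.
			quit, notify = true, true
		case <-timer.C:
			// Periodic checkpoint: nothing peers need to re-read.
			notify = false
		}

		if err := saveMeta(); err == nil && notify {
			notifyPeers()
		}
		if quit {
			return
		}
		timer.Reset(100 * time.Millisecond)
	}
}

func main() {
	doneCh := make(chan error, 1)
	go func() {
		time.Sleep(250 * time.Millisecond)
		doneCh <- nil // simulate rebalance completion
	}()
	run(doneCh)
}
```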