Add fixed timed restarts to updates (#19960)

This commit is contained in:
Klaus Post 2024-06-20 07:49:22 -07:00 committed by GitHub
parent 3e6dc02f8f
commit fae563b85d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 32 additions and 8 deletions

View File

@ -237,6 +237,7 @@ func (a adminAPIHandlers) ServerUpdateV2Handler(w http.ResponseWriter, r *http.R
if globalIsDistErasure {
// Notify all other MinIO peers signal service.
startTime := time.Now().Add(restartUpdateDelay)
ng := WithNPeers(len(globalNotificationSys.peerClients))
for idx, client := range globalNotificationSys.peerClients {
_, ok := failedClients[idx]
@ -247,7 +248,7 @@ func (a adminAPIHandlers) ServerUpdateV2Handler(w http.ResponseWriter, r *http.R
ng.Go(ctx, func() error {
prs, ok := peerResults[client.String()]
if ok && prs.CurrentVersion != prs.UpdatedVersion && prs.UpdatedVersion != "" {
return client.SignalService(serviceRestart, "", dryRun)
return client.SignalService(serviceRestart, "", dryRun, &startTime)
}
return nil
}, idx, *client.host)
@ -542,9 +543,12 @@ func (a adminAPIHandlers) ServiceV2Handler(w http.ResponseWriter, r *http.Reques
}
var objectAPI ObjectLayer
var execAt *time.Time
switch serviceSig {
case serviceRestart:
objectAPI, _ = validateAdminReq(ctx, w, r, policy.ServiceRestartAdminAction)
t := time.Now().Add(restartUpdateDelay)
execAt = &t
case serviceStop:
objectAPI, _ = validateAdminReq(ctx, w, r, policy.ServiceStopAdminAction)
case serviceFreeze, serviceUnFreeze:
@ -571,7 +575,7 @@ func (a adminAPIHandlers) ServiceV2Handler(w http.ResponseWriter, r *http.Reques
}
if globalIsDistErasure {
for _, nerr := range globalNotificationSys.SignalServiceV2(serviceSig, dryRun) {
for _, nerr := range globalNotificationSys.SignalServiceV2(serviceSig, dryRun, execAt) {
if nerr.Err != nil && process {
waitingDrives := map[string]madmin.DiskMetrics{}
jerr := json.Unmarshal([]byte(nerr.Err.Error()), &waitingDrives)

View File

@ -424,7 +424,7 @@ func (sys *NotificationSys) SignalConfigReload(subSys string) []NotificationPeer
}
client := client
ng.Go(GlobalContext, func() error {
return client.SignalService(serviceReloadDynamic, subSys, false)
return client.SignalService(serviceReloadDynamic, subSys, false, nil)
}, idx, *client.host)
}
return ng.Wait()
@ -440,14 +440,14 @@ func (sys *NotificationSys) SignalService(sig serviceSignal) []NotificationPeerE
client := client
ng.Go(GlobalContext, func() error {
// force == true preserves the current behavior
return client.SignalService(sig, "", false)
return client.SignalService(sig, "", false, nil)
}, idx, *client.host)
}
return ng.Wait()
}
// SignalServiceV2 - calls signal service RPC call on all peers with v2 API
func (sys *NotificationSys) SignalServiceV2(sig serviceSignal, dryRun bool) []NotificationPeerErr {
func (sys *NotificationSys) SignalServiceV2(sig serviceSignal, dryRun bool, execAt *time.Time) []NotificationPeerErr {
ng := WithNPeers(len(sys.peerClients))
for idx, client := range sys.peerClients {
if client == nil {
@ -455,7 +455,7 @@ func (sys *NotificationSys) SignalServiceV2(sig serviceSignal, dryRun bool) []No
}
client := client
ng.Go(GlobalContext, func() error {
return client.SignalService(sig, "", dryRun)
return client.SignalService(sig, "", dryRun, execAt)
}, idx, *client.host)
}
return ng.Wait()
@ -1314,7 +1314,7 @@ func (sys *NotificationSys) ServiceFreeze(ctx context.Context, freeze bool) []No
}
client := client
ng.Go(GlobalContext, func() error {
return client.SignalService(serviceSig, "", false)
return client.SignalService(serviceSig, "", false, nil)
}, idx, *client.host)
}
nerrs := ng.Wait()

View File

@ -427,11 +427,14 @@ func (client *peerRESTClient) CommitBinary(ctx context.Context) error {
}
// SignalService - sends signal to peer nodes.
func (client *peerRESTClient) SignalService(sig serviceSignal, subSys string, dryRun bool) error {
func (client *peerRESTClient) SignalService(sig serviceSignal, subSys string, dryRun bool, execAt *time.Time) error {
values := grid.NewMSS()
values.Set(peerRESTSignal, strconv.Itoa(int(sig)))
values.Set(peerRESTDryRun, strconv.FormatBool(dryRun))
values.Set(peerRESTSubSys, subSys)
if execAt != nil {
values.Set(peerRESTExecAt, execAt.Format(time.RFC3339Nano))
}
_, err := signalServiceRPC.Call(context.Background(), client.gridConn(), values)
return err
}

View File

@ -17,6 +17,8 @@
package cmd
import "time"
const (
peerRESTVersion = "v39" // add more flags to speedtest API
peerRESTVersionPrefix = SlashSeparator + peerRESTVersion
@ -69,6 +71,7 @@ const (
peerRESTURL = "url"
peerRESTSha256Sum = "sha256sum"
peerRESTReleaseInfo = "releaseinfo"
peerRESTExecAt = "exec-at"
peerRESTListenBucket = "bucket"
peerRESTListenPrefix = "prefix"
@ -76,3 +79,5 @@ const (
peerRESTListenEvents = "events"
peerRESTLogMask = "log-mask"
)
const restartUpdateDelay = 250 * time.Millisecond

View File

@ -693,6 +693,18 @@ func (s *peerRESTServer) SignalServiceHandler(vars *grid.MSS) (np grid.NoPayload
if err != nil {
return np, grid.NewRemoteErr(err)
}
// Wait until the specified time before executing the signal.
if t := vars.Get(peerRESTExecAt); t != "" {
execAt, err := time.Parse(time.RFC3339Nano, vars.Get(peerRESTExecAt))
if err != nil {
logger.LogIf(GlobalContext, "signalservice", err)
execAt = time.Now().Add(restartUpdateDelay)
}
if d := time.Until(execAt); d > 0 {
time.Sleep(d)
}
}
signal := serviceSignal(si)
switch signal {
case serviceRestart, serviceStop: