mirror of
https://github.com/minio/minio.git
synced 2025-11-08 21:24:55 -05:00
introduce new ServiceV2 API to handle guided restarts (#18826)
The new API checks for hung disks before a restart/stop and provides a per-node breakdown of the restart/stop results. It also reports how many blocked syscalls are present on the drives and what users must do about them. It adds an option to run pre-flight checks that inform the user about any hung disks, and a 'force' option to forcibly attempt a restart() even with waiting syscalls on the drives.
This commit is contained in:
@@ -20,6 +20,7 @@ package cmd
|
||||
import (
|
||||
"context"
|
||||
"encoding/gob"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -863,6 +864,21 @@ func (s *peerRESTServer) CommitBinaryHandler(w http.ResponseWriter, r *http.Requ
|
||||
|
||||
// errUnsupportedSignal is a package-level error value carrying the message
// "unsupported signal".
// NOTE(review): presumably returned by the signal handler for signals it does
// not recognize; the use site is not visible in this hunk, so confirm.
var errUnsupportedSignal = fmt.Errorf("unsupported signal")
|
||||
|
||||
func canWeRestartNode() map[string]DiskMetrics {
|
||||
errs := make([]error, len(globalLocalDrives))
|
||||
infos := make([]DiskInfo, len(globalLocalDrives))
|
||||
for i, drive := range globalLocalDrives {
|
||||
infos[i], errs[i] = drive.DiskInfo(GlobalContext, false)
|
||||
}
|
||||
infoMaps := make(map[string]DiskMetrics)
|
||||
for i := range infos {
|
||||
if infos[i].Metrics.TotalWaiting >= 1 && errors.Is(errs[i], errFaultyDisk) {
|
||||
infoMaps[infos[i].Endpoint] = infos[i].Metrics
|
||||
}
|
||||
}
|
||||
return infoMaps
|
||||
}
|
||||
|
||||
// SignalServiceHandler - signal service handler.
|
||||
func (s *peerRESTServer) SignalServiceHandler(w http.ResponseWriter, r *http.Request) {
|
||||
if !s.IsValid(w, r) {
|
||||
@@ -883,10 +899,26 @@ func (s *peerRESTServer) SignalServiceHandler(w http.ResponseWriter, r *http.Req
|
||||
}
|
||||
signal := serviceSignal(si)
|
||||
switch signal {
|
||||
case serviceRestart:
|
||||
globalServiceSignalCh <- signal
|
||||
case serviceStop:
|
||||
globalServiceSignalCh <- signal
|
||||
case serviceRestart, serviceStop:
|
||||
dryRun := r.Form.Get("dry-run") == "true" // This is only supported for `restart/stop`
|
||||
force := r.Form.Get("force") == "true"
|
||||
|
||||
waitingDisks := canWeRestartNode()
|
||||
if len(waitingDisks) > 0 {
|
||||
buf, err := json.Marshal(waitingDisks)
|
||||
if err != nil {
|
||||
s.writeErrorResponse(w, err)
|
||||
return
|
||||
}
|
||||
s.writeErrorResponse(w, errors.New(string(buf)))
|
||||
// if its forced we signal the process anyway.
|
||||
if !force {
|
||||
return
|
||||
}
|
||||
}
|
||||
if !dryRun {
|
||||
globalServiceSignalCh <- signal
|
||||
}
|
||||
case serviceFreeze:
|
||||
freezeServices()
|
||||
case serviceUnFreeze:
|
||||
|
||||
Reference in New Issue
Block a user