mirror of https://github.com/minio/minio.git
Add forceStop flag to provide facility to stop healing (#6718)
This PR also makes sure that the heal operation ignores its own on-going request when checking the HTTP request count, i.e., it does not wait on itself.
This commit is contained in:
parent
bef0318c36
commit
a9cda850ca
|
@ -53,9 +53,10 @@ type mgmtQueryKey string
|
||||||
// Only valid query params for mgmt admin APIs.
|
// Only valid query params for mgmt admin APIs.
|
||||||
const (
|
const (
|
||||||
mgmtBucket mgmtQueryKey = "bucket"
|
mgmtBucket mgmtQueryKey = "bucket"
|
||||||
mgmtPrefix mgmtQueryKey = "prefix"
|
mgmtPrefix = "prefix"
|
||||||
mgmtClientToken mgmtQueryKey = "clientToken"
|
mgmtClientToken = "clientToken"
|
||||||
mgmtForceStart mgmtQueryKey = "forceStart"
|
mgmtForceStart = "forceStart"
|
||||||
|
mgmtForceStop = "forceStop"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -402,7 +403,7 @@ func (a adminAPIHandlers) DownloadProfilingHandler(w http.ResponseWriter, r *htt
|
||||||
|
|
||||||
// extractHealInitParams - Validates params for heal init API.
|
// extractHealInitParams - Validates params for heal init API.
|
||||||
func extractHealInitParams(r *http.Request) (bucket, objPrefix string,
|
func extractHealInitParams(r *http.Request) (bucket, objPrefix string,
|
||||||
hs madmin.HealOpts, clientToken string, forceStart bool,
|
hs madmin.HealOpts, clientToken string, forceStart bool, forceStop bool,
|
||||||
err APIErrorCode) {
|
err APIErrorCode) {
|
||||||
|
|
||||||
vars := mux.Vars(r)
|
vars := mux.Vars(r)
|
||||||
|
@ -433,7 +434,9 @@ func extractHealInitParams(r *http.Request) (bucket, objPrefix string,
|
||||||
if _, ok := qParms[string(mgmtForceStart)]; ok {
|
if _, ok := qParms[string(mgmtForceStart)]; ok {
|
||||||
forceStart = true
|
forceStart = true
|
||||||
}
|
}
|
||||||
|
if _, ok := qParms[string(mgmtForceStop)]; ok {
|
||||||
|
forceStop = true
|
||||||
|
}
|
||||||
// ignore body if clientToken is provided
|
// ignore body if clientToken is provided
|
||||||
if clientToken == "" {
|
if clientToken == "" {
|
||||||
jerr := json.NewDecoder(r.Body).Decode(&hs)
|
jerr := json.NewDecoder(r.Body).Decode(&hs)
|
||||||
|
@ -484,7 +487,7 @@ func (a adminAPIHandlers) HealHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
bucket, objPrefix, hs, clientToken, forceStart, apiErr := extractHealInitParams(r)
|
bucket, objPrefix, hs, clientToken, forceStart, forceStop, apiErr := extractHealInitParams(r)
|
||||||
if apiErr != ErrNone {
|
if apiErr != ErrNone {
|
||||||
writeErrorResponseJSON(w, apiErr, r.URL)
|
writeErrorResponseJSON(w, apiErr, r.URL)
|
||||||
return
|
return
|
||||||
|
@ -518,13 +521,35 @@ func (a adminAPIHandlers) HealHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
w.Write([]byte("\n\r"))
|
w.Write([]byte("\n\r"))
|
||||||
w.(http.Flusher).Flush()
|
w.(http.Flusher).Flush()
|
||||||
case hr := <-respCh:
|
case hr := <-respCh:
|
||||||
switch {
|
switch hr.errCode {
|
||||||
case hr.errCode == ErrNone:
|
case ErrNone:
|
||||||
writeSuccessResponseJSON(w, hr.respBytes)
|
if started {
|
||||||
case hr.errBody == "":
|
w.Write(hr.respBytes)
|
||||||
writeErrorResponseJSON(w, hr.errCode, r.URL)
|
w.(http.Flusher).Flush()
|
||||||
|
} else {
|
||||||
|
writeSuccessResponseJSON(w, hr.respBytes)
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
writeCustomErrorResponseJSON(w, hr.errCode, hr.errBody, r.URL)
|
apiError := getAPIError(hr.errCode)
|
||||||
|
var errorRespJSON []byte
|
||||||
|
if hr.errBody == "" {
|
||||||
|
errorRespJSON = encodeResponseJSON(getAPIErrorResponse(apiError, r.URL.Path, w.Header().Get(responseRequestIDKey)))
|
||||||
|
} else {
|
||||||
|
errorRespJSON = encodeResponseJSON(APIErrorResponse{
|
||||||
|
Code: apiError.Code,
|
||||||
|
Message: hr.errBody,
|
||||||
|
Resource: r.URL.Path,
|
||||||
|
RequestID: w.Header().Get(responseRequestIDKey),
|
||||||
|
HostID: "3L137",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
if !started {
|
||||||
|
setCommonHeaders(w)
|
||||||
|
w.Header().Set("Content-Type", string(mimeJSON))
|
||||||
|
w.WriteHeader(apiError.HTTPStatusCode)
|
||||||
|
}
|
||||||
|
w.Write(errorRespJSON)
|
||||||
|
w.(http.Flusher).Flush()
|
||||||
}
|
}
|
||||||
break forLoop
|
break forLoop
|
||||||
}
|
}
|
||||||
|
@ -535,34 +560,61 @@ func (a adminAPIHandlers) HealHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
info := objLayer.StorageInfo(ctx)
|
info := objLayer.StorageInfo(ctx)
|
||||||
numDisks := info.Backend.OfflineDisks + info.Backend.OnlineDisks
|
numDisks := info.Backend.OfflineDisks + info.Backend.OnlineDisks
|
||||||
|
|
||||||
if clientToken == "" {
|
healPath := pathJoin(bucket, objPrefix)
|
||||||
// Not a status request
|
if clientToken == "" && !forceStart && !forceStop {
|
||||||
nh := newHealSequence(bucket, objPrefix, handlers.GetSourceIP(r),
|
nh, exists := globalAllHealState.getHealSequence(healPath)
|
||||||
numDisks, hs, forceStart)
|
if exists && !nh.hasEnded() && len(nh.currentStatus.Items) > 0 {
|
||||||
|
b, err := json.Marshal(madmin.HealStartSuccess{
|
||||||
|
ClientToken: nh.clientToken,
|
||||||
|
ClientAddress: nh.clientAddress,
|
||||||
|
StartTime: nh.startTime,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
logger.LogIf(context.Background(), err)
|
||||||
|
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Client token not specified but a heal sequence exists on a path,
|
||||||
|
// Send the token back to client.
|
||||||
|
writeSuccessResponseJSON(w, b)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
respCh := make(chan healResp)
|
if clientToken != "" && !forceStart && !forceStop {
|
||||||
go func() {
|
|
||||||
respBytes, errCode, errMsg := globalAllHealState.LaunchNewHealSequence(nh)
|
|
||||||
hr := healResp{respBytes, errCode, errMsg}
|
|
||||||
respCh <- hr
|
|
||||||
}()
|
|
||||||
|
|
||||||
// Due to the force-starting functionality, the Launch
|
|
||||||
// call above can take a long time - to keep the
|
|
||||||
// connection alive, we start sending whitespace
|
|
||||||
keepConnLive(w, respCh)
|
|
||||||
} else {
|
|
||||||
// Since clientToken is given, fetch heal status from running
|
// Since clientToken is given, fetch heal status from running
|
||||||
// heal sequence.
|
// heal sequence.
|
||||||
path := bucket + "/" + objPrefix
|
|
||||||
respBytes, errCode := globalAllHealState.PopHealStatusJSON(
|
respBytes, errCode := globalAllHealState.PopHealStatusJSON(
|
||||||
path, clientToken)
|
healPath, clientToken)
|
||||||
if errCode != ErrNone {
|
if errCode != ErrNone {
|
||||||
writeErrorResponseJSON(w, errCode, r.URL)
|
writeErrorResponseJSON(w, errCode, r.URL)
|
||||||
} else {
|
} else {
|
||||||
writeSuccessResponseJSON(w, respBytes)
|
writeSuccessResponseJSON(w, respBytes)
|
||||||
}
|
}
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
respCh := make(chan healResp)
|
||||||
|
switch {
|
||||||
|
case forceStop:
|
||||||
|
go func() {
|
||||||
|
respBytes, errCode := globalAllHealState.stopHealSequence(healPath)
|
||||||
|
hr := healResp{respBytes: respBytes, errCode: errCode}
|
||||||
|
respCh <- hr
|
||||||
|
}()
|
||||||
|
case clientToken == "":
|
||||||
|
nh := newHealSequence(bucket, objPrefix, handlers.GetSourceIP(r), numDisks, hs, forceStart)
|
||||||
|
go func() {
|
||||||
|
respBytes, errCode, errMsg := globalAllHealState.LaunchNewHealSequence(nh)
|
||||||
|
hr := healResp{respBytes, errCode, errMsg}
|
||||||
|
respCh <- hr
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Due to the force-starting functionality, the Launch
|
||||||
|
// call above can take a long time - to keep the
|
||||||
|
// connection alive, we start sending whitespace
|
||||||
|
keepConnLive(w, respCh)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetConfigHandler - GET /minio/admin/v1/config
|
// GetConfigHandler - GET /minio/admin/v1/config
|
||||||
|
|
|
@ -20,6 +20,7 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net/http"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
@ -123,6 +124,35 @@ func (ahs *allHealState) getHealSequence(path string) (h *healSequence, exists b
|
||||||
return h, exists
|
return h, exists
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ahs *allHealState) stopHealSequence(path string) ([]byte, APIErrorCode) {
|
||||||
|
var hsp madmin.HealStopSuccess
|
||||||
|
he, exists := ahs.getHealSequence(path)
|
||||||
|
if !exists {
|
||||||
|
hsp = madmin.HealStopSuccess{
|
||||||
|
ClientToken: "invalid",
|
||||||
|
StartTime: UTCNow(),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
hsp = madmin.HealStopSuccess{
|
||||||
|
ClientToken: he.clientToken,
|
||||||
|
ClientAddress: he.clientAddress,
|
||||||
|
StartTime: he.startTime,
|
||||||
|
}
|
||||||
|
|
||||||
|
he.stop()
|
||||||
|
for !he.hasEnded() {
|
||||||
|
time.Sleep(1 * time.Second)
|
||||||
|
}
|
||||||
|
ahs.Lock()
|
||||||
|
defer ahs.Unlock()
|
||||||
|
// Heal sequence explicitly stopped, remove it.
|
||||||
|
delete(ahs.healSeqMap, path)
|
||||||
|
}
|
||||||
|
|
||||||
|
b, err := json.Marshal(&hsp)
|
||||||
|
return b, toAdminAPIErrCode(err)
|
||||||
|
}
|
||||||
|
|
||||||
// LaunchNewHealSequence - launches a background routine that performs
|
// LaunchNewHealSequence - launches a background routine that performs
|
||||||
// healing according to the healSequence argument. For each heal
|
// healing according to the healSequence argument. For each heal
|
||||||
// sequence, state is stored in the `globalAllHealState`, which is a
|
// sequence, state is stored in the `globalAllHealState`, which is a
|
||||||
|
@ -143,20 +173,20 @@ func (ahs *allHealState) LaunchNewHealSequence(h *healSequence) (
|
||||||
existsAndLive = true
|
existsAndLive = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if existsAndLive {
|
if existsAndLive {
|
||||||
// A heal sequence exists on the given path.
|
// A heal sequence exists on the given path.
|
||||||
if h.forceStarted {
|
if h.forceStarted {
|
||||||
// stop the running heal sequence - wait for
|
// stop the running heal sequence - wait for it to finish.
|
||||||
// it to finish.
|
|
||||||
he.stop()
|
he.stop()
|
||||||
for !he.hasEnded() {
|
for !he.hasEnded() {
|
||||||
time.Sleep(10 * time.Second)
|
time.Sleep(1 * time.Second)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
errMsg = "Heal is already running on the given path " +
|
errMsg = "Heal is already running on the given path " +
|
||||||
"(use force-start option to stop and start afresh). " +
|
"(use force-start option to stop and start afresh). " +
|
||||||
fmt.Sprintf("The heal was started by IP %s at %s",
|
fmt.Sprintf("The heal was started by IP %s at %s, token is %s",
|
||||||
h.clientAddress, h.startTime)
|
h.clientAddress, h.startTime.Format(http.TimeFormat), h.clientToken)
|
||||||
|
|
||||||
return nil, ErrHealAlreadyRunning, errMsg
|
return nil, ErrHealAlreadyRunning, errMsg
|
||||||
}
|
}
|
||||||
|
@ -285,7 +315,7 @@ type healSequence struct {
|
||||||
// bucket, and prefix on which heal seq. was initiated
|
// bucket, and prefix on which heal seq. was initiated
|
||||||
bucket, objPrefix string
|
bucket, objPrefix string
|
||||||
|
|
||||||
// path is just bucket + "/" + objPrefix
|
// path is just pathJoin(bucket, objPrefix)
|
||||||
path string
|
path string
|
||||||
|
|
||||||
// time at which heal sequence was started
|
// time at which heal sequence was started
|
||||||
|
@ -330,7 +360,7 @@ func newHealSequence(bucket, objPrefix, clientAddr string,
|
||||||
return &healSequence{
|
return &healSequence{
|
||||||
bucket: bucket,
|
bucket: bucket,
|
||||||
objPrefix: objPrefix,
|
objPrefix: objPrefix,
|
||||||
path: bucket + "/" + objPrefix,
|
path: pathJoin(bucket, objPrefix),
|
||||||
startTime: UTCNow(),
|
startTime: UTCNow(),
|
||||||
clientToken: mustGetUUID(),
|
clientToken: mustGetUUID(),
|
||||||
clientAddress: clientAddr,
|
clientAddress: clientAddr,
|
||||||
|
@ -552,7 +582,7 @@ func (h *healSequence) healConfig() error {
|
||||||
// before proceeding to heal
|
// before proceeding to heal
|
||||||
waitCount := 60
|
waitCount := 60
|
||||||
// Any requests in progress, delay the heal.
|
// Any requests in progress, delay the heal.
|
||||||
for globalHTTPServer.GetRequestCount() > 0 && waitCount > 0 {
|
for globalHTTPServer.GetRequestCount() > 2 && waitCount > 0 {
|
||||||
waitCount--
|
waitCount--
|
||||||
time.Sleep(1 * time.Second)
|
time.Sleep(1 * time.Second)
|
||||||
}
|
}
|
||||||
|
@ -698,7 +728,7 @@ func (h *healSequence) healBucket(bucket string) error {
|
||||||
// before proceeding to heal
|
// before proceeding to heal
|
||||||
waitCount := 60
|
waitCount := 60
|
||||||
// Any requests in progress, delay the heal.
|
// Any requests in progress, delay the heal.
|
||||||
for globalHTTPServer.GetRequestCount() > 0 && waitCount > 0 {
|
for globalHTTPServer.GetRequestCount() > 2 && waitCount > 0 {
|
||||||
waitCount--
|
waitCount--
|
||||||
time.Sleep(1 * time.Second)
|
time.Sleep(1 * time.Second)
|
||||||
}
|
}
|
||||||
|
|
|
@ -608,7 +608,7 @@ func writeCustomErrorResponseJSON(w http.ResponseWriter, errorCode APIErrorCode,
|
||||||
Code: apiError.Code,
|
Code: apiError.Code,
|
||||||
Message: errBody,
|
Message: errBody,
|
||||||
Resource: reqURL.Path,
|
Resource: reqURL.Path,
|
||||||
RequestID: "3L137",
|
RequestID: w.Header().Get(responseRequestIDKey),
|
||||||
HostID: "3L137",
|
HostID: "3L137",
|
||||||
}
|
}
|
||||||
encodedErrorResponse := encodeResponseJSON(errorResponse)
|
encodedErrorResponse := encodeResponseJSON(errorResponse)
|
||||||
|
|
|
@ -208,7 +208,7 @@ Fetches information for all cluster nodes, such as server properties, storage in
|
||||||
## 6. Heal operations
|
## 6. Heal operations
|
||||||
|
|
||||||
<a name="Heal"></a>
|
<a name="Heal"></a>
|
||||||
### Heal(bucket, prefix string, healOpts HealOpts, clientToken string, forceStart bool) (start HealStartSuccess, status HealTaskStatus, err error)
|
### Heal(bucket, prefix string, healOpts HealOpts, clientToken string, forceStart bool, forceStop bool) (start HealStartSuccess, status HealTaskStatus, err error)
|
||||||
|
|
||||||
Start a heal sequence that scans data under the given (possibly empty)
|
Start a heal sequence that scans data under the given (possibly empty)
|
||||||
`bucket` and `prefix`. The `recursive` bool turns on recursive
|
`bucket` and `prefix`. The `recursive` bool turns on recursive
|
||||||
|
@ -232,7 +232,8 @@ __Example__
|
||||||
DryRun: false,
|
DryRun: false,
|
||||||
}
|
}
|
||||||
forceStart := false
|
forceStart := false
|
||||||
healPath, err := madmClnt.Heal("", "", opts, "", forceStart)
|
forceStop := false
|
||||||
|
healPath, err := madmClnt.Heal("", "", opts, "", forceStart, forceStop)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalln(err)
|
log.Fatalln(err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,6 +40,10 @@ type HealStartSuccess struct {
|
||||||
StartTime time.Time `json:"startTime"`
|
StartTime time.Time `json:"startTime"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HealStopSuccess - holds information about a successfully stopped
|
||||||
|
// heal operation.
|
||||||
|
type HealStopSuccess HealStartSuccess
|
||||||
|
|
||||||
// HealTaskStatus - status struct for a heal task
|
// HealTaskStatus - status struct for a heal task
|
||||||
type HealTaskStatus struct {
|
type HealTaskStatus struct {
|
||||||
Summary string `json:"summary"`
|
Summary string `json:"summary"`
|
||||||
|
@ -176,10 +180,17 @@ func (hri *HealResultItem) GetOnlineCounts() (b, a int) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Heal - API endpoint to start heal and to fetch status
|
// Heal - API endpoint to start heal and to fetch status
|
||||||
|
// forceStart and forceStop are mutually exclusive: at most one of
|
||||||
|
// them may be set to 'true'. If both are set, an invalid argument
|
||||||
|
// error is returned.
|
||||||
func (adm *AdminClient) Heal(bucket, prefix string, healOpts HealOpts,
|
func (adm *AdminClient) Heal(bucket, prefix string, healOpts HealOpts,
|
||||||
clientToken string, forceStart bool) (
|
clientToken string, forceStart, forceStop bool) (
|
||||||
healStart HealStartSuccess, healTaskStatus HealTaskStatus, err error) {
|
healStart HealStartSuccess, healTaskStatus HealTaskStatus, err error) {
|
||||||
|
|
||||||
|
if forceStart && forceStop {
|
||||||
|
return healStart, healTaskStatus, ErrInvalidArgument("forceStart and forceStop set to true is not allowed")
|
||||||
|
}
|
||||||
|
|
||||||
body, err := json.Marshal(healOpts)
|
body, err := json.Marshal(healOpts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return healStart, healTaskStatus, err
|
return healStart, healTaskStatus, err
|
||||||
|
@ -196,8 +207,12 @@ func (adm *AdminClient) Heal(bucket, prefix string, healOpts HealOpts,
|
||||||
queryVals.Set("clientToken", clientToken)
|
queryVals.Set("clientToken", clientToken)
|
||||||
body = []byte{}
|
body = []byte{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Only one of forceStart or forceStop can be set.
|
||||||
if forceStart {
|
if forceStart {
|
||||||
queryVals.Set("forceStart", "true")
|
queryVals.Set("forceStart", "true")
|
||||||
|
} else if forceStop {
|
||||||
|
queryVals.Set("forceStop", "true")
|
||||||
}
|
}
|
||||||
|
|
||||||
resp, err := adm.executeMethod("POST", requestData{
|
resp, err := adm.executeMethod("POST", requestData{
|
||||||
|
@ -221,9 +236,24 @@ func (adm *AdminClient) Heal(bucket, prefix string, healOpts HealOpts,
|
||||||
|
|
||||||
// Was it a status request?
|
// Was it a status request?
|
||||||
if clientToken == "" {
|
if clientToken == "" {
|
||||||
|
// As a special operation, forceStop returns a struct
|
||||||
|
// similar to healStart, which carries the
|
||||||
|
// heal sequence information about the heal which
|
||||||
|
// was stopped.
|
||||||
err = json.Unmarshal(respBytes, &healStart)
|
err = json.Unmarshal(respBytes, &healStart)
|
||||||
} else {
|
} else {
|
||||||
err = json.Unmarshal(respBytes, &healTaskStatus)
|
err = json.Unmarshal(respBytes, &healTaskStatus)
|
||||||
}
|
}
|
||||||
return healStart, healTaskStatus, err
|
if err != nil {
|
||||||
|
// Maybe the server responded with an error after the success
|
||||||
|
// message; handle it separately here.
|
||||||
|
var errResp ErrorResponse
|
||||||
|
err = json.Unmarshal(respBytes, &errResp)
|
||||||
|
if err != nil {
|
||||||
|
// Unknown structure, return the error anyway.
|
||||||
|
return healStart, healTaskStatus, err
|
||||||
|
}
|
||||||
|
return healStart, healTaskStatus, errResp
|
||||||
|
}
|
||||||
|
return healStart, healTaskStatus, nil
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue