mirror of
				https://github.com/minio/minio.git
				synced 2025-10-29 15:55:00 -04:00 
			
		
		
		
	Add forceStop flag to provide facility to stop healing (#6718)
This PR also makes sure that the HTTP request count check ignores the ongoing heal operation, i.e., the heal does not wait on itself.
This commit is contained in:
		
							parent
							
								
									bef0318c36
								
							
						
					
					
						commit
						a9cda850ca
					
				| @ -53,9 +53,10 @@ type mgmtQueryKey string | ||||
| // Only valid query params for mgmt admin APIs. | ||||
| const ( | ||||
| 	mgmtBucket      mgmtQueryKey = "bucket" | ||||
| 	mgmtPrefix      mgmtQueryKey = "prefix" | ||||
| 	mgmtClientToken mgmtQueryKey = "clientToken" | ||||
| 	mgmtForceStart  mgmtQueryKey = "forceStart" | ||||
| 	mgmtPrefix                   = "prefix" | ||||
| 	mgmtClientToken              = "clientToken" | ||||
| 	mgmtForceStart               = "forceStart" | ||||
| 	mgmtForceStop                = "forceStop" | ||||
| ) | ||||
| 
 | ||||
| var ( | ||||
| @ -402,7 +403,7 @@ func (a adminAPIHandlers) DownloadProfilingHandler(w http.ResponseWriter, r *htt | ||||
| 
 | ||||
| // extractHealInitParams - Validates params for heal init API. | ||||
| func extractHealInitParams(r *http.Request) (bucket, objPrefix string, | ||||
| 	hs madmin.HealOpts, clientToken string, forceStart bool, | ||||
| 	hs madmin.HealOpts, clientToken string, forceStart bool, forceStop bool, | ||||
| 	err APIErrorCode) { | ||||
| 
 | ||||
| 	vars := mux.Vars(r) | ||||
| @ -433,7 +434,9 @@ func extractHealInitParams(r *http.Request) (bucket, objPrefix string, | ||||
| 	if _, ok := qParms[string(mgmtForceStart)]; ok { | ||||
| 		forceStart = true | ||||
| 	} | ||||
| 
 | ||||
| 	if _, ok := qParms[string(mgmtForceStop)]; ok { | ||||
| 		forceStop = true | ||||
| 	} | ||||
| 	// ignore body if clientToken is provided | ||||
| 	if clientToken == "" { | ||||
| 		jerr := json.NewDecoder(r.Body).Decode(&hs) | ||||
| @ -484,7 +487,7 @@ func (a adminAPIHandlers) HealHandler(w http.ResponseWriter, r *http.Request) { | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	bucket, objPrefix, hs, clientToken, forceStart, apiErr := extractHealInitParams(r) | ||||
| 	bucket, objPrefix, hs, clientToken, forceStart, forceStop, apiErr := extractHealInitParams(r) | ||||
| 	if apiErr != ErrNone { | ||||
| 		writeErrorResponseJSON(w, apiErr, r.URL) | ||||
| 		return | ||||
| @ -518,13 +521,35 @@ func (a adminAPIHandlers) HealHandler(w http.ResponseWriter, r *http.Request) { | ||||
| 				w.Write([]byte("\n\r")) | ||||
| 				w.(http.Flusher).Flush() | ||||
| 			case hr := <-respCh: | ||||
| 				switch { | ||||
| 				case hr.errCode == ErrNone: | ||||
| 					writeSuccessResponseJSON(w, hr.respBytes) | ||||
| 				case hr.errBody == "": | ||||
| 					writeErrorResponseJSON(w, hr.errCode, r.URL) | ||||
| 				switch hr.errCode { | ||||
| 				case ErrNone: | ||||
| 					if started { | ||||
| 						w.Write(hr.respBytes) | ||||
| 						w.(http.Flusher).Flush() | ||||
| 					} else { | ||||
| 						writeSuccessResponseJSON(w, hr.respBytes) | ||||
| 					} | ||||
| 				default: | ||||
| 					writeCustomErrorResponseJSON(w, hr.errCode, hr.errBody, r.URL) | ||||
| 					apiError := getAPIError(hr.errCode) | ||||
| 					var errorRespJSON []byte | ||||
| 					if hr.errBody == "" { | ||||
| 						errorRespJSON = encodeResponseJSON(getAPIErrorResponse(apiError, r.URL.Path, w.Header().Get(responseRequestIDKey))) | ||||
| 					} else { | ||||
| 						errorRespJSON = encodeResponseJSON(APIErrorResponse{ | ||||
| 							Code:      apiError.Code, | ||||
| 							Message:   hr.errBody, | ||||
| 							Resource:  r.URL.Path, | ||||
| 							RequestID: w.Header().Get(responseRequestIDKey), | ||||
| 							HostID:    "3L137", | ||||
| 						}) | ||||
| 					} | ||||
| 					if !started { | ||||
| 						setCommonHeaders(w) | ||||
| 						w.Header().Set("Content-Type", string(mimeJSON)) | ||||
| 						w.WriteHeader(apiError.HTTPStatusCode) | ||||
| 					} | ||||
| 					w.Write(errorRespJSON) | ||||
| 					w.(http.Flusher).Flush() | ||||
| 				} | ||||
| 				break forLoop | ||||
| 			} | ||||
| @ -535,34 +560,61 @@ func (a adminAPIHandlers) HealHandler(w http.ResponseWriter, r *http.Request) { | ||||
| 	info := objLayer.StorageInfo(ctx) | ||||
| 	numDisks := info.Backend.OfflineDisks + info.Backend.OnlineDisks | ||||
| 
 | ||||
| 	if clientToken == "" { | ||||
| 		// Not a status request | ||||
| 		nh := newHealSequence(bucket, objPrefix, handlers.GetSourceIP(r), | ||||
| 			numDisks, hs, forceStart) | ||||
| 	healPath := pathJoin(bucket, objPrefix) | ||||
| 	if clientToken == "" && !forceStart && !forceStop { | ||||
| 		nh, exists := globalAllHealState.getHealSequence(healPath) | ||||
| 		if exists && !nh.hasEnded() && len(nh.currentStatus.Items) > 0 { | ||||
| 			b, err := json.Marshal(madmin.HealStartSuccess{ | ||||
| 				ClientToken:   nh.clientToken, | ||||
| 				ClientAddress: nh.clientAddress, | ||||
| 				StartTime:     nh.startTime, | ||||
| 			}) | ||||
| 			if err != nil { | ||||
| 				logger.LogIf(context.Background(), err) | ||||
| 				writeErrorResponse(w, toAPIErrorCode(err), r.URL) | ||||
| 				return | ||||
| 			} | ||||
| 			// Client token not specified but a heal sequence exists on a path, | ||||
| 			// Send the token back to client. | ||||
| 			writeSuccessResponseJSON(w, b) | ||||
| 			return | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 		respCh := make(chan healResp) | ||||
| 		go func() { | ||||
| 			respBytes, errCode, errMsg := globalAllHealState.LaunchNewHealSequence(nh) | ||||
| 			hr := healResp{respBytes, errCode, errMsg} | ||||
| 			respCh <- hr | ||||
| 		}() | ||||
| 
 | ||||
| 		// Due to the force-starting functionality, the Launch | ||||
| 		// call above can take a long time - to keep the | ||||
| 		// connection alive, we start sending whitespace | ||||
| 		keepConnLive(w, respCh) | ||||
| 	} else { | ||||
| 	if clientToken != "" && !forceStart && !forceStop { | ||||
| 		// Since clientToken is given, fetch heal status from running | ||||
| 		// heal sequence. | ||||
| 		path := bucket + "/" + objPrefix | ||||
| 		respBytes, errCode := globalAllHealState.PopHealStatusJSON( | ||||
| 			path, clientToken) | ||||
| 			healPath, clientToken) | ||||
| 		if errCode != ErrNone { | ||||
| 			writeErrorResponseJSON(w, errCode, r.URL) | ||||
| 		} else { | ||||
| 			writeSuccessResponseJSON(w, respBytes) | ||||
| 		} | ||||
| 		return | ||||
| 	} | ||||
| 
 | ||||
| 	respCh := make(chan healResp) | ||||
| 	switch { | ||||
| 	case forceStop: | ||||
| 		go func() { | ||||
| 			respBytes, errCode := globalAllHealState.stopHealSequence(healPath) | ||||
| 			hr := healResp{respBytes: respBytes, errCode: errCode} | ||||
| 			respCh <- hr | ||||
| 		}() | ||||
| 	case clientToken == "": | ||||
| 		nh := newHealSequence(bucket, objPrefix, handlers.GetSourceIP(r), numDisks, hs, forceStart) | ||||
| 		go func() { | ||||
| 			respBytes, errCode, errMsg := globalAllHealState.LaunchNewHealSequence(nh) | ||||
| 			hr := healResp{respBytes, errCode, errMsg} | ||||
| 			respCh <- hr | ||||
| 		}() | ||||
| 	} | ||||
| 
 | ||||
| 	// Due to the force-starting functionality, the Launch | ||||
| 	// call above can take a long time - to keep the | ||||
| 	// connection alive, we start sending whitespace | ||||
| 	keepConnLive(w, respCh) | ||||
| } | ||||
| 
 | ||||
| // GetConfigHandler - GET /minio/admin/v1/config | ||||
|  | ||||
| @ -20,6 +20,7 @@ import ( | ||||
| 	"context" | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"net/http" | ||||
| 	"runtime" | ||||
| 	"strings" | ||||
| 	"sync" | ||||
| @ -123,6 +124,35 @@ func (ahs *allHealState) getHealSequence(path string) (h *healSequence, exists b | ||||
| 	return h, exists | ||||
| } | ||||
| 
 | ||||
| func (ahs *allHealState) stopHealSequence(path string) ([]byte, APIErrorCode) { | ||||
| 	var hsp madmin.HealStopSuccess | ||||
| 	he, exists := ahs.getHealSequence(path) | ||||
| 	if !exists { | ||||
| 		hsp = madmin.HealStopSuccess{ | ||||
| 			ClientToken: "invalid", | ||||
| 			StartTime:   UTCNow(), | ||||
| 		} | ||||
| 	} else { | ||||
| 		hsp = madmin.HealStopSuccess{ | ||||
| 			ClientToken:   he.clientToken, | ||||
| 			ClientAddress: he.clientAddress, | ||||
| 			StartTime:     he.startTime, | ||||
| 		} | ||||
| 
 | ||||
| 		he.stop() | ||||
| 		for !he.hasEnded() { | ||||
| 			time.Sleep(1 * time.Second) | ||||
| 		} | ||||
| 		ahs.Lock() | ||||
| 		defer ahs.Unlock() | ||||
| 		// Heal sequence explicitly stopped, remove it. | ||||
| 		delete(ahs.healSeqMap, path) | ||||
| 	} | ||||
| 
 | ||||
| 	b, err := json.Marshal(&hsp) | ||||
| 	return b, toAdminAPIErrCode(err) | ||||
| } | ||||
| 
 | ||||
| // LaunchNewHealSequence - launches a background routine that performs | ||||
| // healing according to the healSequence argument. For each heal | ||||
| // sequence, state is stored in the `globalAllHealState`, which is a | ||||
| @ -143,20 +173,20 @@ func (ahs *allHealState) LaunchNewHealSequence(h *healSequence) ( | ||||
| 			existsAndLive = true | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if existsAndLive { | ||||
| 		// A heal sequence exists on the given path. | ||||
| 		if h.forceStarted { | ||||
| 			// stop the running heal sequence - wait for | ||||
| 			// it to finish. | ||||
| 			// stop the running heal sequence - wait for it to finish. | ||||
| 			he.stop() | ||||
| 			for !he.hasEnded() { | ||||
| 				time.Sleep(10 * time.Second) | ||||
| 				time.Sleep(1 * time.Second) | ||||
| 			} | ||||
| 		} else { | ||||
| 			errMsg = "Heal is already running on the given path " + | ||||
| 				"(use force-start option to stop and start afresh). " + | ||||
| 				fmt.Sprintf("The heal was started by IP %s at %s", | ||||
| 					h.clientAddress, h.startTime) | ||||
| 				fmt.Sprintf("The heal was started by IP %s at %s, token is %s", | ||||
| 					h.clientAddress, h.startTime.Format(http.TimeFormat), h.clientToken) | ||||
| 
 | ||||
| 			return nil, ErrHealAlreadyRunning, errMsg | ||||
| 		} | ||||
| @ -285,7 +315,7 @@ type healSequence struct { | ||||
| 	// bucket, and prefix on which heal seq. was initiated | ||||
| 	bucket, objPrefix string | ||||
| 
 | ||||
| 	// path is just bucket + "/" + objPrefix | ||||
| 	// path is just pathJoin(bucket, objPrefix) | ||||
| 	path string | ||||
| 
 | ||||
| 	// time at which heal sequence was started | ||||
| @ -330,7 +360,7 @@ func newHealSequence(bucket, objPrefix, clientAddr string, | ||||
| 	return &healSequence{ | ||||
| 		bucket:        bucket, | ||||
| 		objPrefix:     objPrefix, | ||||
| 		path:          bucket + "/" + objPrefix, | ||||
| 		path:          pathJoin(bucket, objPrefix), | ||||
| 		startTime:     UTCNow(), | ||||
| 		clientToken:   mustGetUUID(), | ||||
| 		clientAddress: clientAddr, | ||||
| @ -552,7 +582,7 @@ func (h *healSequence) healConfig() error { | ||||
| 			// before proceeding to heal | ||||
| 			waitCount := 60 | ||||
| 			// Any requests in progress, delay the heal. | ||||
| 			for globalHTTPServer.GetRequestCount() > 0 && waitCount > 0 { | ||||
| 			for globalHTTPServer.GetRequestCount() > 2 && waitCount > 0 { | ||||
| 				waitCount-- | ||||
| 				time.Sleep(1 * time.Second) | ||||
| 			} | ||||
| @ -698,7 +728,7 @@ func (h *healSequence) healBucket(bucket string) error { | ||||
| 			// before proceeding to heal | ||||
| 			waitCount := 60 | ||||
| 			// Any requests in progress, delay the heal. | ||||
| 			for globalHTTPServer.GetRequestCount() > 0 && waitCount > 0 { | ||||
| 			for globalHTTPServer.GetRequestCount() > 2 && waitCount > 0 { | ||||
| 				waitCount-- | ||||
| 				time.Sleep(1 * time.Second) | ||||
| 			} | ||||
|  | ||||
| @ -608,7 +608,7 @@ func writeCustomErrorResponseJSON(w http.ResponseWriter, errorCode APIErrorCode, | ||||
| 		Code:      apiError.Code, | ||||
| 		Message:   errBody, | ||||
| 		Resource:  reqURL.Path, | ||||
| 		RequestID: "3L137", | ||||
| 		RequestID: w.Header().Get(responseRequestIDKey), | ||||
| 		HostID:    "3L137", | ||||
| 	} | ||||
| 	encodedErrorResponse := encodeResponseJSON(errorResponse) | ||||
|  | ||||
| @ -208,7 +208,7 @@ Fetches information for all cluster nodes, such as server properties, storage in | ||||
| ## 6. Heal operations | ||||
| 
 | ||||
| <a name="Heal"></a> | ||||
| ### Heal(bucket, prefix string, healOpts HealOpts, clientToken string, forceStart bool) (start HealStartSuccess, status HealTaskStatus, err error) | ||||
| ### Heal(bucket, prefix string, healOpts HealOpts, clientToken string, forceStart bool, forceStop bool) (start HealStartSuccess, status HealTaskStatus, err error) | ||||
| 
 | ||||
| Start a heal sequence that scans data under the given (possibly empty) | ||||
| `bucket` and `prefix`. The `recursive` bool turns on recursive | ||||
| @ -232,7 +232,8 @@ __Example__ | ||||
|             DryRun:    false, | ||||
|     } | ||||
|     forceStart := false | ||||
|     healPath, err := madmClnt.Heal("", "", opts, "", forceStart) | ||||
|     forceStop := false | ||||
|     healPath, err := madmClnt.Heal("", "", opts, "", forceStart, forceStop) | ||||
|     if err != nil { | ||||
|         log.Fatalln(err) | ||||
|     } | ||||
|  | ||||
| @ -40,6 +40,10 @@ type HealStartSuccess struct { | ||||
| 	StartTime     time.Time `json:"startTime"` | ||||
| } | ||||
| 
 | ||||
| // HealStopSuccess - holds information about a successfully stopped | ||||
| // heal operation. | ||||
| type HealStopSuccess HealStartSuccess | ||||
| 
 | ||||
| // HealTaskStatus - status struct for a heal task | ||||
| type HealTaskStatus struct { | ||||
| 	Summary       string    `json:"summary"` | ||||
| @ -176,10 +180,17 @@ func (hri *HealResultItem) GetOnlineCounts() (b, a int) { | ||||
| } | ||||
| 
 | ||||
| // Heal - API endpoint to start heal and to fetch status | ||||
| // forceStart and forceStop are mutually exclusive: at most one of | ||||
| // them may be set to 'true'. If both are set, an invalid-argument | ||||
| // error is returned. | ||||
| func (adm *AdminClient) Heal(bucket, prefix string, healOpts HealOpts, | ||||
| 	clientToken string, forceStart bool) ( | ||||
| 	clientToken string, forceStart, forceStop bool) ( | ||||
| 	healStart HealStartSuccess, healTaskStatus HealTaskStatus, err error) { | ||||
| 
 | ||||
| 	if forceStart && forceStop { | ||||
| 		return healStart, healTaskStatus, ErrInvalidArgument("forceStart and forceStop set to true is not allowed") | ||||
| 	} | ||||
| 
 | ||||
| 	body, err := json.Marshal(healOpts) | ||||
| 	if err != nil { | ||||
| 		return healStart, healTaskStatus, err | ||||
| @ -196,8 +207,12 @@ func (adm *AdminClient) Heal(bucket, prefix string, healOpts HealOpts, | ||||
| 		queryVals.Set("clientToken", clientToken) | ||||
| 		body = []byte{} | ||||
| 	} | ||||
| 
 | ||||
| 	// Only one can be set, either forceStart or forceStop. | ||||
| 	if forceStart { | ||||
| 		queryVals.Set("forceStart", "true") | ||||
| 	} else if forceStop { | ||||
| 		queryVals.Set("forceStop", "true") | ||||
| 	} | ||||
| 
 | ||||
| 	resp, err := adm.executeMethod("POST", requestData{ | ||||
| @ -221,9 +236,24 @@ func (adm *AdminClient) Heal(bucket, prefix string, healOpts HealOpts, | ||||
| 
 | ||||
| 	// Was it a status request? | ||||
| 	if clientToken == "" { | ||||
| 		// As a special operation forceStop would return a | ||||
| 		// similar struct as healStart will have the | ||||
| 		// heal sequence information about the heal which | ||||
| 		// was stopped. | ||||
| 		err = json.Unmarshal(respBytes, &healStart) | ||||
| 	} else { | ||||
| 		err = json.Unmarshal(respBytes, &healTaskStatus) | ||||
| 	} | ||||
| 	return healStart, healTaskStatus, err | ||||
| 	if err != nil { | ||||
| 		// Maybe the server responded with an error after the success | ||||
| 		// message; handle it separately here. | ||||
| 		var errResp ErrorResponse | ||||
| 		err = json.Unmarshal(respBytes, &errResp) | ||||
| 		if err != nil { | ||||
| 			// Unknown structure return error anyways. | ||||
| 			return healStart, healTaskStatus, err | ||||
| 		} | ||||
| 		return healStart, healTaskStatus, errResp | ||||
| 	} | ||||
| 	return healStart, healTaskStatus, nil | ||||
| } | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user