mirror of
https://github.com/minio/minio.git
synced 2025-01-26 14:13:16 -05:00
read-health check endpoint returns success if cluster can serve read requests (#11310)
This commit is contained in:
parent
3d74efa6b1
commit
876b79b8d8
@ -1461,6 +1461,40 @@ type HealthResult struct {
|
||||
WriteQuorum int
|
||||
}
|
||||
|
||||
// ReadHealth returns if the cluster can serve read requests
|
||||
func (z *erasureServerPools) ReadHealth(ctx context.Context) bool {
|
||||
erasureSetUpCount := make([][]int, len(z.serverPools))
|
||||
for i := range z.serverPools {
|
||||
erasureSetUpCount[i] = make([]int, len(z.serverPools[i].sets))
|
||||
}
|
||||
|
||||
diskIDs := globalNotificationSys.GetLocalDiskIDs(ctx)
|
||||
diskIDs = append(diskIDs, getLocalDiskIDs(z))
|
||||
|
||||
for _, localDiskIDs := range diskIDs {
|
||||
for _, id := range localDiskIDs {
|
||||
poolIdx, setIdx, err := z.getPoolAndSet(id)
|
||||
if err != nil {
|
||||
logger.LogIf(ctx, err)
|
||||
continue
|
||||
}
|
||||
erasureSetUpCount[poolIdx][setIdx]++
|
||||
}
|
||||
}
|
||||
|
||||
b := z.BackendInfo()
|
||||
readQuorum := b.StandardSCData[0]
|
||||
|
||||
for poolIdx := range erasureSetUpCount {
|
||||
for setIdx := range erasureSetUpCount[poolIdx] {
|
||||
if erasureSetUpCount[poolIdx][setIdx] < readQuorum {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Health - returns current status of the object layer health,
|
||||
// provides if write access exists across sets, additionally
|
||||
// can be used to query scenarios if health may be lost
|
||||
|
@ -1623,3 +1623,9 @@ func (fs *FSObjects) Health(ctx context.Context, opts HealthOptions) HealthResul
|
||||
Healthy: newObjectLayerFn() != nil,
|
||||
}
|
||||
}
|
||||
|
||||
// ReadHealth returns "read" health of the object layer
|
||||
func (fs *FSObjects) ReadHealth(ctx context.Context) bool {
|
||||
_, err := os.Stat(fs.fsPath)
|
||||
return err == nil
|
||||
}
|
||||
|
@ -254,3 +254,8 @@ func (a GatewayUnsupported) IsCompressionSupported() bool {
|
||||
func (a GatewayUnsupported) Health(_ context.Context, _ HealthOptions) HealthResult {
|
||||
return HealthResult{}
|
||||
}
|
||||
|
||||
// ReadHealth - No Op.
|
||||
func (a GatewayUnsupported) ReadHealth(_ context.Context) bool {
|
||||
return true
|
||||
}
|
||||
|
@ -216,7 +216,8 @@ func guessIsHealthCheckReq(req *http.Request) bool {
|
||||
return aType == authTypeAnonymous && (req.Method == http.MethodGet || req.Method == http.MethodHead) &&
|
||||
(req.URL.Path == healthCheckPathPrefix+healthCheckLivenessPath ||
|
||||
req.URL.Path == healthCheckPathPrefix+healthCheckReadinessPath ||
|
||||
req.URL.Path == healthCheckPathPrefix+healthCheckClusterPath)
|
||||
req.URL.Path == healthCheckPathPrefix+healthCheckClusterPath ||
|
||||
req.URL.Path == healthCheckPathPrefix+healthCheckClusterReadPath)
|
||||
}
|
||||
|
||||
// guessIsMetricsReq - returns true if incoming request looks
|
||||
|
@ -64,6 +64,29 @@ func ClusterCheckHandler(w http.ResponseWriter, r *http.Request) {
|
||||
writeResponse(w, http.StatusOK, nil, mimeNone)
|
||||
}
|
||||
|
||||
// ClusterReadCheckHandler returns if the server is ready for requests.
|
||||
func ClusterReadCheckHandler(w http.ResponseWriter, r *http.Request) {
|
||||
ctx := newContext(r, w, "ClusterReadCheckHandler")
|
||||
|
||||
if shouldProxy() {
|
||||
w.Header().Set(xhttp.MinIOServerStatus, unavailable)
|
||||
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
|
||||
return
|
||||
}
|
||||
|
||||
objLayer := newObjectLayerFn()
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, globalAPIConfig.getClusterDeadline())
|
||||
defer cancel()
|
||||
|
||||
result := objLayer.ReadHealth(ctx)
|
||||
if !result {
|
||||
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
|
||||
return
|
||||
}
|
||||
writeResponse(w, http.StatusOK, nil, mimeNone)
|
||||
}
|
||||
|
||||
// ReadinessCheckHandler Checks if the process is up. Always returns success.
|
||||
func ReadinessCheckHandler(w http.ResponseWriter, r *http.Request) {
|
||||
if shouldProxy() {
|
||||
|
@ -23,11 +23,12 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
healthCheckPath = "/health"
|
||||
healthCheckLivenessPath = "/live"
|
||||
healthCheckReadinessPath = "/ready"
|
||||
healthCheckClusterPath = "/cluster"
|
||||
healthCheckPathPrefix = minioReservedBucketPath + healthCheckPath
|
||||
healthCheckPath = "/health"
|
||||
healthCheckLivenessPath = "/live"
|
||||
healthCheckReadinessPath = "/ready"
|
||||
healthCheckClusterPath = "/cluster"
|
||||
healthCheckClusterReadPath = "/cluster/read"
|
||||
healthCheckPathPrefix = minioReservedBucketPath + healthCheckPath
|
||||
)
|
||||
|
||||
// registerHealthCheckRouter - add handler functions for liveness and readiness routes.
|
||||
@ -38,6 +39,7 @@ func registerHealthCheckRouter(router *mux.Router) {
|
||||
|
||||
// Cluster check handler to verify cluster is active
|
||||
healthRouter.Methods(http.MethodGet).Path(healthCheckClusterPath).HandlerFunc(httpTraceAll(ClusterCheckHandler))
|
||||
healthRouter.Methods(http.MethodGet).Path(healthCheckClusterReadPath).HandlerFunc(httpTraceAll(ClusterReadCheckHandler))
|
||||
|
||||
// Liveness handler
|
||||
healthRouter.Methods(http.MethodGet).Path(healthCheckLivenessPath).HandlerFunc(httpTraceAll(LivenessCheckHandler))
|
||||
|
@ -155,6 +155,7 @@ type ObjectLayer interface {
|
||||
|
||||
// Returns health of the backend
|
||||
Health(ctx context.Context, opts HealthOptions) HealthResult
|
||||
ReadHealth(ctx context.Context) bool
|
||||
|
||||
// ObjectTagging operations
|
||||
PutObjectTags(context.Context, string, string, string, ObjectOptions) (ObjectInfo, error)
|
||||
|
@ -20,7 +20,8 @@ livenessProbe:
|
||||
```
|
||||
|
||||
### Cluster probe
|
||||
This probe is not useful in almost all cases, this is meant for administrators to see if quorum is available in any given cluster. The reply is '200 OK' if cluster has quorum if not it returns '503 Service Unavailable'.
|
||||
#### Cluster-writeable probe
|
||||
This probe is not useful in almost all cases; it is meant for administrators to see if write quorum is available in any given cluster. The reply is '200 OK' if the cluster has write quorum; otherwise it returns '503 Service Unavailable'.
|
||||
|
||||
```
|
||||
curl http://minio1:9001/minio/health/cluster
|
||||
@ -37,6 +38,24 @@ X-Xss-Protection: 1; mode=block
|
||||
Date: Tue, 21 Jul 2020 00:36:14 GMT
|
||||
```
|
||||
|
||||
#### Cluster-readable probe
|
||||
This probe is not useful in almost all cases; it is meant for administrators to see if read quorum is available in any given cluster. The reply is '200 OK' if the cluster has read quorum; otherwise it returns '503 Service Unavailable'.
|
||||
|
||||
```
|
||||
curl http://minio1:9001/minio/health/cluster/read
|
||||
HTTP/1.1 503 Service Unavailable
|
||||
Accept-Ranges: bytes
|
||||
Content-Length: 0
|
||||
Content-Security-Policy: block-all-mixed-content
|
||||
Server: MinIO/GOGET.GOGET
|
||||
Vary: Origin
|
||||
X-Amz-Bucket-Region: us-east-1
|
||||
X-Minio-Write-Quorum: 3
|
||||
X-Amz-Request-Id: 16239D6AB80EBECF
|
||||
X-Xss-Protection: 1; mode=block
|
||||
Date: Tue, 21 Jul 2020 00:36:14 GMT
|
||||
```
|
||||
|
||||
#### Checking cluster health for maintenance
|
||||
You may query the cluster probe endpoint to check if the node which received the request can be taken down for maintenance. If the server replies with '412 Precondition Failed', you will lose HA; '200 OK' means you are okay to proceed.
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user