fix: rename READY deadline to CLUSTER deadline ENV (#10535)

This commit is contained in:
Harshavardhana 2020-09-23 09:14:33 -07:00 committed by GitHub
parent eec69d6796
commit 8b74a72b21
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 40 additions and 39 deletions

View File

@ -31,17 +31,23 @@ import (
const ( const (
apiRequestsMax = "requests_max" apiRequestsMax = "requests_max"
apiRequestsDeadline = "requests_deadline" apiRequestsDeadline = "requests_deadline"
apiReadyDeadline = "ready_deadline" apiClusterDeadline = "cluster_deadline"
apiCorsAllowOrigin = "cors_allow_origin" apiCorsAllowOrigin = "cors_allow_origin"
apiRemoteTransportDeadline = "remote_transport_deadline" apiRemoteTransportDeadline = "remote_transport_deadline"
EnvAPIRequestsMax = "MINIO_API_REQUESTS_MAX" EnvAPIRequestsMax = "MINIO_API_REQUESTS_MAX"
EnvAPIRequestsDeadline = "MINIO_API_REQUESTS_DEADLINE" EnvAPIRequestsDeadline = "MINIO_API_REQUESTS_DEADLINE"
EnvAPIReadyDeadline = "MINIO_API_READY_DEADLINE" EnvAPIClusterDeadline = "MINIO_API_CLUSTER_DEADLINE"
EnvAPICorsAllowOrigin = "MINIO_API_CORS_ALLOW_ORIGIN" EnvAPICorsAllowOrigin = "MINIO_API_CORS_ALLOW_ORIGIN"
EnvAPIRemoteTransportDeadline = "MINIO_API_REMOTE_TRANSPORT_DEADLINE" EnvAPIRemoteTransportDeadline = "MINIO_API_REMOTE_TRANSPORT_DEADLINE"
) )
// Deprecated key and ENVs
const (
apiReadyDeadline = "ready_deadline"
EnvAPIReadyDeadline = "MINIO_API_READY_DEADLINE"
)
// DefaultKVS - default storage class config // DefaultKVS - default storage class config
var ( var (
DefaultKVS = config.KVS{ DefaultKVS = config.KVS{
@ -54,7 +60,7 @@ var (
Value: "10s", Value: "10s",
}, },
config.KV{ config.KV{
Key: apiReadyDeadline, Key: apiClusterDeadline,
Value: "10s", Value: "10s",
}, },
config.KV{ config.KV{
@ -72,7 +78,7 @@ var (
type Config struct { type Config struct {
RequestsMax int `json:"requests_max"` RequestsMax int `json:"requests_max"`
RequestsDeadline time.Duration `json:"requests_deadline"` RequestsDeadline time.Duration `json:"requests_deadline"`
ReadyDeadline time.Duration `json:"ready_deadline"` ClusterDeadline time.Duration `json:"cluster_deadline"`
CorsAllowOrigin []string `json:"cors_allow_origin"` CorsAllowOrigin []string `json:"cors_allow_origin"`
RemoteTransportDeadline time.Duration `json:"remote_transport_deadline"` RemoteTransportDeadline time.Duration `json:"remote_transport_deadline"`
} }
@ -90,6 +96,9 @@ func (sCfg *Config) UnmarshalJSON(data []byte) error {
// LookupConfig - lookup api config and override with valid environment settings if any. // LookupConfig - lookup api config and override with valid environment settings if any.
func LookupConfig(kvs config.KVS) (cfg Config, err error) { func LookupConfig(kvs config.KVS) (cfg Config, err error) {
// Remove the deprecated ready_deadline key; it has been replaced by cluster_deadline.
kvs.Delete(apiReadyDeadline)
if err = config.CheckValidKeys(config.APISubSys, kvs, DefaultKVS); err != nil { if err = config.CheckValidKeys(config.APISubSys, kvs, DefaultKVS); err != nil {
return cfg, err return cfg, err
} }
@ -109,7 +118,7 @@ func LookupConfig(kvs config.KVS) (cfg Config, err error) {
return cfg, err return cfg, err
} }
readyDeadline, err := time.ParseDuration(env.Get(EnvAPIReadyDeadline, kvs.Get(apiReadyDeadline))) clusterDeadline, err := time.ParseDuration(env.Get(EnvAPIClusterDeadline, kvs.Get(apiClusterDeadline)))
if err != nil { if err != nil {
return cfg, err return cfg, err
} }
@ -124,7 +133,7 @@ func LookupConfig(kvs config.KVS) (cfg Config, err error) {
return Config{ return Config{
RequestsMax: requestsMax, RequestsMax: requestsMax,
RequestsDeadline: requestsDeadline, RequestsDeadline: requestsDeadline,
ReadyDeadline: readyDeadline, ClusterDeadline: clusterDeadline,
CorsAllowOrigin: corsAllowOrigin, CorsAllowOrigin: corsAllowOrigin,
RemoteTransportDeadline: remoteTransportDeadline, RemoteTransportDeadline: remoteTransportDeadline,
}, nil }, nil

View File

@ -267,6 +267,16 @@ func (kvs KVS) Get(key string) string {
return "" return ""
} }
// Delete - deletes the key if present from the KV list.
func (kvs *KVS) Delete(key string) {
for i, kv := range *kvs {
if kv.Key == key {
*kvs = append((*kvs)[:i], (*kvs)[i+1:]...)
return
}
}
}
// Lookup - lookup a key in a list of KVS // Lookup - lookup a key in a list of KVS
func (kvs KVS) Lookup(key string) (string, bool) { func (kvs KVS) Lookup(key string) (string, bool) {
for _, kv := range kvs { for _, kv := range kvs {

View File

@ -31,7 +31,7 @@ type apiConfig struct {
requestsDeadline time.Duration requestsDeadline time.Duration
requestsPool chan struct{} requestsPool chan struct{}
readyDeadline time.Duration clusterDeadline time.Duration
corsAllowOrigins []string corsAllowOrigins []string
} }
@ -39,7 +39,7 @@ func (t *apiConfig) init(cfg api.Config, setDriveCount int) {
t.mu.Lock() t.mu.Lock()
defer t.mu.Unlock() defer t.mu.Unlock()
t.readyDeadline = cfg.ReadyDeadline t.clusterDeadline = cfg.ClusterDeadline
t.corsAllowOrigins = cfg.CorsAllowOrigin t.corsAllowOrigins = cfg.CorsAllowOrigin
var apiRequestsMaxPerNode int var apiRequestsMaxPerNode int
@ -74,15 +74,15 @@ func (t *apiConfig) getCorsAllowOrigins() []string {
return corsAllowOrigins return corsAllowOrigins
} }
func (t *apiConfig) getReadyDeadline() time.Duration { func (t *apiConfig) getClusterDeadline() time.Duration {
t.mu.RLock() t.mu.RLock()
defer t.mu.RUnlock() defer t.mu.RUnlock()
if t.readyDeadline == 0 { if t.clusterDeadline == 0 {
return 10 * time.Second return 10 * time.Second
} }
return t.readyDeadline return t.clusterDeadline
} }
func (t *apiConfig) getRequestsPool() (chan struct{}, <-chan time.Time) { func (t *apiConfig) getRequestsPool() (chan struct{}, <-chan time.Time) {

View File

@ -33,7 +33,7 @@ func ClusterCheckHandler(w http.ResponseWriter, r *http.Request) {
return return
} }
ctx, cancel := context.WithTimeout(ctx, globalAPIConfig.getReadyDeadline()) ctx, cancel := context.WithTimeout(ctx, globalAPIConfig.getClusterDeadline())
defer cancel() defer cancel()
opts := HealthOptions{Maintenance: r.URL.Query().Get("maintenance") == "true"} opts := HealthOptions{Maintenance: r.URL.Query().Get("maintenance") == "true"}

View File

@ -4,10 +4,10 @@ MinIO server exposes monitoring data over endpoints. Monitoring tools can pick t
### Healthcheck Probe ### Healthcheck Probe
MinIO server has two healthcheck related un-authenticated endpoints, a liveness probe to indicate if server is working fine and a readiness probe to indicate if server is not accepting connections due to heavy load. MinIO server has two healthcheck related un-authenticated endpoints, a liveness probe to indicate if the server is responding, and a cluster probe to check if the server can be taken down for maintenance.
- Liveness probe available at `/minio/health/live` - Liveness probe available at `/minio/health/live`
- Readiness probe available at `/minio/health/ready` - Cluster probe available at `/minio/health/cluster`
Read more on how to use these endpoints in [MinIO healthcheck guide](https://github.com/minio/minio/blob/master/docs/metrics/healthcheck/README.md). Read more on how to use these endpoints in [MinIO healthcheck guide](https://github.com/minio/minio/blob/master/docs/metrics/healthcheck/README.md).

View File

@ -1,6 +1,6 @@
## MinIO Healthcheck ## MinIO Healthcheck
MinIO server exposes three un-authenticated, healthcheck endpoints liveness probe, readiness probe and a cluster probe at `/minio/health/live`, `/minio/health/ready` and `/minio/health/cluster` respectively. MinIO server exposes two un-authenticated healthcheck endpoints, a liveness probe and a cluster probe at `/minio/health/live` and `/minio/health/cluster` respectively.
### Liveness probe ### Liveness probe
@ -12,31 +12,13 @@ This probe always responds with '200 OK'. When liveness probe fails, Kubernetes
path: /minio/health/live path: /minio/health/live
port: 9000 port: 9000
scheme: HTTP scheme: HTTP
initialDelaySeconds: 3 initialDelaySeconds: 120
periodSeconds: 1 periodSeconds: 15
timeoutSeconds: 1 timeoutSeconds: 10
successThreshold: 1 successThreshold: 1
failureThreshold: 3 failureThreshold: 3
``` ```
### Readiness probe
This probe always responds with '200 OK'. When readiness probe fails, Kubernetes like platforms *do not* forward traffic to a pod.
```
readinessProbe:
httpGet:
path: /minio/health/ready
port: 9000
scheme: HTTP
initialDelaySeconds: 3
periodSeconds: 1
timeoutSeconds: 1
successThreshold: 1
failureThreshold: 3
```
### Cluster probe ### Cluster probe
This probe is not useful in almost all cases; it is meant for administrators to see if quorum is available in any given cluster. The reply is '200 OK' if the cluster has quorum; if not, it returns '503 Service Unavailable'. This probe is not useful in almost all cases; it is meant for administrators to see if quorum is available in any given cluster. The reply is '200 OK' if the cluster has quorum; if not, it returns '503 Service Unavailable'.
@ -49,6 +31,7 @@ Content-Security-Policy: block-all-mixed-content
Server: MinIO/GOGET.GOGET Server: MinIO/GOGET.GOGET
Vary: Origin Vary: Origin
X-Amz-Bucket-Region: us-east-1 X-Amz-Bucket-Region: us-east-1
X-Minio-Write-Quorum: 3
X-Amz-Request-Id: 16239D6AB80EBECF X-Amz-Request-Id: 16239D6AB80EBECF
X-Xss-Protection: 1; mode=block X-Xss-Protection: 1; mode=block
Date: Tue, 21 Jul 2020 00:36:14 GMT Date: Tue, 21 Jul 2020 00:36:14 GMT
@ -68,5 +51,6 @@ Vary: Origin
X-Amz-Bucket-Region: us-east-1 X-Amz-Bucket-Region: us-east-1
X-Amz-Request-Id: 16239D63820C6E76 X-Amz-Request-Id: 16239D63820C6E76
X-Xss-Protection: 1; mode=block X-Xss-Protection: 1; mode=block
X-Minio-Write-Quorum: 3
Date: Tue, 21 Jul 2020 00:35:43 GMT Date: Tue, 21 Jul 2020 00:35:43 GMT
``` ```

View File

@ -12,9 +12,7 @@ There are multiple options to deploy MinIO on Kubernetes:
## Monitoring MinIO in Kubernetes ## Monitoring MinIO in Kubernetes
MinIO server exposes un-authenticated readiness and liveness endpoints so Kubernetes can natively identify unhealthy MinIO containers. MinIO also exposes Prometheus compatible data on a different endpoint to enable Prometheus users to natively monitor their MinIO deployments. MinIO server exposes an un-authenticated liveness endpoint so Kubernetes can natively identify unhealthy MinIO containers. MinIO also exposes Prometheus compatible data on a different endpoint to enable Prometheus users to natively monitor their MinIO deployments.
_Note_ : Readiness check is not allowed in distributed MinIO deployment. This is because Kubernetes doesn't allow any traffic to containers whose Readiness checks fail, and in a distributed setup, MinIO server can't respond to Readiness checks until all the nodes are reachable. So, Liveness checks are recommended native Kubernetes monitoring approach for distributed MinIO StatefulSets. Read more about Kubernetes recommendations for [container probes](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-probes).
## Explore Further ## Explore Further