fail ready/liveness if etcd is unhealthy in gateway mode (#13146)

This commit is contained in:
Harshavardhana 2021-09-03 17:05:41 -07:00 committed by GitHub
parent 308371b434
commit 1250312287
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 41 additions and 4 deletions

View File

@ -301,9 +301,6 @@ func StartGateway(ctx *cli.Context, gw Gateway) {
logger.FatalIf(globalNotificationSys.Init(GlobalContext, buckets, newObject), "Unable to initialize notification system") logger.FatalIf(globalNotificationSys.Init(GlobalContext, buckets, newObject), "Unable to initialize notification system")
} }
// Initialize users credentials and policies in background.
globalIAMSys.InitStore(newObject)
go globalIAMSys.Init(GlobalContext, newObject) go globalIAMSys.Init(GlobalContext, newObject)
if globalCacheConfig.Enabled { if globalCacheConfig.Enabled {

View File

@ -95,6 +95,17 @@ func ReadinessCheckHandler(w http.ResponseWriter, r *http.Request) {
w.Header().Set(xhttp.MinIOServerStatus, unavailable) w.Header().Set(xhttp.MinIOServerStatus, unavailable)
} }
if globalIsGateway && globalEtcdClient != nil {
// Borrowed from https://github.com/etcd-io/etcd/blob/main/etcdctl/ctlv3/command/ep_command.go#L118
ctx, cancel := context.WithTimeout(r.Context(), defaultContextTimeout)
defer cancel()
// If etcd is unreachable, return an error so the readiness check fails.
if _, err := globalEtcdClient.Get(ctx, "health"); err != nil {
writeErrorResponse(r.Context(), w, toAPIError(r.Context(), err), r.URL)
return
}
}
writeResponse(w, http.StatusOK, nil, mimeNone) writeResponse(w, http.StatusOK, nil, mimeNone)
} }
@ -104,5 +115,17 @@ func LivenessCheckHandler(w http.ResponseWriter, r *http.Request) {
// Service not initialized yet // Service not initialized yet
w.Header().Set(xhttp.MinIOServerStatus, unavailable) w.Header().Set(xhttp.MinIOServerStatus, unavailable)
} }
if globalIsGateway && globalEtcdClient != nil {
// Borrowed from https://github.com/etcd-io/etcd/blob/main/etcdctl/ctlv3/command/ep_command.go#L118
ctx, cancel := context.WithTimeout(r.Context(), defaultContextTimeout)
defer cancel()
// If etcd is unreachable, return an error so the liveness check fails.
if _, err := globalEtcdClient.Get(ctx, "health"); err != nil {
writeErrorResponse(r.Context(), w, toAPIError(r.Context(), err), r.URL)
return
}
}
writeResponse(w, http.StatusOK, nil, mimeNone) writeResponse(w, http.StatusOK, nil, mimeNone)
} }

View File

@ -4,7 +4,7 @@ MinIO server exposes three un-authenticated, healthcheck endpoints liveness prob
### Liveness probe ### Liveness probe
This probe always responds with '200 OK'. When liveness probe fails, Kubernetes like platforms restart the container. This probe always responds with '200 OK'. Only fails if 'etcd' is configured and unreachable. This behavior is specific to gateway. When liveness probe fails, Kubernetes like platforms restart the container.
``` ```
livenessProbe: livenessProbe:
@ -13,6 +13,23 @@ livenessProbe:
port: 9000 port: 9000
scheme: HTTP scheme: HTTP
initialDelaySeconds: 120 initialDelaySeconds: 120
periodSeconds: 30
timeoutSeconds: 10
successThreshold: 1
failureThreshold: 3
```
### Readiness probe
This probe responds with '200 OK' unless 'etcd' is configured and unreachable, in which case it fails. This failure behavior is specific to gateway mode. When the readiness probe fails, Kubernetes-like platforms stop routing traffic to the container.
```
readinessProbe:
httpGet:
path: /minio/health/ready
port: 9000
scheme: HTTP
initialDelaySeconds: 120
periodSeconds: 15 periodSeconds: 15
timeoutSeconds: 10 timeoutSeconds: 10
successThreshold: 1 successThreshold: 1