mirror of
https://github.com/minio/minio.git
synced 2025-01-24 13:13:16 -05:00
Add healthcheck endpoints (#5543)
This PR adds readiness and liveness endpoints to probe Minio server instance health. Endpoints can only be accessed without authentication and the paths are /minio/health/live and /minio/health/ready for liveness and readiness respectively. The new healthcheck liveness endpoint is used for Docker healthcheck now. Fixes #5357 Fixes #5514
This commit is contained in:
parent
d90985b6d8
commit
10b01ac836
@ -193,6 +193,18 @@ func guessIsBrowserReq(req *http.Request) bool {
|
||||
return strings.Contains(req.Header.Get("User-Agent"), "Mozilla")
|
||||
}
|
||||
|
||||
// guessIsHealthCheckReq - returns true if incoming request looks
|
||||
// like healthcheck request
|
||||
func guessIsHealthCheckReq(req *http.Request) bool {
|
||||
if req == nil {
|
||||
return false
|
||||
}
|
||||
aType := getRequestAuthType(req)
|
||||
return req.Method == http.MethodGet && aType == authTypeAnonymous &&
|
||||
(req.URL.Path == healthCheckPathPrefix+healthCheckLivenessPath ||
|
||||
req.URL.Path == healthCheckPathPrefix+healthCheckReadinessPath)
|
||||
}
|
||||
|
||||
// guessIsRPCReq - returns true if the request is for an RPC endpoint.
|
||||
func guessIsRPCReq(req *http.Request) bool {
|
||||
if req == nil {
|
||||
@ -263,7 +275,7 @@ func setReservedBucketHandler(h http.Handler) http.Handler {
|
||||
|
||||
func (h minioReservedBucketHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case guessIsRPCReq(r), guessIsBrowserReq(r), isAdminReq(r):
|
||||
case guessIsRPCReq(r), guessIsBrowserReq(r), guessIsHealthCheckReq(r), isAdminReq(r):
|
||||
// Allow access to reserved buckets
|
||||
default:
|
||||
// For all other requests reject access to reserved
|
||||
|
69
cmd/healthcheck-handler.go
Normal file
69
cmd/healthcheck-handler.go
Normal file
@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2018 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"runtime"
|
||||
)
|
||||
|
||||
// minioHealthGoroutineThreshold is the goroutine count above which the
// readiness probe reports the server as unavailable.
const minioHealthGoroutineThreshold = 1000
|
||||
|
||||
// ReadinessCheckHandler -- checks if there are more than threshold number of goroutines running,
|
||||
// returns service unavailable.
|
||||
// Readiness probes are used to detect situations where application is under heavy load
|
||||
// and temporarily unable to serve. In a orchestrated setup like Kubernetes, containers reporting
|
||||
// that they are not ready do not receive traffic through Kubernetes Services.
|
||||
func ReadinessCheckHandler(w http.ResponseWriter, r *http.Request) {
|
||||
if err := goroutineCountCheck(minioHealthGoroutineThreshold); err != nil {
|
||||
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
|
||||
return
|
||||
}
|
||||
writeResponse(w, http.StatusOK, nil, mimeNone)
|
||||
}
|
||||
|
||||
// LivenessCheckHandler -- checks if server can ListBuckets internally. If not, server is
|
||||
// considered to have failed and needs to be restarted.
|
||||
// Liveness probes are used to detect situations where application (minio)
|
||||
// has gone into a state where it can not recover except by being restarted.
|
||||
func LivenessCheckHandler(w http.ResponseWriter, r *http.Request) {
|
||||
objLayer := newObjectLayerFn()
|
||||
// Service not initialized yet
|
||||
if objLayer == nil {
|
||||
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
|
||||
return
|
||||
}
|
||||
// List buckets is unsuccessful, means server is having issues, send 503 service unavailable
|
||||
if _, err := objLayer.ListBuckets(); err != nil {
|
||||
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
|
||||
return
|
||||
}
|
||||
writeResponse(w, http.StatusOK, nil, mimeNone)
|
||||
}
|
||||
|
||||
// goroutineCountCheck compares the number of goroutines currently in the
// process against threshold. It returns an error when the count is strictly
// greater than threshold and nil otherwise.
func goroutineCountCheck(threshold int) error {
	if running := runtime.NumGoroutine(); running > threshold {
		return fmt.Errorf("too many goroutines (%d > %d)", running, threshold)
	}
	return nil
}
|
44
cmd/healthcheck-handler_test.go
Normal file
44
cmd/healthcheck-handler_test.go
Normal file
@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2018 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestGoroutineCountCheck(t *testing.T) {
|
||||
tests := []struct {
|
||||
threshold int
|
||||
wantErr bool
|
||||
}{
|
||||
{5000, false},
|
||||
{5, true},
|
||||
{6, true},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
// Make goroutines -- to make sure number of go-routines is higher than threshold
|
||||
if tt.threshold == 5 || tt.threshold == 6 {
|
||||
for i := 0; i < 6; i++ {
|
||||
go time.Sleep(5)
|
||||
}
|
||||
}
|
||||
if err := goroutineCountCheck(tt.threshold); (err != nil) != tt.wantErr {
|
||||
t.Errorf("goroutineCountCheck() error = %v, wantErr %v", err, tt.wantErr)
|
||||
}
|
||||
}
|
||||
}
|
43
cmd/healthcheck-router.go
Normal file
43
cmd/healthcheck-router.go
Normal file
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2018 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
router "github.com/gorilla/mux"
|
||||
)
|
||||
|
||||
const (
|
||||
healthCheckPath = "/health"
|
||||
healthCheckLivenessPath = "/live"
|
||||
healthCheckReadinessPath = "/ready"
|
||||
healthCheckPathPrefix = minioReservedBucketPath + healthCheckPath
|
||||
)
|
||||
|
||||
// registerHealthCheckRouter - add handler functions for liveness and readiness routes.
|
||||
func registerHealthCheckRouter(mux *router.Router) {
|
||||
|
||||
// Healthcheck router
|
||||
healthRouter := mux.NewRoute().PathPrefix(healthCheckPathPrefix).Subrouter()
|
||||
|
||||
// Liveness handler
|
||||
healthRouter.Methods(http.MethodGet).Path(healthCheckLivenessPath).HandlerFunc(LivenessCheckHandler)
|
||||
|
||||
// Readiness handler
|
||||
healthRouter.Methods(http.MethodGet).Path(healthCheckReadinessPath).HandlerFunc(ReadinessCheckHandler)
|
||||
}
|
@ -73,6 +73,9 @@ func configureServerHandler(endpoints EndpointList) (http.Handler, error) {
|
||||
// Add Admin router.
|
||||
registerAdminRouter(mux)
|
||||
|
||||
// Add healthcheck router
|
||||
registerHealthCheckRouter(mux)
|
||||
|
||||
// Register web router when its enabled.
|
||||
if globalIsBrowserEnabled {
|
||||
if err := registerWebRouter(mux); err != nil {
|
||||
|
@ -20,7 +20,7 @@ set -x
|
||||
_init () {
|
||||
scheme="http://"
|
||||
address="$(netstat -nplt 2>/dev/null | awk ' /(.*\/minio)/ { gsub(":::","127.0.0.1:",$4); print $4}')"
|
||||
resource="/minio/index.html"
|
||||
resource="/minio/health/live"
|
||||
start=$(stat -c "%Y" /proc/1)
|
||||
}
|
||||
|
||||
@ -34,11 +34,10 @@ healthcheck_main () {
|
||||
exit 0
|
||||
else
|
||||
# Get the http response code
|
||||
http_response=$(curl -H "User-Agent: Mozilla" -s -k -o /dev/null -I -w "%{http_code}" \
|
||||
${scheme}${address}${resource})
|
||||
http_response=$(curl -s -k -o /dev/null -I -w "%{http_code}" ${scheme}${address}${resource})
|
||||
|
||||
# Get the http response body
|
||||
http_response_body=$(curl -H "User-Agent: Mozilla" -k -s ${scheme}${address}${resource})
|
||||
http_response_body=$(curl -k -s ${scheme}${address}${resource})
|
||||
|
||||
# server returns response 403 and body "SSL required" if non-TLS
|
||||
# connection is attempted on a TLS-configured server. Change
|
||||
@ -46,14 +45,11 @@ healthcheck_main () {
|
||||
if [ "$http_response" = "403" ] && \
|
||||
[ "$http_response_body" = "SSL required" ]; then
|
||||
scheme="https://"
|
||||
http_response=$(curl -H "User-Agent: Mozilla" -s -k -o /dev/null -I -w "%{http_code}" \
|
||||
${scheme}${address}${resource})
|
||||
http_response=$(curl -s -k -o /dev/null -I -w "%{http_code}" ${scheme}${address}${resource})
|
||||
fi
|
||||
|
||||
# If http_repsonse is 200 - server is up. When MINIO_BROWSER is
|
||||
# set to off, curl responds with 404. We assume that the server
|
||||
# is up
|
||||
[ "$http_response" = "200" ] || [ "$http_response" = "404" ]
|
||||
# If http_response is 200 - server is up.
|
||||
[ "$http_response" = "200" ]
|
||||
fi
|
||||
}
|
||||
|
||||
|
39
docs/healthcheck/README.md
Normal file
39
docs/healthcheck/README.md
Normal file
@ -0,0 +1,39 @@
|
||||
## Minio Healthcheck
|
||||
|
||||
Minio server exposes two un-authenticated, healthcheck endpoints - liveness probe and readiness probe at `/minio/health/live` and `/minio/health/ready` respectively.
|
||||
|
||||
### Liveness probe
|
||||
This probe is used to identify situations where the server is running but may not behave optimally, e.g. sluggish responses or a corrupt backend. Such problems can *only* be fixed by a restart.
|
||||
|
||||
Internally, Minio liveness probe handler does a ListBuckets call. If successful, the server returns 200 OK, otherwise 503 Service Unavailable.
|
||||
|
||||
When liveness probe fails, Kubernetes like platforms restart the container.
|
||||
|
||||
Sample configuration in a Kubernetes `yaml` file.
|
||||
|
||||
```yaml
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /minio/health/live
|
||||
port: 9000
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 20
|
||||
```
|
||||
|
||||
### Readiness probe
|
||||
This probe is used to identify situations where the server is not ready to accept requests yet. In most cases, such conditions recover in some time.
|
||||
|
||||
Internally, Minio readiness probe handler checks the total number of go-routines. If the number of go-routines does not exceed 1000 (threshold), the server returns 200 OK, otherwise 503 Service Unavailable.
|
||||
|
||||
Platforms like Kubernetes *do not* forward traffic to a pod until its readiness probe is successful.
|
||||
|
||||
Sample configuration in a Kubernetes `yaml` file.
|
||||
|
||||
```yaml
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /minio/health/ready
|
||||
port: 9000
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 20
|
||||
```
|
Loading…
x
Reference in New Issue
Block a user