mirror of
https://github.com/minio/minio.git
synced 2024-12-24 22:25:54 -05:00
Add healthcheck endpoints (#5543)
This PR adds readiness and liveness endpoints to probe Minio server instance health. Endpoints can only be accessed without authentication and the paths are /minio/health/live and /minio/health/ready for liveness and readiness respectively. The new healthcheck liveness endpoint is used for Docker healthcheck now. Fixes #5357 Fixes #5514
This commit is contained in:
parent
d90985b6d8
commit
10b01ac836
@ -193,6 +193,18 @@ func guessIsBrowserReq(req *http.Request) bool {
|
|||||||
return strings.Contains(req.Header.Get("User-Agent"), "Mozilla")
|
return strings.Contains(req.Header.Get("User-Agent"), "Mozilla")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// guessIsHealthCheckReq - returns true if incoming request looks
|
||||||
|
// like healthcheck request
|
||||||
|
func guessIsHealthCheckReq(req *http.Request) bool {
|
||||||
|
if req == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
aType := getRequestAuthType(req)
|
||||||
|
return req.Method == http.MethodGet && aType == authTypeAnonymous &&
|
||||||
|
(req.URL.Path == healthCheckPathPrefix+healthCheckLivenessPath ||
|
||||||
|
req.URL.Path == healthCheckPathPrefix+healthCheckReadinessPath)
|
||||||
|
}
|
||||||
|
|
||||||
// guessIsRPCReq - returns true if the request is for an RPC endpoint.
|
// guessIsRPCReq - returns true if the request is for an RPC endpoint.
|
||||||
func guessIsRPCReq(req *http.Request) bool {
|
func guessIsRPCReq(req *http.Request) bool {
|
||||||
if req == nil {
|
if req == nil {
|
||||||
@ -263,7 +275,7 @@ func setReservedBucketHandler(h http.Handler) http.Handler {
|
|||||||
|
|
||||||
func (h minioReservedBucketHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
func (h minioReservedBucketHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||||
switch {
|
switch {
|
||||||
case guessIsRPCReq(r), guessIsBrowserReq(r), isAdminReq(r):
|
case guessIsRPCReq(r), guessIsBrowserReq(r), guessIsHealthCheckReq(r), isAdminReq(r):
|
||||||
// Allow access to reserved buckets
|
// Allow access to reserved buckets
|
||||||
default:
|
default:
|
||||||
// For all other requests reject access to reserved
|
// For all other requests reject access to reserved
|
||||||
|
69
cmd/healthcheck-handler.go
Normal file
69
cmd/healthcheck-handler.go
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
/*
|
||||||
|
* Minio Cloud Storage, (C) 2018 Minio, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package cmd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"runtime"
|
||||||
|
)
|
||||||
|
|
||||||
|
// minioHealthGoroutineThreshold is the goroutine count above which the
// readiness probe reports the server as unavailable.
const minioHealthGoroutineThreshold = 1000
|
||||||
|
|
||||||
|
// ReadinessCheckHandler -- checks if there are more than threshold number of goroutines running,
|
||||||
|
// returns service unavailable.
|
||||||
|
// Readiness probes are used to detect situations where application is under heavy load
|
||||||
|
// and temporarily unable to serve. In a orchestrated setup like Kubernetes, containers reporting
|
||||||
|
// that they are not ready do not receive traffic through Kubernetes Services.
|
||||||
|
func ReadinessCheckHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if err := goroutineCountCheck(minioHealthGoroutineThreshold); err != nil {
|
||||||
|
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeResponse(w, http.StatusOK, nil, mimeNone)
|
||||||
|
}
|
||||||
|
|
||||||
|
// LivenessCheckHandler -- checks if server can ListBuckets internally. If not, server is
|
||||||
|
// considered to have failed and needs to be restarted.
|
||||||
|
// Liveness probes are used to detect situations where application (minio)
|
||||||
|
// has gone into a state where it can not recover except by being restarted.
|
||||||
|
func LivenessCheckHandler(w http.ResponseWriter, r *http.Request) {
|
||||||
|
objLayer := newObjectLayerFn()
|
||||||
|
// Service not initialized yet
|
||||||
|
if objLayer == nil {
|
||||||
|
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// List buckets is unsuccessful, means server is having issues, send 503 service unavailable
|
||||||
|
if _, err := objLayer.ListBuckets(); err != nil {
|
||||||
|
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeResponse(w, http.StatusOK, nil, mimeNone)
|
||||||
|
}
|
||||||
|
|
||||||
|
// goroutineCountCheck compares the number of goroutines that currently
// exist (as reported by runtime.NumGoroutine) against threshold. It
// returns nil while the count is at or below threshold, and an error
// naming both numbers once the count exceeds it.
func goroutineCountCheck(threshold int) error {
	if running := runtime.NumGoroutine(); running > threshold {
		return fmt.Errorf("too many goroutines (%d > %d)", running, threshold)
	}
	return nil
}
|
44
cmd/healthcheck-handler_test.go
Normal file
44
cmd/healthcheck-handler_test.go
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
/*
|
||||||
|
* Minio Cloud Storage, (C) 2018 Minio, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package cmd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGoroutineCountCheck(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
threshold int
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{5000, false},
|
||||||
|
{5, true},
|
||||||
|
{6, true},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
// Make goroutines -- to make sure number of go-routines is higher than threshold
|
||||||
|
if tt.threshold == 5 || tt.threshold == 6 {
|
||||||
|
for i := 0; i < 6; i++ {
|
||||||
|
go time.Sleep(5)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := goroutineCountCheck(tt.threshold); (err != nil) != tt.wantErr {
|
||||||
|
t.Errorf("goroutineCountCheck() error = %v, wantErr %v", err, tt.wantErr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
43
cmd/healthcheck-router.go
Normal file
43
cmd/healthcheck-router.go
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
/*
|
||||||
|
* Minio Cloud Storage, (C) 2018 Minio, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package cmd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
router "github.com/gorilla/mux"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
healthCheckPath = "/health"
|
||||||
|
healthCheckLivenessPath = "/live"
|
||||||
|
healthCheckReadinessPath = "/ready"
|
||||||
|
healthCheckPathPrefix = minioReservedBucketPath + healthCheckPath
|
||||||
|
)
|
||||||
|
|
||||||
|
// registerHealthCheckRouter - add handler functions for liveness and readiness routes.
|
||||||
|
func registerHealthCheckRouter(mux *router.Router) {
|
||||||
|
|
||||||
|
// Healthcheck router
|
||||||
|
healthRouter := mux.NewRoute().PathPrefix(healthCheckPathPrefix).Subrouter()
|
||||||
|
|
||||||
|
// Liveness handler
|
||||||
|
healthRouter.Methods(http.MethodGet).Path(healthCheckLivenessPath).HandlerFunc(LivenessCheckHandler)
|
||||||
|
|
||||||
|
// Readiness handler
|
||||||
|
healthRouter.Methods(http.MethodGet).Path(healthCheckReadinessPath).HandlerFunc(ReadinessCheckHandler)
|
||||||
|
}
|
@ -73,6 +73,9 @@ func configureServerHandler(endpoints EndpointList) (http.Handler, error) {
|
|||||||
// Add Admin router.
|
// Add Admin router.
|
||||||
registerAdminRouter(mux)
|
registerAdminRouter(mux)
|
||||||
|
|
||||||
|
// Add healthcheck router
|
||||||
|
registerHealthCheckRouter(mux)
|
||||||
|
|
||||||
// Register web router when its enabled.
|
// Register web router when its enabled.
|
||||||
if globalIsBrowserEnabled {
|
if globalIsBrowserEnabled {
|
||||||
if err := registerWebRouter(mux); err != nil {
|
if err := registerWebRouter(mux); err != nil {
|
||||||
|
@ -20,7 +20,7 @@ set -x
|
|||||||
_init () {
|
_init () {
|
||||||
scheme="http://"
|
scheme="http://"
|
||||||
address="$(netstat -nplt 2>/dev/null | awk ' /(.*\/minio)/ { gsub(":::","127.0.0.1:",$4); print $4}')"
|
address="$(netstat -nplt 2>/dev/null | awk ' /(.*\/minio)/ { gsub(":::","127.0.0.1:",$4); print $4}')"
|
||||||
resource="/minio/index.html"
|
resource="/minio/health/live"
|
||||||
start=$(stat -c "%Y" /proc/1)
|
start=$(stat -c "%Y" /proc/1)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -34,11 +34,10 @@ healthcheck_main () {
|
|||||||
exit 0
|
exit 0
|
||||||
else
|
else
|
||||||
# Get the http response code
|
# Get the http response code
|
||||||
http_response=$(curl -H "User-Agent: Mozilla" -s -k -o /dev/null -I -w "%{http_code}" \
|
http_response=$(curl -s -k -o /dev/null -I -w "%{http_code}" ${scheme}${address}${resource})
|
||||||
${scheme}${address}${resource})
|
|
||||||
|
|
||||||
# Get the http response body
|
# Get the http response body
|
||||||
http_response_body=$(curl -H "User-Agent: Mozilla" -k -s ${scheme}${address}${resource})
|
http_response_body=$(curl -k -s ${scheme}${address}${resource})
|
||||||
|
|
||||||
# server returns response 403 and body "SSL required" if non-TLS
|
# server returns response 403 and body "SSL required" if non-TLS
|
||||||
# connection is attempted on a TLS-configured server. Change
|
# connection is attempted on a TLS-configured server. Change
|
||||||
@ -46,14 +45,11 @@ healthcheck_main () {
|
|||||||
if [ "$http_response" = "403" ] && \
|
if [ "$http_response" = "403" ] && \
|
||||||
[ "$http_response_body" = "SSL required" ]; then
|
[ "$http_response_body" = "SSL required" ]; then
|
||||||
scheme="https://"
|
scheme="https://"
|
||||||
http_response=$(curl -H "User-Agent: Mozilla" -s -k -o /dev/null -I -w "%{http_code}" \
|
http_response=$(curl -s -k -o /dev/null -I -w "%{http_code}" ${scheme}${address}${resource})
|
||||||
${scheme}${address}${resource})
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# If http_repsonse is 200 - server is up. When MINIO_BROWSER is
|
# If http_response is 200 - server is up.
|
||||||
# set to off, curl responds with 404. We assume that the server
|
[ "$http_response" = "200" ]
|
||||||
# is up
|
|
||||||
[ "$http_response" = "200" ] || [ "$http_response" = "404" ]
|
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
39
docs/healthcheck/README.md
Normal file
39
docs/healthcheck/README.md
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
## Minio Healthcheck
|
||||||
|
|
||||||
|
Minio server exposes two unauthenticated healthcheck endpoints - a liveness probe and a readiness probe - at `/minio/health/live` and `/minio/health/ready` respectively.
|
||||||
|
|
||||||
|
### Liveness probe
|
||||||
|
This probe is used to identify situations where the server is running but may not behave optimally, e.g. sluggish responses or a corrupt backend. Such problems can *only* be fixed by a restart.
|
||||||
|
|
||||||
|
Internally, Minio liveness probe handler does a ListBuckets call. If successful, the server returns 200 OK, otherwise 503 Service Unavailable.
|
||||||
|
|
||||||
|
When the liveness probe fails, Kubernetes-like platforms restart the container.
|
||||||
|
|
||||||
|
Sample configuration in a Kubernetes `yaml` file.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /minio/health/live
|
||||||
|
port: 9000
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 20
|
||||||
|
```
|
||||||
|
|
||||||
|
### Readiness probe
|
||||||
|
This probe is used to identify situations where the server is not ready to accept requests yet. In most cases, such conditions recover in some time.
|
||||||
|
|
||||||
|
Internally, Minio readiness probe handler checks the total number of go-routines. If the number of go-routines does not exceed 1000 (the threshold), the server returns 200 OK, otherwise 503 Service Unavailable.
|
||||||
|
|
||||||
|
Platforms like Kubernetes *do not* forward traffic to a pod until its readiness probe is successful.
|
||||||
|
|
||||||
|
Sample configuration in a Kubernetes `yaml` file.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /minio/health/ready
|
||||||
|
port: 9000
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 20
|
||||||
|
```
|
Loading…
Reference in New Issue
Block a user