mirror of
https://github.com/minio/minio.git
synced 2025-01-26 14:13:16 -05:00
Fix Readiness check (#8681)
- Remove goroutine-check in Readiness check
- Bring in quorum check for readiness

Fixes #8385

Co-authored-by: Harshavardhana <harsha@minio.io>
This commit is contained in:
parent
b2044dd22f
commit
5d09233115
@ -1356,3 +1356,9 @@ func (fs *FSObjects) IsEncryptionSupported() bool {
|
||||
func (fs *FSObjects) IsCompressionSupported() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// IsReady - Check if the backend disk is ready to accept traffic.
|
||||
func (fs *FSObjects) IsReady(_ context.Context) bool {
|
||||
_, err := os.Stat(fs.fsPath)
|
||||
return err == nil
|
||||
}
|
||||
|
@ -192,3 +192,8 @@ func (a GatewayUnsupported) IsEncryptionSupported() bool {
|
||||
func (a GatewayUnsupported) IsCompressionSupported() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// IsReady - No Op.
|
||||
func (a GatewayUnsupported) IsReady(_ context.Context) bool {
|
||||
return false
|
||||
}
|
||||
|
@ -1296,3 +1296,8 @@ func (a *azureObjects) DeleteBucketPolicy(ctx context.Context, bucket string) er
|
||||
func (a *azureObjects) IsCompressionSupported() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// IsReady returns whether the layer is ready to take requests.
|
||||
func (a *azureObjects) IsReady(ctx context.Context) bool {
|
||||
return minio.IsBackendOnline(ctx, a.httpClient, a.endpoint)
|
||||
}
|
||||
|
@ -862,3 +862,8 @@ func (l *b2Objects) DeleteBucketPolicy(ctx context.Context, bucket string) error
|
||||
func (l *b2Objects) IsCompressionSupported() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// IsReady returns whether the layer is ready to take requests.
|
||||
func (l *b2Objects) IsReady(ctx context.Context) bool {
|
||||
return minio.IsBackendOnline(ctx, l.httpClient, "https://api.backblazeb2.com/b2api/v1")
|
||||
}
|
||||
|
@ -1467,3 +1467,8 @@ func (l *gcsGateway) DeleteBucketPolicy(ctx context.Context, bucket string) erro
|
||||
func (l *gcsGateway) IsCompressionSupported() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// IsReady returns whether the layer is ready to take requests.
|
||||
func (l *gcsGateway) IsReady(ctx context.Context) bool {
|
||||
return minio.IsBackendOnline(ctx, l.httpClient, "https://storage.googleapis.com")
|
||||
}
|
||||
|
@ -715,3 +715,8 @@ func (n *hdfsObjects) AbortMultipartUpload(ctx context.Context, bucket, object,
|
||||
}
|
||||
return hdfsToObjectErr(ctx, n.clnt.Remove(minio.PathJoin(hdfsSeparator, minioMetaTmpBucket, uploadID)), bucket, object, uploadID)
|
||||
}
|
||||
|
||||
// IsReady returns whether the layer is ready to take requests.
|
||||
func (n *hdfsObjects) IsReady(_ context.Context) bool {
|
||||
return true
|
||||
}
|
||||
|
@ -117,3 +117,9 @@ func (n *nasObjects) StorageInfo(ctx context.Context) minio.StorageInfo {
|
||||
type nasObjects struct {
|
||||
minio.ObjectLayer
|
||||
}
|
||||
|
||||
// IsReady returns whether the layer is ready to take requests.
|
||||
func (n *nasObjects) IsReady(ctx context.Context) bool {
|
||||
sinfo := n.ObjectLayer.StorageInfo(ctx)
|
||||
return sinfo.Backend.Type == minio.BackendFS
|
||||
}
|
||||
|
@ -1098,3 +1098,8 @@ func (l *ossObjects) DeleteBucketPolicy(ctx context.Context, bucket string) erro
|
||||
func (l *ossObjects) IsCompressionSupported() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// IsReady returns whether the layer is ready to take requests.
|
||||
func (l *ossObjects) IsReady(ctx context.Context) bool {
|
||||
return minio.IsBackendOnline(ctx, l.Client.HTTPClient, l.Client.Config.Endpoint)
|
||||
}
|
||||
|
@ -670,3 +670,8 @@ func (l *s3Objects) IsCompressionSupported() bool {
|
||||
func (l *s3Objects) IsEncryptionSupported() bool {
|
||||
return minio.GlobalKMS != nil || len(minio.GlobalGatewaySSE) > 0
|
||||
}
|
||||
|
||||
// IsReady returns whether the layer is ready to take requests.
|
||||
func (l *s3Objects) IsReady(ctx context.Context) bool {
|
||||
return minio.IsBackendOnline(ctx, l.HTTPClient, l.Client.EndpointURL().String())
|
||||
}
|
||||
|
@ -17,31 +17,26 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
xhttp "github.com/minio/minio/cmd/http"
|
||||
"github.com/minio/minio/cmd/logger"
|
||||
)
|
||||
|
||||
// minioHealthGoroutineThreshold is the goroutine count passed as the
// threshold to goroutineCountCheck by the health handlers.
const minioHealthGoroutineThreshold = 10000
|
||||
|
||||
// ReadinessCheckHandler -- checks if there are more than threshold
|
||||
// number of goroutines running, returns service unavailable.
|
||||
//
|
||||
// Readiness probes are used to detect situations where application
|
||||
// is under heavy load and temporarily unable to serve. In a orchestrated
|
||||
// setup like Kubernetes, containers reporting that they are not ready do
|
||||
// not receive traffic through Kubernetes Services.
|
||||
// ReadinessCheckHandler -- Checks if the quorum number of disks are available.
|
||||
// For FS - Checks if the backend disk is available
|
||||
// For Zones - Checks if all the zones have enough quorum
|
||||
func ReadinessCheckHandler(w http.ResponseWriter, r *http.Request) {
|
||||
if err := goroutineCountCheck(minioHealthGoroutineThreshold); err != nil {
|
||||
ctx := newContext(r, w, "ReadinessCheckHandler")
|
||||
|
||||
objLayer := newObjectLayerFn()
|
||||
// Service not initialized yet
|
||||
if objLayer == nil || !objLayer.IsReady(ctx) {
|
||||
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
|
||||
return
|
||||
}
|
||||
|
||||
writeResponse(w, http.StatusOK, nil, mimeNone)
|
||||
}
|
||||
|
||||
@ -102,13 +97,3 @@ func LivenessCheckHandler(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
writeResponse(w, http.StatusOK, nil, mimeNone)
|
||||
}
|
||||
|
||||
// goroutineCountCheck compares the current number of goroutines, as
// reported by runtime.NumGoroutine, against threshold. It returns a
// descriptive error when the count is strictly greater than threshold and
// nil otherwise.
func goroutineCountCheck(threshold int) error {
	if running := runtime.NumGoroutine(); running > threshold {
		return fmt.Errorf("too many goroutines (%d > %d)", running, threshold)
	}
	return nil
}
|
||||
|
@ -1,44 +0,0 @@
|
||||
/*
|
||||
* MinIO Cloud Storage, (C) 2018 MinIO, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestGoroutineCountCheck(t *testing.T) {
|
||||
tests := []struct {
|
||||
threshold int
|
||||
wantErr bool
|
||||
}{
|
||||
{5000, false},
|
||||
{5, true},
|
||||
{6, true},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
// Make goroutines -- to make sure number of go-routines is higher than threshold
|
||||
if tt.threshold == 5 || tt.threshold == 6 {
|
||||
for i := 0; i < 6; i++ {
|
||||
go time.Sleep(5 * time.Nanosecond)
|
||||
}
|
||||
}
|
||||
if err := goroutineCountCheck(tt.threshold); (err != nil) != tt.wantErr {
|
||||
t.Errorf("goroutineCountCheck() error = %v, wantErr %v", err, tt.wantErr)
|
||||
}
|
||||
}
|
||||
}
|
@ -122,4 +122,7 @@ type ObjectLayer interface {
|
||||
|
||||
// Backend related metrics
|
||||
GetMetrics(ctx context.Context) (*Metrics, error)
|
||||
|
||||
// Check Readiness
|
||||
IsReady(ctx context.Context) bool
|
||||
}
|
||||
|
@ -1656,3 +1656,30 @@ func (s *xlSets) GetMetrics(ctx context.Context) (*Metrics, error) {
|
||||
logger.LogIf(ctx, NotImplemented{})
|
||||
return &Metrics{}, NotImplemented{}
|
||||
}
|
||||
|
||||
// IsReady - Returns true if more than n/2 disks (quorum) are online
|
||||
func (s *xlSets) IsReady(_ context.Context) bool {
|
||||
s.xlDisksMu.RLock()
|
||||
defer s.xlDisksMu.RUnlock()
|
||||
|
||||
var activeDisks int
|
||||
for i := 0; i < s.setCount; i++ {
|
||||
for j := 0; j < s.drivesPerSet; j++ {
|
||||
if s.xlDisks[i][j] == nil {
|
||||
continue
|
||||
}
|
||||
if !s.xlLockers[i][j].IsOnline() {
|
||||
continue
|
||||
}
|
||||
if s.xlDisks[i][j].IsOnline() {
|
||||
activeDisks++
|
||||
}
|
||||
// Return if more than n/2 disks are online.
|
||||
if activeDisks > len(s.endpoints)/2 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
// Disks are not ready
|
||||
return false
|
||||
}
|
||||
|
@ -245,3 +245,9 @@ func (xl xlObjects) crawlAndGetDataUsage(ctx context.Context, endCh <-chan struc
|
||||
|
||||
return dataUsageInfo
|
||||
}
|
||||
|
||||
// IsReady - No Op.
|
||||
func (xl xlObjects) IsReady(ctx context.Context) bool {
|
||||
logger.CriticalIf(ctx, NotImplemented{})
|
||||
return true
|
||||
}
|
||||
|
@ -1359,3 +1359,16 @@ func (z *xlZones) GetMetrics(ctx context.Context) (*Metrics, error) {
|
||||
logger.LogIf(ctx, NotImplemented{})
|
||||
return &Metrics{}, NotImplemented{}
|
||||
}
|
||||
|
||||
// IsReady - Returns True if all the zones have enough quorum to accept requests.
|
||||
func (z *xlZones) IsReady(ctx context.Context) bool {
|
||||
if z.SingleZone() {
|
||||
return z.zones[0].IsReady(ctx)
|
||||
}
|
||||
for _, xlsets := range z.zones {
|
||||
if !xlsets.IsReady(ctx) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user