Fix Readiness check (#8681)

- Remove goroutine-check in Readiness check
- Bring in quorum check for readiness

Fixes #8385

Co-authored-by: Harshavardhana <harsha@minio.io>
This commit is contained in:
Praveen raj Mani 2019-12-28 22:24:43 +05:30 committed by Nitish Tiwari
parent b2044dd22f
commit 5d09233115
15 changed files with 105 additions and 68 deletions

View File

@ -1356,3 +1356,9 @@ func (fs *FSObjects) IsEncryptionSupported() bool {
func (fs *FSObjects) IsCompressionSupported() bool {
// The FS backend reports compression as supported unconditionally.
return true
}
// IsReady - Reports whether the backend disk can accept traffic.
// The backend is considered ready when stat-ing fs.fsPath succeeds;
// any stat error is reported as not ready.
func (fs *FSObjects) IsReady(_ context.Context) bool {
	if _, err := os.Stat(fs.fsPath); err != nil {
		return false
	}
	return true
}

View File

@ -192,3 +192,8 @@ func (a GatewayUnsupported) IsEncryptionSupported() bool {
func (a GatewayUnsupported) IsCompressionSupported() bool {
// Compression is reported as unsupported unconditionally.
return false
}
// IsReady - performs no backend check and always reports not ready.
func (a GatewayUnsupported) IsReady(_ context.Context) bool {
return false
}

View File

@ -1296,3 +1296,8 @@ func (a *azureObjects) DeleteBucketPolicy(ctx context.Context, bucket string) er
func (a *azureObjects) IsCompressionSupported() bool {
// Compression is reported as unsupported unconditionally.
return false
}
// IsReady returns whether the layer is ready to take requests.
// Readiness is whatever minio.IsBackendOnline reports for the gateway's
// configured endpoint, probed with the gateway's own HTTP client.
func (a *azureObjects) IsReady(ctx context.Context) bool {
	httpClient, endpoint := a.httpClient, a.endpoint
	return minio.IsBackendOnline(ctx, httpClient, endpoint)
}

View File

@ -862,3 +862,8 @@ func (l *b2Objects) DeleteBucketPolicy(ctx context.Context, bucket string) error
func (l *b2Objects) IsCompressionSupported() bool {
// Compression is reported as unsupported unconditionally.
return false
}
// IsReady returns whether the layer is ready to take requests.
// Readiness is whatever minio.IsBackendOnline reports for the fixed
// Backblaze B2 API URL, probed with the gateway's HTTP client.
func (l *b2Objects) IsReady(ctx context.Context) bool {
	const b2APIURL = "https://api.backblazeb2.com/b2api/v1"
	return minio.IsBackendOnline(ctx, l.httpClient, b2APIURL)
}

View File

@ -1467,3 +1467,8 @@ func (l *gcsGateway) DeleteBucketPolicy(ctx context.Context, bucket string) erro
func (l *gcsGateway) IsCompressionSupported() bool {
// Compression is reported as unsupported unconditionally.
return false
}
// IsReady returns whether the layer is ready to take requests.
// Readiness is whatever minio.IsBackendOnline reports for the fixed
// Google Cloud Storage URL, probed with the gateway's HTTP client.
func (l *gcsGateway) IsReady(ctx context.Context) bool {
	const gcsURL = "https://storage.googleapis.com"
	return minio.IsBackendOnline(ctx, l.httpClient, gcsURL)
}

View File

@ -715,3 +715,8 @@ func (n *hdfsObjects) AbortMultipartUpload(ctx context.Context, bucket, object,
}
return hdfsToObjectErr(ctx, n.clnt.Remove(minio.PathJoin(hdfsSeparator, minioMetaTmpBucket, uploadID)), bucket, object, uploadID)
}
// IsReady returns whether the layer is ready to take requests.
// This implementation performs no backend check and always reports ready.
func (n *hdfsObjects) IsReady(_ context.Context) bool {
return true
}

View File

@ -117,3 +117,9 @@ func (n *nasObjects) StorageInfo(ctx context.Context) minio.StorageInfo {
type nasObjects struct {
// Embedded object layer; nasObjects methods reach it as n.ObjectLayer.
minio.ObjectLayer
}
// IsReady returns whether the layer is ready to take requests.
// The layer is reported ready when the embedded object layer's
// StorageInfo describes a backend of type minio.BackendFS.
func (n *nasObjects) IsReady(ctx context.Context) bool {
	return n.ObjectLayer.StorageInfo(ctx).Backend.Type == minio.BackendFS
}

View File

@ -1098,3 +1098,8 @@ func (l *ossObjects) DeleteBucketPolicy(ctx context.Context, bucket string) erro
func (l *ossObjects) IsCompressionSupported() bool {
// Compression is reported as unsupported unconditionally.
return false
}
// IsReady returns whether the layer is ready to take requests.
// Readiness is whatever minio.IsBackendOnline reports for the OSS
// client's configured endpoint, probed with that client's HTTP client.
func (l *ossObjects) IsReady(ctx context.Context) bool {
	httpClient, endpoint := l.Client.HTTPClient, l.Client.Config.Endpoint
	return minio.IsBackendOnline(ctx, httpClient, endpoint)
}

View File

@ -670,3 +670,8 @@ func (l *s3Objects) IsCompressionSupported() bool {
func (l *s3Objects) IsEncryptionSupported() bool {
	// A configured KMS is sufficient on its own.
	if minio.GlobalKMS != nil {
		return true
	}
	// Otherwise encryption is supported only when gateway SSE is non-empty.
	return len(minio.GlobalGatewaySSE) > 0
}
// IsReady returns whether the layer is ready to take requests.
// Readiness is whatever minio.IsBackendOnline reports for the S3
// client's endpoint URL, probed with the gateway's HTTP client.
func (l *s3Objects) IsReady(ctx context.Context) bool {
	endpoint := l.Client.EndpointURL().String()
	return minio.IsBackendOnline(ctx, l.HTTPClient, endpoint)
}

View File

@ -17,31 +17,26 @@
package cmd
import (
"fmt"
"net/http"
"os"
"runtime"
xhttp "github.com/minio/minio/cmd/http"
"github.com/minio/minio/cmd/logger"
)
// minioHealthGoroutineThreshold is a goroutine-count threshold intended
// for use with goroutineCountCheck.
const minioHealthGoroutineThreshold = 10000
// ReadinessCheckHandler -- checks if there are more than threshold
// number of goroutines running, returns service unavailable.
//
// Readiness probes are used to detect situations where application
// is under heavy load and temporarily unable to serve. In a orchestrated
// setup like Kubernetes, containers reporting that they are not ready do
// not receive traffic through Kubernetes Services.
// ReadinessCheckHandler -- Checks if the quorum number of disks are available.
// For FS - Checks if the backend disk is available
// For Zones - Checks if all the zones have enough quorum
func ReadinessCheckHandler(w http.ResponseWriter, r *http.Request) {
if err := goroutineCountCheck(minioHealthGoroutineThreshold); err != nil {
ctx := newContext(r, w, "ReadinessCheckHandler")
objLayer := newObjectLayerFn()
// Service not initialized yet
if objLayer == nil || !objLayer.IsReady(ctx) {
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
return
}
writeResponse(w, http.StatusOK, nil, mimeNone)
}
@ -102,13 +97,3 @@ func LivenessCheckHandler(w http.ResponseWriter, r *http.Request) {
}
writeResponse(w, http.StatusOK, nil, mimeNone)
}
// goroutineCountCheck compares the number of goroutines currently
// reported by the runtime against threshold. It returns an error when
// the count is strictly greater than threshold, and nil otherwise.
func goroutineCountCheck(threshold int) error {
	if running := runtime.NumGoroutine(); running > threshold {
		return fmt.Errorf("too many goroutines (%d > %d)", running, threshold)
	}
	return nil
}

View File

@ -1,44 +0,0 @@
/*
* MinIO Cloud Storage, (C) 2018 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"testing"
"time"
)
// TestGoroutineCountCheck verifies that goroutineCountCheck returns an
// error only when the number of live goroutines exceeds the threshold.
func TestGoroutineCountCheck(t *testing.T) {
	tests := []struct {
		threshold int
		wantErr   bool
	}{
		{5000, false},
		{5, true},
		{6, true},
	}
	for _, tt := range tests {
		// Closed once the check has run, releasing the parked goroutines.
		release := make(chan struct{})
		if tt.wantErr {
			// Park threshold+1 goroutines so the count is guaranteed to be
			// above the threshold while goroutineCountCheck runs. Goroutines
			// that merely sleep for a few nanoseconds can exit before the
			// check observes them, which made the test timing-dependent.
			for i := 0; i <= tt.threshold; i++ {
				go func() {
					select {
					case <-release:
					case <-time.After(10 * time.Second):
						// Safety net: bounds the goroutine's lifetime even
						// if release is never closed.
					}
				}()
			}
		}
		err := goroutineCountCheck(tt.threshold)
		close(release)
		if (err != nil) != tt.wantErr {
			t.Errorf("goroutineCountCheck() error = %v, wantErr %v", err, tt.wantErr)
		}
	}
}

View File

@ -122,4 +122,7 @@ type ObjectLayer interface {
// Backend related metrics
GetMetrics(ctx context.Context) (*Metrics, error)
// Check Readiness
IsReady(ctx context.Context) bool
}

View File

@ -1656,3 +1656,30 @@ func (s *xlSets) GetMetrics(ctx context.Context) (*Metrics, error) {
logger.LogIf(ctx, NotImplemented{})
return &Metrics{}, NotImplemented{}
}
// IsReady - Returns true if more than n/2 disks (quorum) are online,
// where n is the number of endpoints. A disk slot counts as online only
// when it is non-nil and both its locker and the disk itself report
// online. The scan runs under the xlDisksMu read lock and stops as soon
// as quorum is reached.
func (s *xlSets) IsReady(_ context.Context) bool {
	s.xlDisksMu.RLock()
	defer s.xlDisksMu.RUnlock()

	quorum := len(s.endpoints) / 2
	online := 0
	for set := 0; set < s.setCount; set++ {
		for drive := 0; drive < s.drivesPerSet; drive++ {
			// Skip empty slots and slots whose locker is offline.
			if s.xlDisks[set][drive] == nil || !s.xlLockers[set][drive].IsOnline() {
				continue
			}
			if s.xlDisks[set][drive].IsOnline() {
				online++
			}
			// Return if more than n/2 disks are online.
			if online > quorum {
				return true
			}
		}
	}

	// Disks are not ready
	return false
}

View File

@ -245,3 +245,9 @@ func (xl xlObjects) crawlAndGetDataUsage(ctx context.Context, endCh <-chan struc
return dataUsageInfo
}
// IsReady - not implemented for xlObjects: it reports NotImplemented{}
// through logger.CriticalIf and then returns true.
// NOTE(review): other not-implemented stubs in this package (GetMetrics)
// report NotImplemented{} via logger.LogIf; confirm that CriticalIf is the
// intended severity here.
func (xl xlObjects) IsReady(ctx context.Context) bool {
logger.CriticalIf(ctx, NotImplemented{})
return true
}

View File

@ -1359,3 +1359,16 @@ func (z *xlZones) GetMetrics(ctx context.Context) (*Metrics, error) {
logger.LogIf(ctx, NotImplemented{})
return &Metrics{}, NotImplemented{}
}
// IsReady - Returns True if all the zones have enough quorum to accept requests.
// With a single zone the answer is that zone's IsReady; otherwise every
// zone must report ready, and the first zone that does not ends the scan.
func (z *xlZones) IsReady(ctx context.Context) bool {
	if !z.SingleZone() {
		for i := range z.zones {
			if !z.zones[i].IsReady(ctx) {
				return false
			}
		}
		return true
	}
	return z.zones[0].IsReady(ctx)
}