fix: re-implement cluster healthcheck (#10101)

This commit is contained in:
Harshavardhana 2020-07-20 18:31:22 -07:00 committed by GitHub
parent 0c4be55936
commit ec06089eda
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 107 additions and 71 deletions

View File

@ -1633,9 +1633,10 @@ func (s *erasureSets) GetMetrics(ctx context.Context) (*Metrics, error) {
return &Metrics{}, NotImplemented{}
}
// IsReady - Returns true if at least n/2 disks (read quorum) are online
func (s *erasureSets) IsReady(_ context.Context) bool {
return false
// Health shouldn't be called directly - will panic
func (s *erasureSets) Health(ctx context.Context, _ HealthOptions) HealthResult {
logger.CriticalIf(ctx, NotImplemented{})
return HealthResult{}
}
// maintainMRFList gathers the list of successful partial uploads

View File

@ -2007,18 +2007,37 @@ func (z *erasureZones) getZoneAndSet(id string) (int, int, error) {
return 0, 0, fmt.Errorf("DiskID(%s) %w", id, errDiskNotFound)
}
// IsReady - Returns true, when all the erasure sets are writable.
func (z *erasureZones) IsReady(ctx context.Context) bool {
// HealthOptions takes input options to return specific information
// from a health check.
type HealthOptions struct {
// Maintenance is set by the cluster check handler when the request
// carries the "maintenance=true" query parameter. The erasure zones
// health check only adds this node's local disk IDs to the online
// count when Maintenance is false.
// NOTE(review): presumably this simulates the node being taken down
// by an orchestrator - confirm against the Health implementation.
Maintenance bool
}
// HealthResult describes the current state of the system, along
// with any specific heuristic information which was queried.
type HealthResult struct {
// Healthy reports whether the health check passed.
Healthy bool
// ZoneID and SetID are the indexes of the zone and erasure set on
// which write quorum was found to be lost. They are only populated
// on an unhealthy result caused by lost write quorum and are left
// at their zero value otherwise.
ZoneID, SetID int
// WriteQuorum is the write quorum that was expected for the set
// identified by ZoneID and SetID.
WriteQuorum int
}
// Health - returns the current health status of the object layer,
// reporting whether write quorum exists across all sets. It can
// additionally be used to query whether health would be lost
// if this node is taken down by an external orchestrator.
func (z *erasureZones) Health(ctx context.Context, opts HealthOptions) HealthResult {
erasureSetUpCount := make([][]int, len(z.zones))
for i := range z.zones {
erasureSetUpCount[i] = make([]int, len(z.zones[i].sets))
}
diskIDs := globalNotificationSys.GetLocalDiskIDs(ctx)
if !opts.Maintenance {
diskIDs = append(diskIDs, getLocalDiskIDs(z))
}
diskIDs = append(diskIDs, getLocalDiskIDs(z)...)
for _, id := range diskIDs {
for _, localDiskIDs := range diskIDs {
for _, id := range localDiskIDs {
zoneIdx, setIdx, err := z.getZoneAndSet(id)
if err != nil {
logger.LogIf(ctx, err)
@ -2026,10 +2045,11 @@ func (z *erasureZones) IsReady(ctx context.Context) bool {
}
erasureSetUpCount[zoneIdx][setIdx]++
}
}
for zoneIdx := range erasureSetUpCount {
parityDrives := globalStorageClass.GetParityForSC(storageclass.STANDARD)
diskCount := len(z.zones[zoneIdx].format.Erasure.Sets[0])
diskCount := z.zones[zoneIdx].drivesPerSet
if parityDrives == 0 {
parityDrives = getDefaultParityBlocks(diskCount)
}
@ -2042,11 +2062,18 @@ func (z *erasureZones) IsReady(ctx context.Context) bool {
if erasureSetUpCount[zoneIdx][setIdx] < writeQuorum {
logger.LogIf(ctx, fmt.Errorf("Write quorum lost on zone: %d, set: %d, expected write quorum: %d",
zoneIdx, setIdx, writeQuorum))
return false
return HealthResult{
Healthy: false,
ZoneID: zoneIdx,
SetID: setIdx,
WriteQuorum: writeQuorum,
}
}
}
return true
}
return HealthResult{
Healthy: true,
}
}
// PutObjectTags - replace or add tags to an existing object

View File

@ -391,8 +391,8 @@ func (er erasureObjects) crawlAndGetDataUsage(ctx context.Context, buckets []Buc
return nil
}
// IsReady - shouldn't be called will panic.
func (er erasureObjects) IsReady(ctx context.Context) bool {
// Health shouldn't be called directly - will panic
func (er erasureObjects) Health(ctx context.Context, _ HealthOptions) HealthResult {
logger.CriticalIf(ctx, NotImplemented{})
return true
return HealthResult{}
}

View File

@ -1557,11 +1557,12 @@ func (fs *FSObjects) IsTaggingSupported() bool {
return true
}
// IsReady - Check if the backend disk is ready to accept traffic.
func (fs *FSObjects) IsReady(_ context.Context) bool {
// Health returns health of the object layer
func (fs *FSObjects) Health(ctx context.Context, opts HealthOptions) HealthResult {
if _, err := os.Stat(fs.fsPath); err != nil {
return false
return HealthResult{}
}
return HealthResult{
Healthy: newObjectLayerFn() != nil,
}
return newObjectLayerFn() != nil
}

View File

@ -250,7 +250,7 @@ func (a GatewayUnsupported) IsCompressionSupported() bool {
return false
}
// IsReady - No Op.
func (a GatewayUnsupported) IsReady(_ context.Context) bool {
return false
// Health - No Op.
func (a GatewayUnsupported) Health(_ context.Context, _ HealthOptions) HealthResult {
return HealthResult{}
}

View File

@ -1436,8 +1436,3 @@ func (a *azureObjects) DeleteBucketPolicy(ctx context.Context, bucket string) er
func (a *azureObjects) IsCompressionSupported() bool {
return false
}
// IsReady returns whether the layer is ready to take requests.
func (a *azureObjects) IsReady(ctx context.Context) bool {
return minio.IsBackendOnline(ctx, a.httpClient, a.endpoint)
}

View File

@ -1508,8 +1508,3 @@ func (l *gcsGateway) DeleteBucketPolicy(ctx context.Context, bucket string) erro
func (l *gcsGateway) IsCompressionSupported() bool {
return false
}
// IsReady returns whether the layer is ready to take requests.
func (l *gcsGateway) IsReady(ctx context.Context) bool {
return minio.IsBackendOnline(ctx, l.httpClient, "https://storage.googleapis.com")
}

View File

@ -786,9 +786,3 @@ func (n *hdfsObjects) AbortMultipartUpload(ctx context.Context, bucket, object,
}
return hdfsToObjectErr(ctx, n.clnt.Remove(n.hdfsPathJoin(minioMetaTmpBucket, uploadID)), bucket, object, uploadID)
}
// IsReady returns whether the layer is ready to take requests.
func (n *hdfsObjects) IsReady(ctx context.Context) bool {
si, _ := n.StorageInfo(ctx, false)
return si.Backend.GatewayOnline
}

View File

@ -121,12 +121,6 @@ type nasObjects struct {
minio.ObjectLayer
}
// IsReady returns whether the layer is ready to take requests.
func (n *nasObjects) IsReady(ctx context.Context) bool {
si, _ := n.StorageInfo(ctx, false)
return si.Backend.GatewayOnline
}
func (n *nasObjects) IsTaggingSupported() bool {
return true
}

View File

@ -755,11 +755,6 @@ func (l *s3Objects) IsEncryptionSupported() bool {
return minio.GlobalKMS != nil || len(minio.GlobalGatewaySSE) > 0
}
// IsReady returns whether the layer is ready to take requests.
func (l *s3Objects) IsReady(ctx context.Context) bool {
return minio.IsBackendOnline(ctx, l.HTTPClient, l.Client.EndpointURL().String())
}
func (l *s3Objects) IsTaggingSupported() bool {
return true
}

View File

@ -35,8 +35,17 @@ func ClusterCheckHandler(w http.ResponseWriter, r *http.Request) {
ctx, cancel := context.WithTimeout(ctx, globalAPIConfig.getReadyDeadline())
defer cancel()
if !objLayer.IsReady(ctx) {
opts := HealthOptions{Maintenance: r.URL.Query().Get("maintenance") == "true"}
result := objLayer.Health(ctx, opts)
if !result.Healthy {
// As a maintenance call we are purposefully asked to be taken
// down, this is for orchestrators to know if we can safely
// take this server down, return appropriate error.
if opts.Maintenance {
writeResponse(w, http.StatusPreconditionFailed, nil, mimeNone)
} else {
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
}
return
}

View File

@ -1164,26 +1164,21 @@ func (sys *NotificationSys) ServerInfo() []madmin.ServerProperties {
}
// GetLocalDiskIDs - return disk ids of the local disks of the peers.
func (sys *NotificationSys) GetLocalDiskIDs(ctx context.Context) []string {
var diskIDs []string
var mu sync.Mutex
func (sys *NotificationSys) GetLocalDiskIDs(ctx context.Context) (localDiskIDs [][]string) {
localDiskIDs = make([][]string, len(sys.peerClients))
var wg sync.WaitGroup
for _, client := range sys.peerClients {
for idx, client := range sys.peerClients {
if client == nil {
continue
}
wg.Add(1)
go func(client *peerRESTClient) {
go func(idx int, client *peerRESTClient) {
defer wg.Done()
ids := client.GetLocalDiskIDs(ctx)
mu.Lock()
diskIDs = append(diskIDs, ids...)
mu.Unlock()
}(client)
localDiskIDs[idx] = client.GetLocalDiskIDs(ctx)
}(idx, client)
}
wg.Wait()
return diskIDs
return localDiskIDs
}
// NewNotificationSys - creates new notification system object.

View File

@ -133,8 +133,8 @@ type ObjectLayer interface {
// Backend related metrics
GetMetrics(ctx context.Context) (*Metrics, error)
// Check Readiness
IsReady(ctx context.Context) bool
// Returns health of the backend
Health(ctx context.Context, opts HealthOptions) HealthResult
// ObjectTagging operations
PutObjectTags(context.Context, string, string, string, ObjectOptions) error

View File

@ -38,5 +38,35 @@ This probe always responds with '200 OK'. When readiness probe fails, Kubernetes
```
### Cluster probe
This probe is not useful in almost all cases; it is meant for administrators to see if quorum is available in any given cluster. The reply is '200 OK' if the cluster has quorum; otherwise it returns '503 Service Unavailable'.
```
curl http://minio1:9001/minio/health/cluster
HTTP/1.1 503 Service Unavailable
Accept-Ranges: bytes
Content-Length: 0
Content-Security-Policy: block-all-mixed-content
Server: MinIO/GOGET.GOGET
Vary: Origin
X-Amz-Bucket-Region: us-east-1
X-Amz-Request-Id: 16239D6AB80EBECF
X-Xss-Protection: 1; mode=block
Date: Tue, 21 Jul 2020 00:36:14 GMT
```
#### Checking cluster health for maintenance
You may query the cluster probe endpoint to check if the node which received the request can be taken down for maintenance. If the server replies with '412 Precondition Failed', taking this node down means you will lose HA. '200 OK' means you are okay to proceed.
```
curl "http://minio1:9001/minio/health/cluster?maintenance=true"
HTTP/1.1 412 Precondition Failed
Accept-Ranges: bytes
Content-Length: 0
Content-Security-Policy: block-all-mixed-content
Server: MinIO/GOGET.GOGET
Vary: Origin
X-Amz-Bucket-Region: us-east-1
X-Amz-Request-Id: 16239D63820C6E76
X-Xss-Protection: 1; mode=block
Date: Tue, 21 Jul 2020 00:35:43 GMT
```