fix: re-implement cluster healthcheck (#10101)

This commit is contained in:
Harshavardhana 2020-07-20 18:31:22 -07:00 committed by GitHub
parent 0c4be55936
commit ec06089eda
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 107 additions and 71 deletions

View File

@ -1633,9 +1633,10 @@ func (s *erasureSets) GetMetrics(ctx context.Context) (*Metrics, error) {
return &Metrics{}, NotImplemented{} return &Metrics{}, NotImplemented{}
} }
// IsReady - Returns true if at least n/2 disks (read quorum) are online // Health shouldn't be called directly - will panic
func (s *erasureSets) IsReady(_ context.Context) bool { func (s *erasureSets) Health(ctx context.Context, _ HealthOptions) HealthResult {
return false logger.CriticalIf(ctx, NotImplemented{})
return HealthResult{}
} }
// maintainMRFList gathers the list of successful partial uploads // maintainMRFList gathers the list of successful partial uploads

View File

@ -2007,29 +2007,49 @@ func (z *erasureZones) getZoneAndSet(id string) (int, int, error) {
return 0, 0, fmt.Errorf("DiskID(%s) %w", id, errDiskNotFound) return 0, 0, fmt.Errorf("DiskID(%s) %w", id, errDiskNotFound)
} }
// IsReady - Returns true, when all the erasure sets are writable. // HealthOptions takes input options to return specific information
func (z *erasureZones) IsReady(ctx context.Context) bool { type HealthOptions struct {
Maintenance bool
}
// HealthResult describes the outcome of a health check: the overall
// verdict plus any extra detail about the erasure set that was
// evaluated when the check was made.
type HealthResult struct {
	// Healthy is the overall verdict of the check.
	Healthy bool
	// ZoneID is the index of the zone the result refers to.
	ZoneID int
	// SetID is the index of the erasure set within that zone.
	SetID int
	// WriteQuorum is the write quorum expected for that set.
	WriteQuorum int
}
// Health - returns the current health status of the object layer,
// reporting whether write access exists across all sets. It can
// additionally be used to query whether health would be lost
// if this node were taken down by an external orchestrator.
func (z *erasureZones) Health(ctx context.Context, opts HealthOptions) HealthResult {
erasureSetUpCount := make([][]int, len(z.zones)) erasureSetUpCount := make([][]int, len(z.zones))
for i := range z.zones { for i := range z.zones {
erasureSetUpCount[i] = make([]int, len(z.zones[i].sets)) erasureSetUpCount[i] = make([]int, len(z.zones[i].sets))
} }
diskIDs := globalNotificationSys.GetLocalDiskIDs(ctx) diskIDs := globalNotificationSys.GetLocalDiskIDs(ctx)
if !opts.Maintenance {
diskIDs = append(diskIDs, getLocalDiskIDs(z))
}
diskIDs = append(diskIDs, getLocalDiskIDs(z)...) for _, localDiskIDs := range diskIDs {
for _, id := range localDiskIDs {
for _, id := range diskIDs { zoneIdx, setIdx, err := z.getZoneAndSet(id)
zoneIdx, setIdx, err := z.getZoneAndSet(id) if err != nil {
if err != nil { logger.LogIf(ctx, err)
logger.LogIf(ctx, err) continue
continue }
erasureSetUpCount[zoneIdx][setIdx]++
} }
erasureSetUpCount[zoneIdx][setIdx]++
} }
for zoneIdx := range erasureSetUpCount { for zoneIdx := range erasureSetUpCount {
parityDrives := globalStorageClass.GetParityForSC(storageclass.STANDARD) parityDrives := globalStorageClass.GetParityForSC(storageclass.STANDARD)
diskCount := len(z.zones[zoneIdx].format.Erasure.Sets[0]) diskCount := z.zones[zoneIdx].drivesPerSet
if parityDrives == 0 { if parityDrives == 0 {
parityDrives = getDefaultParityBlocks(diskCount) parityDrives = getDefaultParityBlocks(diskCount)
} }
@ -2042,11 +2062,18 @@ func (z *erasureZones) IsReady(ctx context.Context) bool {
if erasureSetUpCount[zoneIdx][setIdx] < writeQuorum { if erasureSetUpCount[zoneIdx][setIdx] < writeQuorum {
logger.LogIf(ctx, fmt.Errorf("Write quorum lost on zone: %d, set: %d, expected write quorum: %d", logger.LogIf(ctx, fmt.Errorf("Write quorum lost on zone: %d, set: %d, expected write quorum: %d",
zoneIdx, setIdx, writeQuorum)) zoneIdx, setIdx, writeQuorum))
return false return HealthResult{
Healthy: false,
ZoneID: zoneIdx,
SetID: setIdx,
WriteQuorum: writeQuorum,
}
} }
} }
} }
return true return HealthResult{
Healthy: true,
}
} }
// PutObjectTags - replace or add tags to an existing object // PutObjectTags - replace or add tags to an existing object

View File

@ -391,8 +391,8 @@ func (er erasureObjects) crawlAndGetDataUsage(ctx context.Context, buckets []Buc
return nil return nil
} }
// IsReady - shouldn't be called will panic. // Health shouldn't be called directly - will panic
func (er erasureObjects) IsReady(ctx context.Context) bool { func (er erasureObjects) Health(ctx context.Context, _ HealthOptions) HealthResult {
logger.CriticalIf(ctx, NotImplemented{}) logger.CriticalIf(ctx, NotImplemented{})
return true return HealthResult{}
} }

View File

@ -1557,11 +1557,12 @@ func (fs *FSObjects) IsTaggingSupported() bool {
return true return true
} }
// IsReady - Check if the backend disk is ready to accept traffic. // Health returns health of the object layer
func (fs *FSObjects) IsReady(_ context.Context) bool { func (fs *FSObjects) Health(ctx context.Context, opts HealthOptions) HealthResult {
if _, err := os.Stat(fs.fsPath); err != nil { if _, err := os.Stat(fs.fsPath); err != nil {
return false return HealthResult{}
}
return HealthResult{
Healthy: newObjectLayerFn() != nil,
} }
return newObjectLayerFn() != nil
} }

View File

@ -250,7 +250,7 @@ func (a GatewayUnsupported) IsCompressionSupported() bool {
return false return false
} }
// IsReady - No Op. // Health - No Op.
func (a GatewayUnsupported) IsReady(_ context.Context) bool { func (a GatewayUnsupported) Health(_ context.Context, _ HealthOptions) HealthResult {
return false return HealthResult{}
} }

View File

@ -1436,8 +1436,3 @@ func (a *azureObjects) DeleteBucketPolicy(ctx context.Context, bucket string) er
func (a *azureObjects) IsCompressionSupported() bool { func (a *azureObjects) IsCompressionSupported() bool {
return false return false
} }
// IsReady reports whether the layer is ready to take requests, as
// answered by minio.IsBackendOnline for a.endpoint using a.httpClient.
func (a *azureObjects) IsReady(ctx context.Context) bool {
	online := minio.IsBackendOnline(ctx, a.httpClient, a.endpoint)
	return online
}

View File

@ -1508,8 +1508,3 @@ func (l *gcsGateway) DeleteBucketPolicy(ctx context.Context, bucket string) erro
func (l *gcsGateway) IsCompressionSupported() bool { func (l *gcsGateway) IsCompressionSupported() bool {
return false return false
} }
// IsReady reports whether the layer is ready to take requests, as
// answered by minio.IsBackendOnline for the Google Cloud Storage URL.
func (l *gcsGateway) IsReady(ctx context.Context) bool {
	const endpoint = "https://storage.googleapis.com"
	return minio.IsBackendOnline(ctx, l.httpClient, endpoint)
}

View File

@ -786,9 +786,3 @@ func (n *hdfsObjects) AbortMultipartUpload(ctx context.Context, bucket, object,
} }
return hdfsToObjectErr(ctx, n.clnt.Remove(n.hdfsPathJoin(minioMetaTmpBucket, uploadID)), bucket, object, uploadID) return hdfsToObjectErr(ctx, n.clnt.Remove(n.hdfsPathJoin(minioMetaTmpBucket, uploadID)), bucket, object, uploadID)
} }
// IsReady reports whether the layer is ready to take requests, using the
// GatewayOnline flag from the backend section of StorageInfo.
func (n *hdfsObjects) IsReady(ctx context.Context) bool {
	// The second return value of StorageInfo is intentionally discarded.
	info, _ := n.StorageInfo(ctx, false)
	online := info.Backend.GatewayOnline
	return online
}

View File

@ -121,12 +121,6 @@ type nasObjects struct {
minio.ObjectLayer minio.ObjectLayer
} }
// IsReady reports whether the layer is ready to take requests, using the
// GatewayOnline flag from the backend section of StorageInfo.
func (n *nasObjects) IsReady(ctx context.Context) bool {
	// The second return value of StorageInfo is intentionally discarded.
	info, _ := n.StorageInfo(ctx, false)
	online := info.Backend.GatewayOnline
	return online
}
func (n *nasObjects) IsTaggingSupported() bool { func (n *nasObjects) IsTaggingSupported() bool {
return true return true
} }

View File

@ -755,11 +755,6 @@ func (l *s3Objects) IsEncryptionSupported() bool {
return minio.GlobalKMS != nil || len(minio.GlobalGatewaySSE) > 0 return minio.GlobalKMS != nil || len(minio.GlobalGatewaySSE) > 0
} }
// IsReady reports whether the layer is ready to take requests, as
// answered by minio.IsBackendOnline for the client's endpoint URL.
func (l *s3Objects) IsReady(ctx context.Context) bool {
	endpoint := l.Client.EndpointURL().String()
	return minio.IsBackendOnline(ctx, l.HTTPClient, endpoint)
}
func (l *s3Objects) IsTaggingSupported() bool { func (l *s3Objects) IsTaggingSupported() bool {
return true return true
} }

View File

@ -35,8 +35,17 @@ func ClusterCheckHandler(w http.ResponseWriter, r *http.Request) {
ctx, cancel := context.WithTimeout(ctx, globalAPIConfig.getReadyDeadline()) ctx, cancel := context.WithTimeout(ctx, globalAPIConfig.getReadyDeadline())
defer cancel() defer cancel()
if !objLayer.IsReady(ctx) { opts := HealthOptions{Maintenance: r.URL.Query().Get("maintenance") == "true"}
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone) result := objLayer.Health(ctx, opts)
if !result.Healthy {
// As a maintenance call we are purposefully asked to be taken
// down, this is for orchestrators to know if we can safely
// take this server down, return appropriate error.
if opts.Maintenance {
writeResponse(w, http.StatusPreconditionFailed, nil, mimeNone)
} else {
writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
}
return return
} }

View File

@ -1164,26 +1164,21 @@ func (sys *NotificationSys) ServerInfo() []madmin.ServerProperties {
} }
// GetLocalDiskIDs - return disk ids of the local disks of the peers. // GetLocalDiskIDs - return disk ids of the local disks of the peers.
func (sys *NotificationSys) GetLocalDiskIDs(ctx context.Context) []string { func (sys *NotificationSys) GetLocalDiskIDs(ctx context.Context) (localDiskIDs [][]string) {
var diskIDs []string localDiskIDs = make([][]string, len(sys.peerClients))
var mu sync.Mutex
var wg sync.WaitGroup var wg sync.WaitGroup
for _, client := range sys.peerClients { for idx, client := range sys.peerClients {
if client == nil { if client == nil {
continue continue
} }
wg.Add(1) wg.Add(1)
go func(client *peerRESTClient) { go func(idx int, client *peerRESTClient) {
defer wg.Done() defer wg.Done()
ids := client.GetLocalDiskIDs(ctx) localDiskIDs[idx] = client.GetLocalDiskIDs(ctx)
mu.Lock() }(idx, client)
diskIDs = append(diskIDs, ids...)
mu.Unlock()
}(client)
} }
wg.Wait() wg.Wait()
return diskIDs return localDiskIDs
} }
// NewNotificationSys - creates new notification system object. // NewNotificationSys - creates new notification system object.

View File

@ -133,8 +133,8 @@ type ObjectLayer interface {
// Backend related metrics // Backend related metrics
GetMetrics(ctx context.Context) (*Metrics, error) GetMetrics(ctx context.Context) (*Metrics, error)
// Check Readiness // Returns health of the backend
IsReady(ctx context.Context) bool Health(ctx context.Context, opts HealthOptions) HealthResult
// ObjectTagging operations // ObjectTagging operations
PutObjectTags(context.Context, string, string, string, ObjectOptions) error PutObjectTags(context.Context, string, string, string, ObjectOptions) error

View File

@ -38,5 +38,35 @@ This probe always responds with '200 OK'. When readiness probe fails, Kubernetes
``` ```
### Cluster probe ### Cluster probe
This probe is rarely needed; it is meant for administrators to check whether quorum is available in a given cluster. The reply is '200 OK' if the cluster has quorum; otherwise it returns '503 Service Unavailable'. This probe is rarely needed; it is meant for administrators to check whether quorum is available in a given cluster. The reply is '200 OK' if the cluster has quorum; otherwise it returns '503 Service Unavailable'.
```
curl http://minio1:9001/minio/health/cluster
HTTP/1.1 503 Service Unavailable
Accept-Ranges: bytes
Content-Length: 0
Content-Security-Policy: block-all-mixed-content
Server: MinIO/GOGET.GOGET
Vary: Origin
X-Amz-Bucket-Region: us-east-1
X-Amz-Request-Id: 16239D6AB80EBECF
X-Xss-Protection: 1; mode=block
Date: Tue, 21 Jul 2020 00:36:14 GMT
```
#### Checking cluster health for maintenance
You may query the cluster probe endpoint to check whether the node that received the request can be taken down for maintenance. If the server replies with '412 Precondition Failed', taking this node down means you will lose HA (high availability); '200 OK' means you are okay to proceed.
```
curl http://minio1:9001/minio/health/cluster?maintenance=true
HTTP/1.1 412 Precondition Failed
Accept-Ranges: bytes
Content-Length: 0
Content-Security-Policy: block-all-mixed-content
Server: MinIO/GOGET.GOGET
Vary: Origin
X-Amz-Bucket-Region: us-east-1
X-Amz-Request-Id: 16239D63820C6E76
X-Xss-Protection: 1; mode=block
Date: Tue, 21 Jul 2020 00:35:43 GMT
```