export cluster health as prometheus metrics (#17741)
This commit is contained in:
parent c2edbfae55
commit 114fab4c70
@@ -2060,9 +2060,13 @@ type HealthOptions struct {
 // additionally with any specific heuristic information which
 // was queried
 type HealthResult struct {
 	Healthy       bool
 	HealingDrives int
-	PoolID, SetID int
+	UnhealthyPools []struct {
+		Maintenance   bool
+		PoolID, SetID int
+		WriteQuorum   int
+	}
 	WriteQuorum   int
 	UsingDefaults bool
 }
@@ -2164,24 +2168,6 @@ func (z *erasureServerPools) Health(ctx context.Context, opts HealthOptions) HealthResult {
 		usingDefaults = true
 	}
 
-	for poolIdx := range erasureSetUpCount {
-		for setIdx := range erasureSetUpCount[poolIdx] {
-			if erasureSetUpCount[poolIdx][setIdx] < poolWriteQuorums[poolIdx] {
-				logger.LogIf(logger.SetReqInfo(ctx, reqInfo),
-					fmt.Errorf("Write quorum may be lost on pool: %d, set: %d, expected write quorum: %d",
-						poolIdx, setIdx, poolWriteQuorums[poolIdx]))
-				return HealthResult{
-					Healthy:       false,
-					HealingDrives: len(aggHealStateResult.HealDisks),
-					PoolID:        poolIdx,
-					SetID:         setIdx,
-					WriteQuorum:   poolWriteQuorums[poolIdx],
-					UsingDefaults: usingDefaults, // indicates if config was not initialized and we are using defaults on this node.
-				}
-			}
-		}
-	}
-
 	var maximumWriteQuorum int
 	for _, writeQuorum := range poolWriteQuorums {
 		if maximumWriteQuorum == 0 {
@@ -2192,6 +2178,35 @@ func (z *erasureServerPools) Health(ctx context.Context, opts HealthOptions) HealthResult {
 		}
 	}
 
+	result := HealthResult{
+		HealingDrives: len(aggHealStateResult.HealDisks),
+		WriteQuorum:   maximumWriteQuorum,
+		UsingDefaults: usingDefaults, // indicates if config was not initialized and we are using defaults on this node.
+	}
+
+	for poolIdx := range erasureSetUpCount {
+		for setIdx := range erasureSetUpCount[poolIdx] {
+			if erasureSetUpCount[poolIdx][setIdx] < poolWriteQuorums[poolIdx] {
+				logger.LogIf(logger.SetReqInfo(ctx, reqInfo),
+					fmt.Errorf("Write quorum may be lost on pool: %d, set: %d, expected write quorum: %d",
+						poolIdx, setIdx, poolWriteQuorums[poolIdx]))
+				result.UnhealthyPools = append(result.UnhealthyPools, struct {
+					Maintenance                bool
+					PoolID, SetID, WriteQuorum int
+				}{
+					Maintenance: opts.Maintenance,
+					SetID:       setIdx,
+					PoolID:      poolIdx,
+					WriteQuorum: poolWriteQuorums[poolIdx],
+				})
+			}
+		}
+		if len(result.UnhealthyPools) > 0 {
+			// We have unhealthy pools return error.
+			return result
+		}
+	}
+
 	// when maintenance is not specified we don't have
 	// to look at the healing side of the code.
 	if !opts.Maintenance {
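
(Not part of the commit.) A minimal sketch of how a caller could consume the reworked result: because Health() now collects every erasure set that has lost write quorum in UnhealthyPools instead of returning after the first one, a consumer can report all of them at once. The logUnhealthyPools helper below is illustrative, assumes it lives in the same package as HealthResult, and only needs the standard log package.

package cmd

import "log"

// logUnhealthyPools is an illustrative helper, not part of this commit.
// It walks a HealthResult and reports every erasure set that has fallen
// below write quorum, which the old single PoolID/SetID fields could not
// express.
func logUnhealthyPools(result HealthResult) bool {
	for _, p := range result.UnhealthyPools {
		log.Printf("pool %d / set %d below write quorum (need %d, maintenance call: %v)",
			p.PoolID, p.SetID, p.WriteQuorum, p.Maintenance)
	}
	return result.Healthy
}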
@@ -28,22 +28,17 @@ import (
 
 const unavailable = "offline"
 
-func isServerNotInitialized() bool {
-	return newObjectLayerFn() == nil
-}
-
 // ClusterCheckHandler returns if the server is ready for requests.
 func ClusterCheckHandler(w http.ResponseWriter, r *http.Request) {
 	ctx := newContext(r, w, "ClusterCheckHandler")
 
-	if isServerNotInitialized() {
+	objLayer := newObjectLayerFn()
+	if objLayer == nil {
 		w.Header().Set(xhttp.MinIOServerStatus, unavailable)
 		writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
 		return
 	}
 
-	objLayer := newObjectLayerFn()
-
 	ctx, cancel := context.WithTimeout(ctx, globalAPIConfig.getClusterDeadline())
 	defer cancel()
 
@@ -52,16 +47,13 @@ func ClusterCheckHandler(w http.ResponseWriter, r *http.Request) {
 		DeploymentType: r.Form.Get("deployment-type"),
 	}
 	result := objLayer.Health(ctx, opts)
-	if result.WriteQuorum > 0 {
-		w.Header().Set(xhttp.MinIOWriteQuorum, strconv.Itoa(result.WriteQuorum))
-	}
+	w.Header().Set(xhttp.MinIOWriteQuorum, strconv.Itoa(result.WriteQuorum))
 	w.Header().Set(xhttp.MinIOStorageClassDefaults, strconv.FormatBool(result.UsingDefaults))
-
+	// return how many drives are being healed if any
+	if result.HealingDrives > 0 {
+		w.Header().Set(xhttp.MinIOHealingDrives, strconv.Itoa(result.HealingDrives))
+	}
 	if !result.Healthy {
-		// return how many drives are being healed if any
-		if result.HealingDrives > 0 {
-			w.Header().Set(xhttp.MinIOHealingDrives, strconv.Itoa(result.HealingDrives))
-		}
 		// As a maintenance call we are purposefully asked to be taken
 		// down, this is for orchestrators to know if we can safely
 		// take this server down, return appropriate error.
@@ -79,14 +71,13 @@ func ClusterCheckHandler(w http.ResponseWriter, r *http.Request) {
 func ClusterReadCheckHandler(w http.ResponseWriter, r *http.Request) {
 	ctx := newContext(r, w, "ClusterReadCheckHandler")
 
-	if isServerNotInitialized() {
+	objLayer := newObjectLayerFn()
+	if objLayer == nil {
 		w.Header().Set(xhttp.MinIOServerStatus, unavailable)
 		writeResponse(w, http.StatusServiceUnavailable, nil, mimeNone)
 		return
 	}
 
-	objLayer := newObjectLayerFn()
-
 	ctx, cancel := context.WithTimeout(ctx, globalAPIConfig.getClusterDeadline())
 	defer cancel()
 
@@ -106,17 +97,17 @@ func ReadinessCheckHandler(w http.ResponseWriter, r *http.Request) {
 
 // LivenessCheckHandler - Checks if the process is up. Always returns success.
 func LivenessCheckHandler(w http.ResponseWriter, r *http.Request) {
-	peerCall := r.Header.Get("x-minio-from-peer") != ""
-
-	if peerCall {
-		return
-	}
-
-	if isServerNotInitialized() {
+	objLayer := newObjectLayerFn()
+	if objLayer == nil {
 		// Service not initialized yet
 		w.Header().Set(xhttp.MinIOServerStatus, unavailable)
 	}
 
+	peerCall := r.Header.Get(xhttp.MinIOPeerCall) != ""
+	if peerCall {
+		return
+	}
+
 	if int(globalHTTPStats.loadRequestsInQueue()) > globalAPIConfig.getRequestsPoolCapacity() {
 		apiErr := getAPIError(ErrBusy)
 		switch r.Method {
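
(Not part of the commit.) With the handler changes above, an orchestrator-side probe can read the quorum and healing information straight from the response headers. The sketch below is illustrative: /minio/health/cluster is the documented cluster health endpoint, x-minio-server-status appears in this commit, and the x-minio-write-quorum / x-minio-healing-drives header names are assumed values of the xhttp constants used above.

package main

import (
	"fmt"
	"net/http"
	"time"
)

// clusterReady is an illustrative orchestrator probe. A non-2xx status means
// the cluster is still initializing or has lost write quorum.
func clusterReady(endpoint string) (bool, error) {
	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Get(endpoint + "/minio/health/cluster")
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()

	// Header names below are assumptions, not verified against this commit.
	fmt.Println("server status :", resp.Header.Get("x-minio-server-status"))
	fmt.Println("write quorum  :", resp.Header.Get("x-minio-write-quorum"))
	fmt.Println("healing drives:", resp.Header.Get("x-minio-healing-drives"))

	return resp.StatusCode == http.StatusOK, nil
}

func main() {
	ready, err := clusterReady("http://localhost:9000")
	fmt.Println("ready:", ready, "err:", err)
}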
@@ -54,6 +54,7 @@ func init() {
 		getClusterTierMetrics(),
 		getClusterUsageMetrics(),
 		getKMSMetrics(),
+		getClusterHealthMetrics(),
 	}
 
 	peerMetricsGroups = []*MetricsGroup{
@@ -2642,6 +2643,63 @@ func getLocalDriveStorageMetrics() *MetricsGroup {
 	return mg
 }
 
+func getClusterWriteQuorumMD() MetricDescription {
+	return MetricDescription{
+		Namespace: clusterMetricNamespace,
+		Subsystem: "write",
+		Name:      "quorum",
+		Help:      "Maximum write quorum across all pools and sets",
+		Type:      gaugeMetric,
+	}
+}
+
+func getClusterHealthStatusMD() MetricDescription {
+	return MetricDescription{
+		Namespace: clusterMetricNamespace,
+		Subsystem: "health",
+		Name:      "status",
+		Help:      "Get current cluster health status",
+		Type:      gaugeMetric,
+	}
+}
+
+func getClusterHealthMetrics() *MetricsGroup {
+	mg := &MetricsGroup{
+		cacheInterval: 10 * time.Second,
+	}
+	mg.RegisterRead(func(ctx context.Context) (metrics []Metric) {
+		objLayer := newObjectLayerFn()
+		// Service not initialized yet
+		if objLayer == nil {
+			return
+		}
+
+		metrics = make([]Metric, 0, 2)
+
+		opts := HealthOptions{}
+		result := objLayer.Health(ctx, opts)
+
+		metrics = append(metrics, Metric{
+			Description: getClusterWriteQuorumMD(),
+			Value:       float64(result.WriteQuorum),
+		})
+
+		health := 1
+		if !result.Healthy {
+			health = 0
+		}
+
+		metrics = append(metrics, Metric{
+			Description: getClusterHealthStatusMD(),
+			Value:       float64(health),
+		})
+
+		return
+	})
+
+	return mg
+}
+
 func getClusterStorageMetrics() *MetricsGroup {
 	mg := &MetricsGroup{
 		cacheInterval: 1 * time.Minute,
@@ -138,8 +138,8 @@ func isServerResolvable(endpoint Endpoint, timeout time.Duration) error {
 	if err != nil {
 		return err
 	}
-
-	req.Header.Set("x-minio-from-peer", "true")
+	// Indicate that the liveness check for a peer call
+	req.Header.Set(xhttp.MinIOPeerCall, "true")
 
 	resp, err := httpClient.Do(req)
 	if err != nil {
@@ -40,6 +40,8 @@ These metrics can be obtained from any MinIO server once per collection.
 | `minio_cluster_kms_uptime`          | The time the KMS has been up and running in seconds.          |
 | `minio_cluster_nodes_offline_total` | Total number of MinIO nodes offline.                          |
 | `minio_cluster_nodes_online_total`  | Total number of MinIO nodes online.                           |
+| `minio_cluster_write_quorum`        | Maximum write quorum across all pools and sets                |
+| `minio_cluster_health_status`       | Get current cluster health status                             |
 | `minio_heal_objects_errors_total`   | Objects for which healing failed in current self healing run. |
 | `minio_heal_objects_heal_total`     | Objects healed in current self healing run.                   |
 | `minio_heal_objects_total`          | Objects scanned in current self healing run.                  |
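
(Not part of the commit.) Once the new group is registered, both gauges appear in the regular cluster scrape. The sketch below is illustrative: it fetches the documented /minio/v2/metrics/cluster endpoint and assumes metrics are publicly readable (MINIO_PROMETHEUS_AUTH_TYPE=public); otherwise a bearer token would be needed.

package main

import (
	"bufio"
	"fmt"
	"net/http"
	"strings"
)

// Illustrative scrape that prints only the two gauges added by this commit.
func main() {
	resp, err := http.Get("http://localhost:9000/minio/v2/metrics/cluster")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, "minio_cluster_health_status") ||
			strings.HasPrefix(line, "minio_cluster_write_quorum") {
			fmt.Println(line)
		}
	}
}

A reported value of 1 for minio_cluster_health_status corresponds to result.Healthy being true in getClusterHealthMetrics above, and minio_cluster_write_quorum carries the maximum write quorum computed across all pools and sets.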
@@ -152,6 +152,9 @@ const (
 	// Deployment id.
 	MinioDeploymentID = "x-minio-deployment-id"
 
+	// Peer call
+	MinIOPeerCall = "x-minio-from-peer"
+
 	// Server-Status
 	MinIOServerStatus = "x-minio-server-status"
 