diff --git a/cmd/metrics-v3-cluster-erasure-set.go b/cmd/metrics-v3-cluster-erasure-set.go index 69d7523b3..04824c624 100644 --- a/cmd/metrics-v3-cluster-erasure-set.go +++ b/cmd/metrics-v3-cluster-erasure-set.go @@ -30,6 +30,10 @@ const ( erasureSetOnlineDrivesCount = "online_drives_count" erasureSetHealingDrivesCount = "healing_drives_count" erasureSetHealth = "health" + erasureSetReadTolerance = "read_tolerance" + erasureSetWriteTolerance = "write_tolerance" + erasureSetReadHealth = "read_health" + erasureSetWriteHealth = "write_health" ) const ( @@ -53,6 +57,18 @@ var ( erasureSetHealthMD = NewGaugeMD(erasureSetHealth, "Health of the erasure set in a pool (1=healthy, 0=unhealthy)", poolIDL, setIDL) + erasureSetReadToleranceMD = NewGaugeMD(erasureSetReadTolerance, + "No of drive failures that can be tolerated without disrupting read operations", + poolIDL, setIDL) + erasureSetWriteToleranceMD = NewGaugeMD(erasureSetWriteTolerance, + "No of drive failures that can be tolerated without disrupting write operations", + poolIDL, setIDL) + erasureSetReadHealthMD = NewGaugeMD(erasureSetReadHealth, + "Health of the erasure set in a pool for read operations (1=healthy, 0=unhealthy)", + poolIDL, setIDL) + erasureSetWriteHealthMD = NewGaugeMD(erasureSetWriteHealth, + "Health of the erasure set in a pool for write operations (1=healthy, 0=unhealthy)", + poolIDL, setIDL) ) func b2f(v bool) float64 { @@ -73,16 +89,28 @@ func loadClusterErasureSetMetrics(ctx context.Context, m MetricValues, c *metric for _, h := range result.ESHealth { poolLV := strconv.Itoa(h.PoolID) setLV := strconv.Itoa(h.SetID) - m.Set(erasureSetReadQuorum, float64(h.ReadQuorum), - poolIDL, poolLV, setIDL, setLV) - m.Set(erasureSetWriteQuorum, float64(h.WriteQuorum), - poolIDL, poolLV, setIDL, setLV) - m.Set(erasureSetOnlineDrivesCount, float64(h.HealthyDrives), - poolIDL, poolLV, setIDL, setLV) - m.Set(erasureSetHealingDrivesCount, float64(h.HealingDrives), - poolIDL, poolLV, setIDL, setLV) - m.Set(erasureSetHealth, b2f(h.Healthy), - poolIDL, poolLV, setIDL, setLV) + labels := []string{poolIDL, poolLV, setIDL, setLV} + m.Set(erasureSetReadQuorum, float64(h.ReadQuorum), labels...) + m.Set(erasureSetWriteQuorum, float64(h.WriteQuorum), labels...) + m.Set(erasureSetOnlineDrivesCount, float64(h.HealthyDrives), labels...) + m.Set(erasureSetHealingDrivesCount, float64(h.HealingDrives), labels...) + m.Set(erasureSetHealth, b2f(h.Healthy), labels...) + + readHealthy := true + readTolerance := float64(h.HealthyDrives - h.ReadQuorum) + if readTolerance < 0 { + readHealthy = false + } + m.Set(erasureSetReadTolerance, readTolerance, labels...) + m.Set(erasureSetReadHealth, b2f(readHealthy), labels...) + + writeHealthy := true + writeTolerance := float64(h.HealthyDrives + h.HealingDrives - h.WriteQuorum) + if writeTolerance < 0 { + writeHealthy = false + } + m.Set(erasureSetWriteTolerance, writeTolerance, labels...) + m.Set(erasureSetWriteHealth, b2f(writeHealthy), labels...) } return nil diff --git a/cmd/metrics-v3.go b/cmd/metrics-v3.go index beb838433..ad74b7579 100644 --- a/cmd/metrics-v3.go +++ b/cmd/metrics-v3.go @@ -261,6 +261,10 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection { erasureSetOnlineDrivesCountMD, erasureSetHealingDrivesCountMD, erasureSetHealthMD, + erasureSetReadToleranceMD, + erasureSetWriteToleranceMD, + erasureSetReadHealthMD, + erasureSetWriteHealthMD, }, loadClusterErasureSetMetrics, ) diff --git a/docs/metrics/v3.md b/docs/metrics/v3.md index 6af5052bf..438a9158d 100644 --- a/docs/metrics/v3.md +++ b/docs/metrics/v3.md @@ -249,15 +249,19 @@ The standard metrics group for GoCollector is not shown below. ### `/cluster/erasure-set` -| Name | Type | Help | Labels | -|--------------------------------------------------|---------|---------------------------------------------------------------|------------------| -| `minio_cluster_erasure_set_overall_write_quorum` | `gauge` | Overall write quorum across pools and sets | | -| `minio_cluster_erasure_set_overall_health` | `gauge` | Overall health across pools and sets (1=healthy, 0=unhealthy) | | -| `minio_cluster_erasure_set_read_quorum` | `gauge` | Read quorum for the erasure set in a pool | `pool_id,set_id` | -| `minio_cluster_erasure_set_write_quorum` | `gauge` | Write quorum for the erasure set in a pool | `pool_id,set_id` | -| `minio_cluster_erasure_set_online_drives_count` | `gauge` | Count of online drives in the erasure set in a pool | `pool_id,set_id` | -| `minio_cluster_erasure_set_healing_drives_count` | `gauge` | Count of healing drives in the erasure set in a pool | `pool_id,set_id` | -| `minio_cluster_erasure_set_health` | `gauge` | Health of the erasure set in a pool (1=healthy, 0=unhealthy) | `pool_id,set_id` | +| Name | Type | Help | Labels | +|--------------------------------------------------|---------|-----------------------------------------------------------------------------------|------------------| +| `minio_cluster_erasure_set_overall_write_quorum` | `gauge` | Overall write quorum across pools and sets | | +| `minio_cluster_erasure_set_overall_health` | `gauge` | Overall health across pools and sets (1=healthy, 0=unhealthy) | | +| `minio_cluster_erasure_set_read_quorum` | `gauge` | Read quorum for the erasure set in a pool | `pool_id,set_id` | +| `minio_cluster_erasure_set_write_quorum` | `gauge` | Write quorum for the erasure set in a pool | `pool_id,set_id` | +| `minio_cluster_erasure_set_online_drives_count` | `gauge` | Count of online drives in the erasure set in a pool | `pool_id,set_id` | +| `minio_cluster_erasure_set_healing_drives_count` | `gauge` | Count of healing drives in the erasure set in a pool | `pool_id,set_id` | +| `minio_cluster_erasure_set_health` | `gauge` | Health of the erasure set in a pool (1=healthy, 0=unhealthy) | `pool_id,set_id` | +| `minio_cluster_erasure_set_read_tolerance` | `gauge` | No of drive failures that can be tolerated without disrupting read operations | `pool_id,set_id` | +| `minio_cluster_erasure_set_write_tolerance` | `gauge` | No of drive failures that can be tolerated without disrupting write operations | `pool_id,set_id` | +| `minio_cluster_erasure_set_read_health` | `gauge` | Health of the erasure set in a pool for read operations (1=healthy, 0=unhealthy) | `pool_id,set_id` | +| `minio_cluster_erasure_set_write_health` | `gauge` | Health of the erasure set in a pool for write operations (1=healthy, 0=unhealthy) | `pool_id,set_id` | ### `/cluster/notification`