mirror of https://github.com/minio/minio.git
fix: prometheus metrics disks_online count when disks are down (#11689)
Prometheus metrics were reporting the total disk count instead of the online disk count when disks were down. This PR fixes that and also adds a new metric for the total disk count (`minio_cluster_disk_total`).
This commit is contained in:
parent
690434514d
commit
2c198ae7b6
|
@ -72,6 +72,7 @@ const (
|
|||
type MetricName string
|
||||
|
||||
const (
|
||||
total MetricName = "total"
|
||||
errorsTotal MetricName = "error_total"
|
||||
healTotal MetricName = "heal_total"
|
||||
hitsTotal MetricName = "hits_total"
|
||||
|
@ -85,7 +86,6 @@ const (
|
|||
openTotal MetricName = "open_total"
|
||||
readTotal MetricName = "read_total"
|
||||
writeTotal MetricName = "write_total"
|
||||
total MetricName = "total"
|
||||
|
||||
failedBytes MetricName = "failed_bytes"
|
||||
freeBytes MetricName = "free_bytes"
|
||||
|
@ -254,7 +254,7 @@ func getNodeDiskFreeBytesMD() MetricDescription {
|
|||
Type: gaugeMetric,
|
||||
}
|
||||
}
|
||||
func getClusterDiskOfflineTotalMD() MetricDescription {
|
||||
func getClusterDisksOfflineTotalMD() MetricDescription {
|
||||
return MetricDescription{
|
||||
Namespace: clusterMetricNamespace,
|
||||
Subsystem: diskSubsystem,
|
||||
|
@ -264,7 +264,7 @@ func getClusterDiskOfflineTotalMD() MetricDescription {
|
|||
}
|
||||
}
|
||||
|
||||
func getClusterDiskOnlineTotalMD() MetricDescription {
|
||||
func getClusterDisksOnlineTotalMD() MetricDescription {
|
||||
return MetricDescription{
|
||||
Namespace: clusterMetricNamespace,
|
||||
Subsystem: diskSubsystem,
|
||||
|
@ -274,6 +274,16 @@ func getClusterDiskOnlineTotalMD() MetricDescription {
|
|||
}
|
||||
}
|
||||
|
||||
func getClusterDisksTotalMD() MetricDescription {
|
||||
return MetricDescription{
|
||||
Namespace: clusterMetricNamespace,
|
||||
Subsystem: diskSubsystem,
|
||||
Name: total,
|
||||
Help: "Total disks.",
|
||||
Type: gaugeMetric,
|
||||
}
|
||||
}
|
||||
|
||||
func getNodeDiskTotalBytesMD() MetricDescription {
|
||||
return MetricDescription{
|
||||
Namespace: nodeMetricNamespace,
|
||||
|
@ -1142,7 +1152,7 @@ func getClusterStorageMetrics() MetricsGroup {
|
|||
// Fetch disk space info, ignore errors
|
||||
storageInfo, _ := objLayer.StorageInfo(ctx)
|
||||
onlineDisks, offlineDisks := getOnlineOfflineDisksStats(storageInfo.Disks)
|
||||
totalDisks := offlineDisks.Merge(onlineDisks)
|
||||
totalDisks := onlineDisks.Merge(offlineDisks)
|
||||
|
||||
metrics.Metrics = append(metrics.Metrics, Metric{
|
||||
Description: getClusterCapacityTotalBytesMD(),
|
||||
|
@ -1165,12 +1175,17 @@ func getClusterStorageMetrics() MetricsGroup {
|
|||
})
|
||||
|
||||
metrics.Metrics = append(metrics.Metrics, Metric{
|
||||
Description: getClusterDiskOfflineTotalMD(),
|
||||
Description: getClusterDisksOfflineTotalMD(),
|
||||
Value: float64(offlineDisks.Sum()),
|
||||
})
|
||||
|
||||
metrics.Metrics = append(metrics.Metrics, Metric{
|
||||
Description: getClusterDiskOnlineTotalMD(),
|
||||
Description: getClusterDisksOnlineTotalMD(),
|
||||
Value: float64(onlineDisks.Sum()),
|
||||
})
|
||||
|
||||
metrics.Metrics = append(metrics.Metrics, Metric{
|
||||
Description: getClusterDisksTotalMD(),
|
||||
Value: float64(totalDisks.Sum()),
|
||||
})
|
||||
},
|
||||
|
|
|
@ -6,7 +6,7 @@ Each metric has a label for the server that generated the metric.
|
|||
These metrics can be from any MinIO server once per collection.
|
||||
|
||||
| Name | Description |
|
||||
|:-----------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------|
|
||||
|:---------------------------------------------|:--------------------------------------------------------------------------------------------------------------------|
|
||||
| `minio_bucket_objects_size_distribution` | Distribution of object sizes in the bucket, includes label for the bucket name. |
|
||||
| `minio_bucket_replication_failed_bytes` | Total number of bytes failed at least once to replicate. |
|
||||
| `minio_bucket_replication_pending_bytes` | Total bytes pending to replicate. |
|
||||
|
@ -18,6 +18,7 @@ These metrics can be from any MinIO server once per collection.
|
|||
| `minio_cluster_capacity_raw_total_bytes` | Total capacity online in the cluster. |
|
||||
| `minio_cluster_capacity_usable_free_bytes` | Total free usable capacity online in the cluster. |
|
||||
| `minio_cluster_capacity_usable_total_bytes` | Total usable capacity online in the cluster. |
|
||||
| `minio_cluster_disk_total` | Total disks. |
|
||||
| `minio_cluster_disk_offline_total` | Total disks offline. |
|
||||
| `minio_cluster_disk_online_total` | Total disks online. |
|
||||
| `minio_cluster_nodes_offline_total` | Total number of MinIO nodes offline. |
|
||||
|
|
Loading…
Reference in New Issue