fix: prometheus metrics disks_online count when disks are down (#11689)

Prometheus metrics were using the total disk count instead
of the online disk count when disks were down. This
PR fixes this and also adds a new metric for
total_disk_count.
This commit is contained in:
Harshavardhana
2021-03-03 11:18:41 -08:00
committed by GitHub
parent 690434514d
commit 2c198ae7b6
2 changed files with 71 additions and 55 deletions

View File

@@ -72,6 +72,7 @@ const (
type MetricName string
const (
total MetricName = "total"
errorsTotal MetricName = "error_total"
healTotal MetricName = "heal_total"
hitsTotal MetricName = "hits_total"
@@ -85,7 +86,6 @@ const (
openTotal MetricName = "open_total"
readTotal MetricName = "read_total"
writeTotal MetricName = "write_total"
total MetricName = "total"
failedBytes MetricName = "failed_bytes"
freeBytes MetricName = "free_bytes"
@@ -254,7 +254,7 @@ func getNodeDiskFreeBytesMD() MetricDescription {
Type: gaugeMetric,
}
}
func getClusterDiskOfflineTotalMD() MetricDescription {
func getClusterDisksOfflineTotalMD() MetricDescription {
return MetricDescription{
Namespace: clusterMetricNamespace,
Subsystem: diskSubsystem,
@@ -264,7 +264,7 @@ func getClusterDiskOfflineTotalMD() MetricDescription {
}
}
func getClusterDiskOnlineTotalMD() MetricDescription {
func getClusterDisksOnlineTotalMD() MetricDescription {
return MetricDescription{
Namespace: clusterMetricNamespace,
Subsystem: diskSubsystem,
@@ -274,6 +274,16 @@ func getClusterDiskOnlineTotalMD() MetricDescription {
}
}
// getClusterDisksTotalMD returns the description of the gauge metric named
// `total`, registered under the cluster namespace and the disk subsystem,
// with the help text "Total disks.".
func getClusterDisksTotalMD() MetricDescription {
	var md MetricDescription

	// Where the metric lives: cluster namespace, disk subsystem.
	md.Namespace = clusterMetricNamespace
	md.Subsystem = diskSubsystem

	// What the metric is: a gauge called `total`.
	md.Name = total
	md.Help = "Total disks."
	md.Type = gaugeMetric

	return md
}
func getNodeDiskTotalBytesMD() MetricDescription {
return MetricDescription{
Namespace: nodeMetricNamespace,
@@ -1142,7 +1152,7 @@ func getClusterStorageMetrics() MetricsGroup {
// Fetch disk space info, ignore errors
storageInfo, _ := objLayer.StorageInfo(ctx)
onlineDisks, offlineDisks := getOnlineOfflineDisksStats(storageInfo.Disks)
totalDisks := offlineDisks.Merge(onlineDisks)
totalDisks := onlineDisks.Merge(offlineDisks)
metrics.Metrics = append(metrics.Metrics, Metric{
Description: getClusterCapacityTotalBytesMD(),
@@ -1165,12 +1175,17 @@ func getClusterStorageMetrics() MetricsGroup {
})
metrics.Metrics = append(metrics.Metrics, Metric{
Description: getClusterDiskOfflineTotalMD(),
Description: getClusterDisksOfflineTotalMD(),
Value: float64(offlineDisks.Sum()),
})
metrics.Metrics = append(metrics.Metrics, Metric{
Description: getClusterDiskOnlineTotalMD(),
Description: getClusterDisksOnlineTotalMD(),
Value: float64(onlineDisks.Sum()),
})
metrics.Metrics = append(metrics.Metrics, Metric{
Description: getClusterDisksTotalMD(),
Value: float64(totalDisks.Sum()),
})
},