mirror of
https://github.com/minio/minio.git
synced 2025-04-11 06:57:49 -04:00
Consolidate drive health related metrics into single metric (#19706)
Instead of having "online" and "healing" as two separate metrics, replace them with a single metric, "health", which can have the following values: 0 = offline, 1 = healthy, 2 = healing.
This commit is contained in:
parent
e8d14c0d90
commit
074d70112d
@ -35,6 +35,10 @@ const (
|
|||||||
|
|
||||||
sectorSize = uint64(512)
|
sectorSize = uint64(512)
|
||||||
kib = float64(1 << 10)
|
kib = float64(1 << 10)
|
||||||
|
|
||||||
|
driveHealthOffline = float64(0)
|
||||||
|
driveHealthOnline = float64(1)
|
||||||
|
driveHealthHealing = float64(2)
|
||||||
)
|
)
|
||||||
|
|
||||||
var allDriveLabels = []string{driveL, poolIndexL, setIndexL, driveIndexL}
|
var allDriveLabels = []string{driveL, poolIndexL, setIndexL, driveIndexL}
|
||||||
@ -51,8 +55,7 @@ const (
|
|||||||
driveAvailabilityErrorsTotal = "availability_errors_total"
|
driveAvailabilityErrorsTotal = "availability_errors_total"
|
||||||
driveWaitingIO = "waiting_io"
|
driveWaitingIO = "waiting_io"
|
||||||
driveAPILatencyMicros = "api_latency_micros"
|
driveAPILatencyMicros = "api_latency_micros"
|
||||||
driveHealing = "healing"
|
driveHealth = "health"
|
||||||
driveOnline = "online"
|
|
||||||
|
|
||||||
driveOfflineCount = "offline_count"
|
driveOfflineCount = "offline_count"
|
||||||
driveOnlineCount = "online_count"
|
driveOnlineCount = "online_count"
|
||||||
@ -93,10 +96,8 @@ var (
|
|||||||
driveAPILatencyMD = NewGaugeMD(driveAPILatencyMicros,
|
driveAPILatencyMD = NewGaugeMD(driveAPILatencyMicros,
|
||||||
"Average last minute latency in µs for drive API storage operations",
|
"Average last minute latency in µs for drive API storage operations",
|
||||||
append(allDriveLabels, apiL)...)
|
append(allDriveLabels, apiL)...)
|
||||||
driveHealingMD = NewGaugeMD(driveHealing,
|
driveHealthMD = NewGaugeMD(driveHealth,
|
||||||
"Is it healing?", allDriveLabels...)
|
"Drive health (0 = offline, 1 = healthy, 2 = healing)", allDriveLabels...)
|
||||||
driveOnlineMD = NewGaugeMD(driveOnline,
|
|
||||||
"Is it online?", allDriveLabels...)
|
|
||||||
|
|
||||||
driveOfflineCountMD = NewGaugeMD(driveOfflineCount,
|
driveOfflineCountMD = NewGaugeMD(driveOfflineCount,
|
||||||
"Count of offline drives")
|
"Count of offline drives")
|
||||||
@ -152,16 +153,18 @@ func (m *MetricValues) setDriveBasicMetrics(drive madmin.Disk, labels []string)
|
|||||||
m.Set(driveFreeInodes, float64(drive.FreeInodes), labels...)
|
m.Set(driveFreeInodes, float64(drive.FreeInodes), labels...)
|
||||||
m.Set(driveTotalInodes, float64(drive.UsedInodes+drive.FreeInodes), labels...)
|
m.Set(driveTotalInodes, float64(drive.UsedInodes+drive.FreeInodes), labels...)
|
||||||
|
|
||||||
var healing, online float64
|
var health float64
|
||||||
if drive.Healing {
|
switch drive.Healing {
|
||||||
healing = 1
|
case true:
|
||||||
}
|
health = driveHealthHealing
|
||||||
m.Set(driveHealing, healing, labels...)
|
case false:
|
||||||
|
|
||||||
if drive.State == "ok" {
|
if drive.State == "ok" {
|
||||||
online = 1
|
health = driveHealthOnline
|
||||||
|
} else {
|
||||||
|
health = driveHealthOffline
|
||||||
}
|
}
|
||||||
m.Set(driveOnline, online, labels...)
|
}
|
||||||
|
m.Set(driveHealth, health, labels...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *MetricValues) setDriveAPIMetrics(disk madmin.Disk, labels []string) {
|
func (m *MetricValues) setDriveAPIMetrics(disk madmin.Disk, labels []string) {
|
||||||
|
@ -186,8 +186,7 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
|||||||
driveAvailabilityErrorsMD,
|
driveAvailabilityErrorsMD,
|
||||||
driveWaitingIOMD,
|
driveWaitingIOMD,
|
||||||
driveAPILatencyMD,
|
driveAPILatencyMD,
|
||||||
driveHealingMD,
|
driveHealthMD,
|
||||||
driveOnlineMD,
|
|
||||||
|
|
||||||
driveOfflineCountMD,
|
driveOfflineCountMD,
|
||||||
driveOnlineCountMD,
|
driveOnlineCountMD,
|
||||||
|
@ -132,8 +132,7 @@ The standard metrics group for GoCollector is not shown below.
|
|||||||
| `minio_system_drive_offline_count` | `gauge` | Count of offline drives | `pool_index,server` |
|
| `minio_system_drive_offline_count` | `gauge` | Count of offline drives | `pool_index,server` |
|
||||||
| `minio_system_drive_online_count` | `gauge` | Count of online drives | `pool_index,server` |
|
| `minio_system_drive_online_count` | `gauge` | Count of online drives | `pool_index,server` |
|
||||||
| `minio_system_drive_count` | `gauge` | Count of all drives | `pool_index,server` |
|
| `minio_system_drive_count` | `gauge` | Count of all drives | `pool_index,server` |
|
||||||
| `minio_system_drive_healing` | `gauge` | Is it healing? | `drive,set_index,drive_index,pool_index,server` |
|
| `minio_system_drive_health` | `gauge` | Drive health (0 = offline, 1 = healthy, 2 = healing) | `drive,set_index,drive_index,pool_index,server` |
|
||||||
| `minio_system_drive_online` | `gauge` | Is it online? | `drive,set_index,drive_index,pool_index,server` |
|
|
||||||
| `minio_system_drive_reads_per_sec` | `gauge` | Reads per second on a drive | `drive,set_index,drive_index,pool_index,server` |
|
| `minio_system_drive_reads_per_sec` | `gauge` | Reads per second on a drive | `drive,set_index,drive_index,pool_index,server` |
|
||||||
| `minio_system_drive_reads_kb_per_sec` | `gauge` | Kilobytes read per second on a drive | `drive,set_index,drive_index,pool_index,server` |
|
| `minio_system_drive_reads_kb_per_sec` | `gauge` | Kilobytes read per second on a drive | `drive,set_index,drive_index,pool_index,server` |
|
||||||
| `minio_system_drive_reads_await` | `gauge` | Average time for read requests served on a drive | `drive,set_index,drive_index,pool_index,server` |
|
| `minio_system_drive_reads_await` | `gauge` | Average time for read requests served on a drive | `drive,set_index,drive_index,pool_index,server` |
|
||||||
|
Loading…
x
Reference in New Issue
Block a user