mirror of
https://github.com/minio/minio.git
synced 2024-12-24 06:05:55 -05:00
Consolidate drive health related metrics into single metric (#19706)
Instead of having "online" and "healing" as two separate metrics, replace them with a single metric, "health", which can have the following values: 0 = offline, 1 = healthy, 2 = healing.
This commit is contained in:
parent
e8d14c0d90
commit
074d70112d
@ -35,6 +35,10 @@ const (
|
||||
|
||||
sectorSize = uint64(512)
|
||||
kib = float64(1 << 10)
|
||||
|
||||
driveHealthOffline = float64(0)
|
||||
driveHealthOnline = float64(1)
|
||||
driveHealthHealing = float64(2)
|
||||
)
|
||||
|
||||
var allDriveLabels = []string{driveL, poolIndexL, setIndexL, driveIndexL}
|
||||
@ -51,8 +55,7 @@ const (
|
||||
driveAvailabilityErrorsTotal = "availability_errors_total"
|
||||
driveWaitingIO = "waiting_io"
|
||||
driveAPILatencyMicros = "api_latency_micros"
|
||||
driveHealing = "healing"
|
||||
driveOnline = "online"
|
||||
driveHealth = "health"
|
||||
|
||||
driveOfflineCount = "offline_count"
|
||||
driveOnlineCount = "online_count"
|
||||
@ -93,10 +96,8 @@ var (
|
||||
driveAPILatencyMD = NewGaugeMD(driveAPILatencyMicros,
|
||||
"Average last minute latency in µs for drive API storage operations",
|
||||
append(allDriveLabels, apiL)...)
|
||||
driveHealingMD = NewGaugeMD(driveHealing,
|
||||
"Is it healing?", allDriveLabels...)
|
||||
driveOnlineMD = NewGaugeMD(driveOnline,
|
||||
"Is it online?", allDriveLabels...)
|
||||
driveHealthMD = NewGaugeMD(driveHealth,
|
||||
"Drive health (0 = offline, 1 = healthy, 2 = healing)", allDriveLabels...)
|
||||
|
||||
driveOfflineCountMD = NewGaugeMD(driveOfflineCount,
|
||||
"Count of offline drives")
|
||||
@ -152,16 +153,18 @@ func (m *MetricValues) setDriveBasicMetrics(drive madmin.Disk, labels []string)
|
||||
m.Set(driveFreeInodes, float64(drive.FreeInodes), labels...)
|
||||
m.Set(driveTotalInodes, float64(drive.UsedInodes+drive.FreeInodes), labels...)
|
||||
|
||||
var healing, online float64
|
||||
if drive.Healing {
|
||||
healing = 1
|
||||
var health float64
|
||||
switch drive.Healing {
|
||||
case true:
|
||||
health = driveHealthHealing
|
||||
case false:
|
||||
if drive.State == "ok" {
|
||||
health = driveHealthOnline
|
||||
} else {
|
||||
health = driveHealthOffline
|
||||
}
|
||||
}
|
||||
m.Set(driveHealing, healing, labels...)
|
||||
|
||||
if drive.State == "ok" {
|
||||
online = 1
|
||||
}
|
||||
m.Set(driveOnline, online, labels...)
|
||||
m.Set(driveHealth, health, labels...)
|
||||
}
|
||||
|
||||
func (m *MetricValues) setDriveAPIMetrics(disk madmin.Disk, labels []string) {
|
||||
|
@ -186,8 +186,7 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
||||
driveAvailabilityErrorsMD,
|
||||
driveWaitingIOMD,
|
||||
driveAPILatencyMD,
|
||||
driveHealingMD,
|
||||
driveOnlineMD,
|
||||
driveHealthMD,
|
||||
|
||||
driveOfflineCountMD,
|
||||
driveOnlineCountMD,
|
||||
|
@ -132,8 +132,7 @@ The standard metrics group for GoCollector is not shown below.
|
||||
| `minio_system_drive_offline_count` | `gauge` | Count of offline drives | `pool_index,server` |
|
||||
| `minio_system_drive_online_count` | `gauge` | Count of online drives | `pool_index,server` |
|
||||
| `minio_system_drive_count` | `gauge` | Count of all drives | `pool_index,server` |
|
||||
| `minio_system_drive_healing` | `gauge` | Is it healing? | `drive,set_index,drive_index,pool_index,server` |
|
||||
| `minio_system_drive_online` | `gauge` | Is it online? | `drive,set_index,drive_index,pool_index,server` |
|
||||
| `minio_system_drive_health` | `gauge` | Drive health (0 = offline, 1 = healthy, 2 = healing) | `drive,set_index,drive_index,pool_index,server` |
|
||||
| `minio_system_drive_reads_per_sec` | `gauge` | Reads per second on a drive | `drive,set_index,drive_index,pool_index,server` |
|
||||
| `minio_system_drive_reads_kb_per_sec` | `gauge` | Kilobytes read per second on a drive | `drive,set_index,drive_index,pool_index,server` |
|
||||
| `minio_system_drive_reads_await` | `gauge` | Average time for read requests served on a drive | `drive,set_index,drive_index,pool_index,server` |
|
||||
|
Loading…
Reference in New Issue
Block a user