mirror of https://github.com/minio/minio.git
Capture percentage of cpu load and memory used (#18596)
By default, the CPU load is cumulative across all cores. Capture the percentage load (load * 100 / cpu-count). Also capture the percentage of memory used (used * 100 / total).
This commit is contained in:
parent
5cc2c62c66
commit
7350a29fec
|
@ -91,7 +91,7 @@ func collectLocalMetrics(types madmin.MetricType, opts collectMetricsOpts) (m ma
|
||||||
}
|
}
|
||||||
cm, err := c.Times(false)
|
cm, err := c.Times(false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
m.Errors = append(m.Errors, fmt.Sprintf("%s: %v (cputimes)", globalMinioAddr, err.Error()))
|
m.Errors = append(m.Errors, fmt.Sprintf("%s: %v (cpuTimes)", globalMinioAddr, err.Error()))
|
||||||
} else {
|
} else {
|
||||||
// not collecting per-cpu stats, so there will be only one element
|
// not collecting per-cpu stats, so there will be only one element
|
||||||
if len(cm) == 1 {
|
if len(cm) == 1 {
|
||||||
|
@ -100,6 +100,13 @@ func collectLocalMetrics(types madmin.MetricType, opts collectMetricsOpts) (m ma
|
||||||
m.Errors = append(m.Errors, fmt.Sprintf("%s: Expected one CPU stat, got %d", globalMinioAddr, len(cm)))
|
m.Errors = append(m.Errors, fmt.Sprintf("%s: Expected one CPU stat, got %d", globalMinioAddr, len(cm)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
cpuCount, err := c.Counts(true)
|
||||||
|
if err != nil {
|
||||||
|
m.Errors = append(m.Errors, fmt.Sprintf("%s: %v (cpuCount)", globalMinioAddr, err.Error()))
|
||||||
|
} else {
|
||||||
|
m.Aggregated.CPU.CPUCount = cpuCount
|
||||||
|
}
|
||||||
|
|
||||||
loadStat, err := load.Avg()
|
loadStat, err := load.Avg()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
m.Errors = append(m.Errors, fmt.Sprintf("%s: %v (loadStat)", globalMinioAddr, err.Error()))
|
m.Errors = append(m.Errors, fmt.Sprintf("%s: %v (loadStat)", globalMinioAddr, err.Error()))
|
||||||
|
|
|
@ -53,6 +53,7 @@ const (
|
||||||
|
|
||||||
// memory stats
|
// memory stats
|
||||||
memUsed MetricName = "used"
|
memUsed MetricName = "used"
|
||||||
|
memUsedPerc MetricName = "used_perc"
|
||||||
memFree MetricName = "free"
|
memFree MetricName = "free"
|
||||||
memShared MetricName = "shared"
|
memShared MetricName = "shared"
|
||||||
memBuffers MetricName = "buffers"
|
memBuffers MetricName = "buffers"
|
||||||
|
@ -60,15 +61,18 @@ const (
|
||||||
memAvailable MetricName = "available"
|
memAvailable MetricName = "available"
|
||||||
|
|
||||||
// cpu stats
|
// cpu stats
|
||||||
cpuUser MetricName = "user"
|
cpuUser MetricName = "user"
|
||||||
cpuSystem MetricName = "system"
|
cpuSystem MetricName = "system"
|
||||||
cpuIOWait MetricName = "iowait"
|
cpuIOWait MetricName = "iowait"
|
||||||
cpuIdle MetricName = "idle"
|
cpuIdle MetricName = "idle"
|
||||||
cpuNice MetricName = "nice"
|
cpuNice MetricName = "nice"
|
||||||
cpuSteal MetricName = "steal"
|
cpuSteal MetricName = "steal"
|
||||||
cpuLoad1 MetricName = "load1"
|
cpuLoad1 MetricName = "load1"
|
||||||
cpuLoad5 MetricName = "load5"
|
cpuLoad5 MetricName = "load5"
|
||||||
cpuLoad15 MetricName = "load15"
|
cpuLoad15 MetricName = "load15"
|
||||||
|
cpuLoad1Perc MetricName = "load1_perc"
|
||||||
|
cpuLoad5Perc MetricName = "load5_perc"
|
||||||
|
cpuLoad15Perc MetricName = "load15_perc"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -126,6 +130,7 @@ func init() {
|
||||||
interfaceTxErrors: "Transmit errors in " + interval,
|
interfaceTxErrors: "Transmit errors in " + interval,
|
||||||
total: "Total memory on the node",
|
total: "Total memory on the node",
|
||||||
memUsed: "Used memory on the node",
|
memUsed: "Used memory on the node",
|
||||||
|
memUsedPerc: "Used memory percentage on the node",
|
||||||
memFree: "Free memory on the node",
|
memFree: "Free memory on the node",
|
||||||
memShared: "Shared memory on the node",
|
memShared: "Shared memory on the node",
|
||||||
memBuffers: "Buffers memory on the node",
|
memBuffers: "Buffers memory on the node",
|
||||||
|
@ -151,6 +156,9 @@ func init() {
|
||||||
cpuLoad1: "CPU load average 1min",
|
cpuLoad1: "CPU load average 1min",
|
||||||
cpuLoad5: "CPU load average 5min",
|
cpuLoad5: "CPU load average 5min",
|
||||||
cpuLoad15: "CPU load average 15min",
|
cpuLoad15: "CPU load average 15min",
|
||||||
|
cpuLoad1Perc: "CPU load average 1min (percentage)",
|
||||||
|
cpuLoad5Perc: "CPU load average 5min (percentage)",
|
||||||
|
cpuLoad15Perc: "CPU load average 15min (percentage)",
|
||||||
}
|
}
|
||||||
resourceMetricsGroups = []*MetricsGroup{
|
resourceMetricsGroups = []*MetricsGroup{
|
||||||
getResourceMetrics(),
|
getResourceMetrics(),
|
||||||
|
@ -283,6 +291,8 @@ func collectLocalResourceMetrics() {
|
||||||
stats := hm.Mem.Info
|
stats := hm.Mem.Info
|
||||||
updateResourceMetrics(memSubsystem, total, float64(stats.Total), labels, false)
|
updateResourceMetrics(memSubsystem, total, float64(stats.Total), labels, false)
|
||||||
updateResourceMetrics(memSubsystem, memUsed, float64(stats.Used), labels, false)
|
updateResourceMetrics(memSubsystem, memUsed, float64(stats.Used), labels, false)
|
||||||
|
perc := math.Round(float64(stats.Used*100*100)/float64(stats.Total)) / 100
|
||||||
|
updateResourceMetrics(memSubsystem, memUsedPerc, perc, labels, false)
|
||||||
updateResourceMetrics(memSubsystem, memFree, float64(stats.Free), labels, false)
|
updateResourceMetrics(memSubsystem, memFree, float64(stats.Free), labels, false)
|
||||||
updateResourceMetrics(memSubsystem, memShared, float64(stats.Shared), labels, false)
|
updateResourceMetrics(memSubsystem, memShared, float64(stats.Shared), labels, false)
|
||||||
updateResourceMetrics(memSubsystem, memBuffers, float64(stats.Buffers), labels, false)
|
updateResourceMetrics(memSubsystem, memBuffers, float64(stats.Buffers), labels, false)
|
||||||
|
@ -312,6 +322,14 @@ func collectLocalResourceMetrics() {
|
||||||
updateResourceMetrics(cpuSubsystem, cpuLoad1, ls.Load1, labels, false)
|
updateResourceMetrics(cpuSubsystem, cpuLoad1, ls.Load1, labels, false)
|
||||||
updateResourceMetrics(cpuSubsystem, cpuLoad5, ls.Load5, labels, false)
|
updateResourceMetrics(cpuSubsystem, cpuLoad5, ls.Load5, labels, false)
|
||||||
updateResourceMetrics(cpuSubsystem, cpuLoad15, ls.Load15, labels, false)
|
updateResourceMetrics(cpuSubsystem, cpuLoad15, ls.Load15, labels, false)
|
||||||
|
if hm.CPU.CPUCount > 0 {
|
||||||
|
perc := math.Round(ls.Load1*100*100/float64(hm.CPU.CPUCount)) / 100
|
||||||
|
updateResourceMetrics(cpuSubsystem, cpuLoad1Perc, perc, labels, false)
|
||||||
|
perc = math.Round(ls.Load5*100*100/float64(hm.CPU.CPUCount)) / 100
|
||||||
|
updateResourceMetrics(cpuSubsystem, cpuLoad5Perc, perc, labels, false)
|
||||||
|
perc = math.Round(ls.Load15*100*100/float64(hm.CPU.CPUCount)) / 100
|
||||||
|
updateResourceMetrics(cpuSubsystem, cpuLoad15Perc, perc, labels, false)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break // only one host expected
|
break // only one host expected
|
||||||
|
|
|
@ -345,58 +345,70 @@ For deployments behind a load balancer, use the load balancer hostname instead o
|
||||||
|
|
||||||
## CPU Metrics
|
## CPU Metrics
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
|:--------------------------------|:------------------------------|
|
|:-------------------------------------|:-------------------------------------------|
|
||||||
| `minio_node_cpu_avg_user` | CPU user time. |
|
| `minio_node_cpu_avg_user` | CPU user time. |
|
||||||
| `minio_node_cpu_avg_user_avg` | CPU user time (avg). |
|
| `minio_node_cpu_avg_user_avg` | CPU user time (avg). |
|
||||||
| `minio_node_cpu_avg_user_max` | CPU user time (max). |
|
| `minio_node_cpu_avg_user_max` | CPU user time (max). |
|
||||||
| `minio_node_cpu_avg_system` | CPU system time. |
|
| `minio_node_cpu_avg_system` | CPU system time. |
|
||||||
| `minio_node_cpu_avg_system_avg` | CPU system time (avg). |
|
| `minio_node_cpu_avg_system_avg` | CPU system time (avg). |
|
||||||
| `minio_node_cpu_avg_system_max` | CPU system time (max). |
|
| `minio_node_cpu_avg_system_max` | CPU system time (max). |
|
||||||
| `minio_node_cpu_avg_idle` | CPU idle time. |
|
| `minio_node_cpu_avg_idle` | CPU idle time. |
|
||||||
| `minio_node_cpu_avg_idle_avg` | CPU idle time (avg). |
|
| `minio_node_cpu_avg_idle_avg` | CPU idle time (avg). |
|
||||||
| `minio_node_cpu_avg_idle_max` | CPU idle time (max). |
|
| `minio_node_cpu_avg_idle_max` | CPU idle time (max). |
|
||||||
| `minio_node_cpu_avg_iowait` | CPU ioWait time. |
|
| `minio_node_cpu_avg_iowait` | CPU ioWait time. |
|
||||||
| `minio_node_cpu_avg_iowait_avg` | CPU ioWait time (avg). |
|
| `minio_node_cpu_avg_iowait_avg` | CPU ioWait time (avg). |
|
||||||
| `minio_node_cpu_avg_iowait_max` | CPU ioWait time (max). |
|
| `minio_node_cpu_avg_iowait_max` | CPU ioWait time (max). |
|
||||||
| `minio_node_cpu_avg_nice` | CPU nice time. |
|
| `minio_node_cpu_avg_nice` | CPU nice time. |
|
||||||
| `minio_node_cpu_avg_nice_avg` | CPU nice time (avg). |
|
| `minio_node_cpu_avg_nice_avg` | CPU nice time (avg). |
|
||||||
| `minio_node_cpu_avg_nice_max` | CPU nice time (max). |
|
| `minio_node_cpu_avg_nice_max` | CPU nice time (max). |
|
||||||
| `minio_node_cpu_avg_steal` | CPU steal time. |
|
| `minio_node_cpu_avg_steal` | CPU steal time. |
|
||||||
| `minio_node_cpu_avg_steal_avg` | CPU steal time (avg). |
|
| `minio_node_cpu_avg_steal_avg` | CPU steal time (avg). |
|
||||||
| `minio_node_cpu_avg_steal_max` | CPU steal time (max). |
|
| `minio_node_cpu_avg_steal_max` | CPU steal time (max). |
|
||||||
| `minio_node_cpu_avg_load1` | CPU load average 1min. |
|
| `minio_node_cpu_avg_load1` | CPU load average 1min. |
|
||||||
| `minio_node_cpu_avg_load1_avg` | CPU load average 1min (avg). |
|
| `minio_node_cpu_avg_load1_avg` | CPU load average 1min (avg). |
|
||||||
| `minio_node_cpu_avg_load1_max` | CPU load average 1min (max). |
|
| `minio_node_cpu_avg_load1_max` | CPU load average 1min (max). |
|
||||||
| `minio_node_cpu_avg_load5` | CPU load average 5min. |
|
| `minio_node_cpu_avg_load1_perc` | CPU load average 1min (percentage). |
|
||||||
| `minio_node_cpu_avg_load5_avg` | CPU load average 5min (avg). |
|
| `minio_node_cpu_avg_load1_perc_avg` | CPU load average 1min (percentage) (avg). |
|
||||||
| `minio_node_cpu_avg_load5_max` | CPU load average 5min (max). |
|
| `minio_node_cpu_avg_load1_perc_max` | CPU load average 1min (percentage) (max). |
|
||||||
| `minio_node_cpu_avg_load15` | CPU load average 15min. |
|
| `minio_node_cpu_avg_load5` | CPU load average 5min. |
|
||||||
| `minio_node_cpu_avg_load15_avg` | CPU load average 15min (avg). |
|
| `minio_node_cpu_avg_load5_avg` | CPU load average 5min (avg). |
|
||||||
| `minio_node_cpu_avg_load15_max` | CPU load average 15min (max). |
|
| `minio_node_cpu_avg_load5_max` | CPU load average 5min (max). |
|
||||||
|
| `minio_node_cpu_avg_load5_perc` | CPU load average 5min (percentage). |
|
||||||
|
| `minio_node_cpu_avg_load5_perc_avg` | CPU load average 5min (percentage) (avg). |
|
||||||
|
| `minio_node_cpu_avg_load5_perc_max` | CPU load average 5min (percentage) (max). |
|
||||||
|
| `minio_node_cpu_avg_load15` | CPU load average 15min. |
|
||||||
|
| `minio_node_cpu_avg_load15_avg` | CPU load average 15min (avg). |
|
||||||
|
| `minio_node_cpu_avg_load15_max` | CPU load average 15min (max). |
|
||||||
|
| `minio_node_cpu_avg_load15_perc` | CPU load average 15min (percentage). |
|
||||||
|
| `minio_node_cpu_avg_load15_perc_avg` | CPU load average 15min (percentage) (avg). |
|
||||||
|
| `minio_node_cpu_avg_load15_perc_max` | CPU load average 15min (percentage) (max). |
|
||||||
|
|
||||||
## Memory Metrics
|
## Memory Metrics
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
|:-------------------------------|:------------------------------------|
|
|:-------------------------------|:------------------------------------------|
|
||||||
| `minio_node_mem_available` | Available memory on the node. |
|
| `minio_node_mem_available` | Available memory on the node. |
|
||||||
| `minio_node_mem_available_avg` | Available memory on the node (avg). |
|
| `minio_node_mem_available_avg` | Available memory on the node (avg). |
|
||||||
| `minio_node_mem_available_max` | Available memory on the node (max). |
|
| `minio_node_mem_available_max` | Available memory on the node (max). |
|
||||||
| `minio_node_mem_buffers` | Buffers memory on the node. |
|
| `minio_node_mem_buffers` | Buffers memory on the node. |
|
||||||
| `minio_node_mem_buffers_avg` | Buffers memory on the node (avg). |
|
| `minio_node_mem_buffers_avg` | Buffers memory on the node (avg). |
|
||||||
| `minio_node_mem_buffers_max` | Buffers memory on the node (max). |
|
| `minio_node_mem_buffers_max` | Buffers memory on the node (max). |
|
||||||
| `minio_node_mem_cache` | Cache memory on the node. |
|
| `minio_node_mem_cache` | Cache memory on the node. |
|
||||||
| `minio_node_mem_cache_avg` | Cache memory on the node (avg). |
|
| `minio_node_mem_cache_avg` | Cache memory on the node (avg). |
|
||||||
| `minio_node_mem_cache_max` | Cache memory on the node (max). |
|
| `minio_node_mem_cache_max` | Cache memory on the node (max). |
|
||||||
| `minio_node_mem_free` | Free memory on the node. |
|
| `minio_node_mem_free` | Free memory on the node. |
|
||||||
| `minio_node_mem_free_avg` | Free memory on the node (avg). |
|
| `minio_node_mem_free_avg` | Free memory on the node (avg). |
|
||||||
| `minio_node_mem_free_max` | Free memory on the node (max). |
|
| `minio_node_mem_free_max` | Free memory on the node (max). |
|
||||||
| `minio_node_mem_shared` | Shared memory on the node. |
|
| `minio_node_mem_shared` | Shared memory on the node. |
|
||||||
| `minio_node_mem_shared_avg` | Shared memory on the node (avg). |
|
| `minio_node_mem_shared_avg` | Shared memory on the node (avg). |
|
||||||
| `minio_node_mem_shared_max` | Shared memory on the node (max). |
|
| `minio_node_mem_shared_max` | Shared memory on the node (max). |
|
||||||
| `minio_node_mem_total` | Total memory on the node. |
|
| `minio_node_mem_total` | Total memory on the node. |
|
||||||
| `minio_node_mem_total_avg` | Total memory on the node (avg). |
|
| `minio_node_mem_total_avg` | Total memory on the node (avg). |
|
||||||
| `minio_node_mem_total_max` | Total memory on the node (max). |
|
| `minio_node_mem_total_max` | Total memory on the node (max). |
|
||||||
| `minio_node_mem_used` | Used memory on the node. |
|
| `minio_node_mem_used` | Used memory on the node. |
|
||||||
| `minio_node_mem_used_avg` | Used memory on the node (avg). |
|
| `minio_node_mem_used_avg` | Used memory on the node (avg). |
|
||||||
| `minio_node_mem_used_max` | Used memory on the node (max). |
|
| `minio_node_mem_used_max` | Used memory on the node (max). |
|
||||||
|
| `minio_node_mem_used_perc` | Used memory percentage on the node. |
|
||||||
|
| `minio_node_mem_used_perc_avg` | Used memory percentage on the node (avg). |
|
||||||
|
| `minio_node_mem_used_perc_max` | Used memory percentage on the node (max). |
|
||||||
|
|
2
go.mod
2
go.mod
|
@ -49,7 +49,7 @@ require (
|
||||||
github.com/minio/dperf v0.5.2
|
github.com/minio/dperf v0.5.2
|
||||||
github.com/minio/highwayhash v1.0.2
|
github.com/minio/highwayhash v1.0.2
|
||||||
github.com/minio/kes-go v0.2.0
|
github.com/minio/kes-go v0.2.0
|
||||||
github.com/minio/madmin-go/v3 v3.0.35-0.20231130082526-199918d0ff20
|
github.com/minio/madmin-go/v3 v3.0.36
|
||||||
github.com/minio/minio-go/v7 v7.0.65-0.20231122233251-1f7dd6b7e3e1
|
github.com/minio/minio-go/v7 v7.0.65-0.20231122233251-1f7dd6b7e3e1
|
||||||
github.com/minio/mux v1.9.0
|
github.com/minio/mux v1.9.0
|
||||||
github.com/minio/pkg/v2 v2.0.4
|
github.com/minio/pkg/v2 v2.0.4
|
||||||
|
|
4
go.sum
4
go.sum
|
@ -446,8 +446,8 @@ github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA
|
||||||
github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY=
|
github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY=
|
||||||
github.com/minio/kes-go v0.2.0 h1:HA33arq9s3MErbsj3PAXFVfFo4U4yw7lTKQ5kWFrpCA=
|
github.com/minio/kes-go v0.2.0 h1:HA33arq9s3MErbsj3PAXFVfFo4U4yw7lTKQ5kWFrpCA=
|
||||||
github.com/minio/kes-go v0.2.0/go.mod h1:VorHLaIYis9/MxAHAtXN4d8PUMNKhIxTIlvFt0hBOEo=
|
github.com/minio/kes-go v0.2.0/go.mod h1:VorHLaIYis9/MxAHAtXN4d8PUMNKhIxTIlvFt0hBOEo=
|
||||||
github.com/minio/madmin-go/v3 v3.0.35-0.20231130082526-199918d0ff20 h1:5kfjAypPN18QOOQaZjR3jfGzXyIwzLdKMS7d/cPY3Wc=
|
github.com/minio/madmin-go/v3 v3.0.36 h1:Ewu/Rt7WVSs9slWW+SZHRc5RPQdYAGIdNZnRr+gyN4k=
|
||||||
github.com/minio/madmin-go/v3 v3.0.35-0.20231130082526-199918d0ff20/go.mod h1:4QN2NftLSV7MdlT50dkrenOMmNVHluxTvlqJou3hte8=
|
github.com/minio/madmin-go/v3 v3.0.36/go.mod h1:4QN2NftLSV7MdlT50dkrenOMmNVHluxTvlqJou3hte8=
|
||||||
github.com/minio/mc v0.0.0-20231127112613-5e6ae2172e25 h1:8jT9Tz4opgrX6mnyFWW+TQ90AnrJqJ0mzeFXUWDHNGo=
|
github.com/minio/mc v0.0.0-20231127112613-5e6ae2172e25 h1:8jT9Tz4opgrX6mnyFWW+TQ90AnrJqJ0mzeFXUWDHNGo=
|
||||||
github.com/minio/mc v0.0.0-20231127112613-5e6ae2172e25/go.mod h1:8kat72LmpzZ2/xykDcq64tcRRJkkWo1Kd/Z5coC6t0w=
|
github.com/minio/mc v0.0.0-20231127112613-5e6ae2172e25/go.mod h1:8kat72LmpzZ2/xykDcq64tcRRJkkWo1Kd/Z5coC6t0w=
|
||||||
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
|
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
|
||||||
|
|
Loading…
Reference in New Issue