Capture percentage of cpu load and memory used (#18596)

By default, the reported CPU load is the cumulative load across all cores.
Also capture it as a percentage of capacity (load * 100 / cpu-count).

Also capture the percentage of memory used (used * 100 / total).
This commit is contained in:
Shireesh Anjal 2023-12-07 02:49:59 +05:30 committed by GitHub
parent 5cc2c62c66
commit 7350a29fec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 102 additions and 65 deletions

View File

@ -91,7 +91,7 @@ func collectLocalMetrics(types madmin.MetricType, opts collectMetricsOpts) (m ma
} }
cm, err := c.Times(false) cm, err := c.Times(false)
if err != nil { if err != nil {
m.Errors = append(m.Errors, fmt.Sprintf("%s: %v (cputimes)", globalMinioAddr, err.Error())) m.Errors = append(m.Errors, fmt.Sprintf("%s: %v (cpuTimes)", globalMinioAddr, err.Error()))
} else { } else {
// not collecting per-cpu stats, so there will be only one element // not collecting per-cpu stats, so there will be only one element
if len(cm) == 1 { if len(cm) == 1 {
@ -100,6 +100,13 @@ func collectLocalMetrics(types madmin.MetricType, opts collectMetricsOpts) (m ma
m.Errors = append(m.Errors, fmt.Sprintf("%s: Expected one CPU stat, got %d", globalMinioAddr, len(cm))) m.Errors = append(m.Errors, fmt.Sprintf("%s: Expected one CPU stat, got %d", globalMinioAddr, len(cm)))
} }
} }
cpuCount, err := c.Counts(true)
if err != nil {
m.Errors = append(m.Errors, fmt.Sprintf("%s: %v (cpuCount)", globalMinioAddr, err.Error()))
} else {
m.Aggregated.CPU.CPUCount = cpuCount
}
loadStat, err := load.Avg() loadStat, err := load.Avg()
if err != nil { if err != nil {
m.Errors = append(m.Errors, fmt.Sprintf("%s: %v (loadStat)", globalMinioAddr, err.Error())) m.Errors = append(m.Errors, fmt.Sprintf("%s: %v (loadStat)", globalMinioAddr, err.Error()))

View File

@ -53,6 +53,7 @@ const (
// memory stats // memory stats
memUsed MetricName = "used" memUsed MetricName = "used"
memUsedPerc MetricName = "used_perc"
memFree MetricName = "free" memFree MetricName = "free"
memShared MetricName = "shared" memShared MetricName = "shared"
memBuffers MetricName = "buffers" memBuffers MetricName = "buffers"
@ -60,15 +61,18 @@ const (
memAvailable MetricName = "available" memAvailable MetricName = "available"
// cpu stats // cpu stats
cpuUser MetricName = "user" cpuUser MetricName = "user"
cpuSystem MetricName = "system" cpuSystem MetricName = "system"
cpuIOWait MetricName = "iowait" cpuIOWait MetricName = "iowait"
cpuIdle MetricName = "idle" cpuIdle MetricName = "idle"
cpuNice MetricName = "nice" cpuNice MetricName = "nice"
cpuSteal MetricName = "steal" cpuSteal MetricName = "steal"
cpuLoad1 MetricName = "load1" cpuLoad1 MetricName = "load1"
cpuLoad5 MetricName = "load5" cpuLoad5 MetricName = "load5"
cpuLoad15 MetricName = "load15" cpuLoad15 MetricName = "load15"
cpuLoad1Perc MetricName = "load1_perc"
cpuLoad5Perc MetricName = "load5_perc"
cpuLoad15Perc MetricName = "load15_perc"
) )
var ( var (
@ -126,6 +130,7 @@ func init() {
interfaceTxErrors: "Transmit errors in " + interval, interfaceTxErrors: "Transmit errors in " + interval,
total: "Total memory on the node", total: "Total memory on the node",
memUsed: "Used memory on the node", memUsed: "Used memory on the node",
memUsedPerc: "Used memory percentage on the node",
memFree: "Free memory on the node", memFree: "Free memory on the node",
memShared: "Shared memory on the node", memShared: "Shared memory on the node",
memBuffers: "Buffers memory on the node", memBuffers: "Buffers memory on the node",
@ -151,6 +156,9 @@ func init() {
cpuLoad1: "CPU load average 1min", cpuLoad1: "CPU load average 1min",
cpuLoad5: "CPU load average 5min", cpuLoad5: "CPU load average 5min",
cpuLoad15: "CPU load average 15min", cpuLoad15: "CPU load average 15min",
cpuLoad1Perc: "CPU load average 1min (percentage)",
cpuLoad5Perc: "CPU load average 5min (percentage)",
cpuLoad15Perc: "CPU load average 15min (percentage)",
} }
resourceMetricsGroups = []*MetricsGroup{ resourceMetricsGroups = []*MetricsGroup{
getResourceMetrics(), getResourceMetrics(),
@ -283,6 +291,8 @@ func collectLocalResourceMetrics() {
stats := hm.Mem.Info stats := hm.Mem.Info
updateResourceMetrics(memSubsystem, total, float64(stats.Total), labels, false) updateResourceMetrics(memSubsystem, total, float64(stats.Total), labels, false)
updateResourceMetrics(memSubsystem, memUsed, float64(stats.Used), labels, false) updateResourceMetrics(memSubsystem, memUsed, float64(stats.Used), labels, false)
perc := math.Round(float64(stats.Used*100*100)/float64(stats.Total)) / 100
updateResourceMetrics(memSubsystem, memUsedPerc, perc, labels, false)
updateResourceMetrics(memSubsystem, memFree, float64(stats.Free), labels, false) updateResourceMetrics(memSubsystem, memFree, float64(stats.Free), labels, false)
updateResourceMetrics(memSubsystem, memShared, float64(stats.Shared), labels, false) updateResourceMetrics(memSubsystem, memShared, float64(stats.Shared), labels, false)
updateResourceMetrics(memSubsystem, memBuffers, float64(stats.Buffers), labels, false) updateResourceMetrics(memSubsystem, memBuffers, float64(stats.Buffers), labels, false)
@ -312,6 +322,14 @@ func collectLocalResourceMetrics() {
updateResourceMetrics(cpuSubsystem, cpuLoad1, ls.Load1, labels, false) updateResourceMetrics(cpuSubsystem, cpuLoad1, ls.Load1, labels, false)
updateResourceMetrics(cpuSubsystem, cpuLoad5, ls.Load5, labels, false) updateResourceMetrics(cpuSubsystem, cpuLoad5, ls.Load5, labels, false)
updateResourceMetrics(cpuSubsystem, cpuLoad15, ls.Load15, labels, false) updateResourceMetrics(cpuSubsystem, cpuLoad15, ls.Load15, labels, false)
if hm.CPU.CPUCount > 0 {
perc := math.Round(ls.Load1*100*100/float64(hm.CPU.CPUCount)) / 100
updateResourceMetrics(cpuSubsystem, cpuLoad1Perc, perc, labels, false)
perc = math.Round(ls.Load5*100*100/float64(hm.CPU.CPUCount)) / 100
updateResourceMetrics(cpuSubsystem, cpuLoad5Perc, perc, labels, false)
perc = math.Round(ls.Load15*100*100/float64(hm.CPU.CPUCount)) / 100
updateResourceMetrics(cpuSubsystem, cpuLoad15Perc, perc, labels, false)
}
} }
} }
break // only one host expected break // only one host expected

View File

@ -345,58 +345,70 @@ For deployments behind a load balancer, use the load balancer hostname instead o
## CPU Metrics ## CPU Metrics
| Name | Description | | Name | Description |
|:--------------------------------|:------------------------------| |:-------------------------------------|:-------------------------------------------|
| `minio_node_cpu_avg_user` | CPU user time. | | `minio_node_cpu_avg_user` | CPU user time. |
| `minio_node_cpu_avg_user_avg` | CPU user time (avg). | | `minio_node_cpu_avg_user_avg` | CPU user time (avg). |
| `minio_node_cpu_avg_user_max` | CPU user time (max). | | `minio_node_cpu_avg_user_max` | CPU user time (max). |
| `minio_node_cpu_avg_system` | CPU system time. | | `minio_node_cpu_avg_system` | CPU system time. |
| `minio_node_cpu_avg_system_avg` | CPU system time (avg). | | `minio_node_cpu_avg_system_avg` | CPU system time (avg). |
| `minio_node_cpu_avg_system_max` | CPU system time (max). | | `minio_node_cpu_avg_system_max` | CPU system time (max). |
| `minio_node_cpu_avg_idle` | CPU idle time. | | `minio_node_cpu_avg_idle` | CPU idle time. |
| `minio_node_cpu_avg_idle_avg` | CPU idle time (avg). | | `minio_node_cpu_avg_idle_avg` | CPU idle time (avg). |
| `minio_node_cpu_avg_idle_max` | CPU idle time (max). | | `minio_node_cpu_avg_idle_max` | CPU idle time (max). |
| `minio_node_cpu_avg_iowait` | CPU ioWait time. | | `minio_node_cpu_avg_iowait` | CPU ioWait time. |
| `minio_node_cpu_avg_iowait_avg` | CPU ioWait time (avg). | | `minio_node_cpu_avg_iowait_avg` | CPU ioWait time (avg). |
| `minio_node_cpu_avg_iowait_max` | CPU ioWait time (max). | | `minio_node_cpu_avg_iowait_max` | CPU ioWait time (max). |
| `minio_node_cpu_avg_nice` | CPU nice time. | | `minio_node_cpu_avg_nice` | CPU nice time. |
| `minio_node_cpu_avg_nice_avg` | CPU nice time (avg). | | `minio_node_cpu_avg_nice_avg` | CPU nice time (avg). |
| `minio_node_cpu_avg_nice_max` | CPU nice time (max). | | `minio_node_cpu_avg_nice_max` | CPU nice time (max). |
| `minio_node_cpu_avg_steal` | CPU steal time. | | `minio_node_cpu_avg_steal` | CPU steal time. |
| `minio_node_cpu_avg_steal_avg` | CPU steal time (avg). | | `minio_node_cpu_avg_steal_avg` | CPU steal time (avg). |
| `minio_node_cpu_avg_steal_max` | CPU steal time (max). | | `minio_node_cpu_avg_steal_max` | CPU steal time (max). |
| `minio_node_cpu_avg_load1` | CPU load average 1min. | | `minio_node_cpu_avg_load1` | CPU load average 1min. |
| `minio_node_cpu_avg_load1_avg` | CPU load average 1min (avg). | | `minio_node_cpu_avg_load1_avg` | CPU load average 1min (avg). |
| `minio_node_cpu_avg_load1_max` | CPU load average 1min (max). | | `minio_node_cpu_avg_load1_max` | CPU load average 1min (max). |
| `minio_node_cpu_avg_load5` | CPU load average 5min. | | `minio_node_cpu_avg_load1_perc` | CPU load average 1min (percentage). |
| `minio_node_cpu_avg_load5_avg` | CPU load average 5min (avg). | | `minio_node_cpu_avg_load1_perc_avg` | CPU load average 1min (percentage) (avg). |
| `minio_node_cpu_avg_load5_max` | CPU load average 5min (max). | | `minio_node_cpu_avg_load1_perc_max` | CPU load average 1min (percentage) (max). |
| `minio_node_cpu_avg_load15` | CPU load average 15min. | | `minio_node_cpu_avg_load5` | CPU load average 5min. |
| `minio_node_cpu_avg_load15_avg` | CPU load average 15min (avg). | | `minio_node_cpu_avg_load5_avg` | CPU load average 5min (avg). |
| `minio_node_cpu_avg_load15_max` | CPU load average 15min (max). | | `minio_node_cpu_avg_load5_max` | CPU load average 5min (max). |
| `minio_node_cpu_avg_load5_perc` | CPU load average 5min (percentage). |
| `minio_node_cpu_avg_load5_perc_avg` | CPU load average 5min (percentage) (avg). |
| `minio_node_cpu_avg_load5_perc_max` | CPU load average 5min (percentage) (max). |
| `minio_node_cpu_avg_load15` | CPU load average 15min. |
| `minio_node_cpu_avg_load15_avg` | CPU load average 15min (avg). |
| `minio_node_cpu_avg_load15_max` | CPU load average 15min (max). |
| `minio_node_cpu_avg_load15_perc` | CPU load average 15min (percentage). |
| `minio_node_cpu_avg_load15_perc_avg` | CPU load average 15min (percentage) (avg). |
| `minio_node_cpu_avg_load15_perc_max` | CPU load average 15min (percentage) (max). |
## Memory Metrics ## Memory Metrics
| Name | Description | | Name | Description |
|:-------------------------------|:------------------------------------| |:-------------------------------|:------------------------------------------|
| `minio_node_mem_available` | Available memory on the node. | | `minio_node_mem_available` | Available memory on the node. |
| `minio_node_mem_available_avg` | Available memory on the node (avg). | | `minio_node_mem_available_avg` | Available memory on the node (avg). |
| `minio_node_mem_available_max` | Available memory on the node (max). | | `minio_node_mem_available_max` | Available memory on the node (max). |
| `minio_node_mem_buffers` | Buffers memory on the node. | | `minio_node_mem_buffers` | Buffers memory on the node. |
| `minio_node_mem_buffers_avg` | Buffers memory on the node (avg). | | `minio_node_mem_buffers_avg` | Buffers memory on the node (avg). |
| `minio_node_mem_buffers_max` | Buffers memory on the node (max). | | `minio_node_mem_buffers_max` | Buffers memory on the node (max). |
| `minio_node_mem_cache` | Cache memory on the node. | | `minio_node_mem_cache` | Cache memory on the node. |
| `minio_node_mem_cache_avg` | Cache memory on the node (avg). | | `minio_node_mem_cache_avg` | Cache memory on the node (avg). |
| `minio_node_mem_cache_max` | Cache memory on the node (max). | | `minio_node_mem_cache_max` | Cache memory on the node (max). |
| `minio_node_mem_free` | Free memory on the node. | | `minio_node_mem_free` | Free memory on the node. |
| `minio_node_mem_free_avg` | Free memory on the node (avg). | | `minio_node_mem_free_avg` | Free memory on the node (avg). |
| `minio_node_mem_free_max` | Free memory on the node (max). | | `minio_node_mem_free_max` | Free memory on the node (max). |
| `minio_node_mem_shared` | Shared memory on the node. | | `minio_node_mem_shared` | Shared memory on the node. |
| `minio_node_mem_shared_avg` | Shared memory on the node (avg). | | `minio_node_mem_shared_avg` | Shared memory on the node (avg). |
| `minio_node_mem_shared_max` | Shared memory on the node (max). | | `minio_node_mem_shared_max` | Shared memory on the node (max). |
| `minio_node_mem_total` | Total memory on the node. | | `minio_node_mem_total` | Total memory on the node. |
| `minio_node_mem_total_avg` | Total memory on the node (avg). | | `minio_node_mem_total_avg` | Total memory on the node (avg). |
| `minio_node_mem_total_max` | Total memory on the node (max). | | `minio_node_mem_total_max` | Total memory on the node (max). |
| `minio_node_mem_used` | Used memory on the node. | | `minio_node_mem_used` | Used memory on the node. |
| `minio_node_mem_used_avg` | Used memory on the node (avg). | | `minio_node_mem_used_avg` | Used memory on the node (avg). |
| `minio_node_mem_used_max` | Used memory on the node (max). | | `minio_node_mem_used_max` | Used memory on the node (max). |
| `minio_node_mem_used_perc` | Used memory percentage on the node. |
| `minio_node_mem_used_perc_avg` | Used memory percentage on the node (avg). |
| `minio_node_mem_used_perc_max` | Used memory percentage on the node (max). |

2
go.mod
View File

@ -49,7 +49,7 @@ require (
github.com/minio/dperf v0.5.2 github.com/minio/dperf v0.5.2
github.com/minio/highwayhash v1.0.2 github.com/minio/highwayhash v1.0.2
github.com/minio/kes-go v0.2.0 github.com/minio/kes-go v0.2.0
github.com/minio/madmin-go/v3 v3.0.35-0.20231130082526-199918d0ff20 github.com/minio/madmin-go/v3 v3.0.36
github.com/minio/minio-go/v7 v7.0.65-0.20231122233251-1f7dd6b7e3e1 github.com/minio/minio-go/v7 v7.0.65-0.20231122233251-1f7dd6b7e3e1
github.com/minio/mux v1.9.0 github.com/minio/mux v1.9.0
github.com/minio/pkg/v2 v2.0.4 github.com/minio/pkg/v2 v2.0.4

4
go.sum
View File

@ -446,8 +446,8 @@ github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA
github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY= github.com/minio/highwayhash v1.0.2/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY=
github.com/minio/kes-go v0.2.0 h1:HA33arq9s3MErbsj3PAXFVfFo4U4yw7lTKQ5kWFrpCA= github.com/minio/kes-go v0.2.0 h1:HA33arq9s3MErbsj3PAXFVfFo4U4yw7lTKQ5kWFrpCA=
github.com/minio/kes-go v0.2.0/go.mod h1:VorHLaIYis9/MxAHAtXN4d8PUMNKhIxTIlvFt0hBOEo= github.com/minio/kes-go v0.2.0/go.mod h1:VorHLaIYis9/MxAHAtXN4d8PUMNKhIxTIlvFt0hBOEo=
github.com/minio/madmin-go/v3 v3.0.35-0.20231130082526-199918d0ff20 h1:5kfjAypPN18QOOQaZjR3jfGzXyIwzLdKMS7d/cPY3Wc= github.com/minio/madmin-go/v3 v3.0.36 h1:Ewu/Rt7WVSs9slWW+SZHRc5RPQdYAGIdNZnRr+gyN4k=
github.com/minio/madmin-go/v3 v3.0.35-0.20231130082526-199918d0ff20/go.mod h1:4QN2NftLSV7MdlT50dkrenOMmNVHluxTvlqJou3hte8= github.com/minio/madmin-go/v3 v3.0.36/go.mod h1:4QN2NftLSV7MdlT50dkrenOMmNVHluxTvlqJou3hte8=
github.com/minio/mc v0.0.0-20231127112613-5e6ae2172e25 h1:8jT9Tz4opgrX6mnyFWW+TQ90AnrJqJ0mzeFXUWDHNGo= github.com/minio/mc v0.0.0-20231127112613-5e6ae2172e25 h1:8jT9Tz4opgrX6mnyFWW+TQ90AnrJqJ0mzeFXUWDHNGo=
github.com/minio/mc v0.0.0-20231127112613-5e6ae2172e25/go.mod h1:8kat72LmpzZ2/xykDcq64tcRRJkkWo1Kd/Z5coC6t0w= github.com/minio/mc v0.0.0-20231127112613-5e6ae2172e25/go.mod h1:8kat72LmpzZ2/xykDcq64tcRRJkkWo1Kd/Z5coC6t0w=
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=