mirror of
https://github.com/minio/minio.git
synced 2025-11-07 04:42:56 -05:00
Capture percentage of cpu load and memory used (#18596)
By default, the CPU load is the cumulative load across all cores. Capture the percentage load (load * 100 / cpu-count). Also capture the percentage of memory used (used * 100 / total).
This commit is contained in:
@@ -91,7 +91,7 @@ func collectLocalMetrics(types madmin.MetricType, opts collectMetricsOpts) (m ma
|
||||
}
|
||||
cm, err := c.Times(false)
|
||||
if err != nil {
|
||||
m.Errors = append(m.Errors, fmt.Sprintf("%s: %v (cputimes)", globalMinioAddr, err.Error()))
|
||||
m.Errors = append(m.Errors, fmt.Sprintf("%s: %v (cpuTimes)", globalMinioAddr, err.Error()))
|
||||
} else {
|
||||
// not collecting per-cpu stats, so there will be only one element
|
||||
if len(cm) == 1 {
|
||||
@@ -100,6 +100,13 @@ func collectLocalMetrics(types madmin.MetricType, opts collectMetricsOpts) (m ma
|
||||
m.Errors = append(m.Errors, fmt.Sprintf("%s: Expected one CPU stat, got %d", globalMinioAddr, len(cm)))
|
||||
}
|
||||
}
|
||||
cpuCount, err := c.Counts(true)
|
||||
if err != nil {
|
||||
m.Errors = append(m.Errors, fmt.Sprintf("%s: %v (cpuCount)", globalMinioAddr, err.Error()))
|
||||
} else {
|
||||
m.Aggregated.CPU.CPUCount = cpuCount
|
||||
}
|
||||
|
||||
loadStat, err := load.Avg()
|
||||
if err != nil {
|
||||
m.Errors = append(m.Errors, fmt.Sprintf("%s: %v (loadStat)", globalMinioAddr, err.Error()))
|
||||
|
||||
@@ -53,6 +53,7 @@ const (
|
||||
|
||||
// memory stats
|
||||
memUsed MetricName = "used"
|
||||
memUsedPerc MetricName = "used_perc"
|
||||
memFree MetricName = "free"
|
||||
memShared MetricName = "shared"
|
||||
memBuffers MetricName = "buffers"
|
||||
@@ -60,15 +61,18 @@ const (
|
||||
memAvailable MetricName = "available"
|
||||
|
||||
// cpu stats
|
||||
cpuUser MetricName = "user"
|
||||
cpuSystem MetricName = "system"
|
||||
cpuIOWait MetricName = "iowait"
|
||||
cpuIdle MetricName = "idle"
|
||||
cpuNice MetricName = "nice"
|
||||
cpuSteal MetricName = "steal"
|
||||
cpuLoad1 MetricName = "load1"
|
||||
cpuLoad5 MetricName = "load5"
|
||||
cpuLoad15 MetricName = "load15"
|
||||
cpuUser MetricName = "user"
|
||||
cpuSystem MetricName = "system"
|
||||
cpuIOWait MetricName = "iowait"
|
||||
cpuIdle MetricName = "idle"
|
||||
cpuNice MetricName = "nice"
|
||||
cpuSteal MetricName = "steal"
|
||||
cpuLoad1 MetricName = "load1"
|
||||
cpuLoad5 MetricName = "load5"
|
||||
cpuLoad15 MetricName = "load15"
|
||||
cpuLoad1Perc MetricName = "load1_perc"
|
||||
cpuLoad5Perc MetricName = "load5_perc"
|
||||
cpuLoad15Perc MetricName = "load15_perc"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -126,6 +130,7 @@ func init() {
|
||||
interfaceTxErrors: "Transmit errors in " + interval,
|
||||
total: "Total memory on the node",
|
||||
memUsed: "Used memory on the node",
|
||||
memUsedPerc: "Used memory percentage on the node",
|
||||
memFree: "Free memory on the node",
|
||||
memShared: "Shared memory on the node",
|
||||
memBuffers: "Buffers memory on the node",
|
||||
@@ -151,6 +156,9 @@ func init() {
|
||||
cpuLoad1: "CPU load average 1min",
|
||||
cpuLoad5: "CPU load average 5min",
|
||||
cpuLoad15: "CPU load average 15min",
|
||||
cpuLoad1Perc: "CPU load average 1min (percentage)",
|
||||
cpuLoad5Perc: "CPU load average 5min (percentage)",
|
||||
cpuLoad15Perc: "CPU load average 15min (percentage)",
|
||||
}
|
||||
resourceMetricsGroups = []*MetricsGroup{
|
||||
getResourceMetrics(),
|
||||
@@ -283,6 +291,8 @@ func collectLocalResourceMetrics() {
|
||||
stats := hm.Mem.Info
|
||||
updateResourceMetrics(memSubsystem, total, float64(stats.Total), labels, false)
|
||||
updateResourceMetrics(memSubsystem, memUsed, float64(stats.Used), labels, false)
|
||||
perc := math.Round(float64(stats.Used*100*100)/float64(stats.Total)) / 100
|
||||
updateResourceMetrics(memSubsystem, memUsedPerc, perc, labels, false)
|
||||
updateResourceMetrics(memSubsystem, memFree, float64(stats.Free), labels, false)
|
||||
updateResourceMetrics(memSubsystem, memShared, float64(stats.Shared), labels, false)
|
||||
updateResourceMetrics(memSubsystem, memBuffers, float64(stats.Buffers), labels, false)
|
||||
@@ -312,6 +322,14 @@ func collectLocalResourceMetrics() {
|
||||
updateResourceMetrics(cpuSubsystem, cpuLoad1, ls.Load1, labels, false)
|
||||
updateResourceMetrics(cpuSubsystem, cpuLoad5, ls.Load5, labels, false)
|
||||
updateResourceMetrics(cpuSubsystem, cpuLoad15, ls.Load15, labels, false)
|
||||
if hm.CPU.CPUCount > 0 {
|
||||
perc := math.Round(ls.Load1*100*100/float64(hm.CPU.CPUCount)) / 100
|
||||
updateResourceMetrics(cpuSubsystem, cpuLoad1Perc, perc, labels, false)
|
||||
perc = math.Round(ls.Load5*100*100/float64(hm.CPU.CPUCount)) / 100
|
||||
updateResourceMetrics(cpuSubsystem, cpuLoad5Perc, perc, labels, false)
|
||||
perc = math.Round(ls.Load15*100*100/float64(hm.CPU.CPUCount)) / 100
|
||||
updateResourceMetrics(cpuSubsystem, cpuLoad15Perc, perc, labels, false)
|
||||
}
|
||||
}
|
||||
}
|
||||
break // only one host expected
|
||||
|
||||
Reference in New Issue
Block a user