mirror of
https://github.com/minio/minio.git
synced 2024-12-24 06:05:55 -05:00
Add system CPU metrics to metrics-v3 (#19560)
endpoint: /minio/metrics/v3/system/cpu

metrics:
- minio_system_cpu_avg_idle
- minio_system_cpu_avg_iowait
- minio_system_cpu_load
- minio_system_cpu_load_perc
- minio_system_cpu_nice
- minio_system_cpu_steal
- minio_system_cpu_system
- minio_system_cpu_user
This commit is contained in:
parent
9693c382a8
commit
f7b665347e
@ -162,14 +162,7 @@ func init() {
|
||||
resourceCollector = newMinioResourceCollector(resourceMetricsGroups)
|
||||
}
|
||||
|
||||
func updateResourceMetrics(subSys MetricSubsystem, name MetricName, val float64, labels map[string]string, isCumulative bool) {
|
||||
resourceMetricsMapMu.Lock()
|
||||
defer resourceMetricsMapMu.Unlock()
|
||||
subsysMetrics, found := resourceMetricsMap[subSys]
|
||||
if !found {
|
||||
subsysMetrics = ResourceMetrics{}
|
||||
}
|
||||
|
||||
func getResourceKey(name MetricName, labels map[string]string) string {
|
||||
// labels are used to uniquely identify a metric
|
||||
// e.g. reads_per_sec_{drive} inside the map
|
||||
sfx := ""
|
||||
@ -180,7 +173,18 @@ func updateResourceMetrics(subSys MetricSubsystem, name MetricName, val float64,
|
||||
sfx += v
|
||||
}
|
||||
|
||||
key := string(name) + "_" + sfx
|
||||
return string(name) + "_" + sfx
|
||||
}
|
||||
|
||||
func updateResourceMetrics(subSys MetricSubsystem, name MetricName, val float64, labels map[string]string, isCumulative bool) {
|
||||
resourceMetricsMapMu.Lock()
|
||||
defer resourceMetricsMapMu.Unlock()
|
||||
subsysMetrics, found := resourceMetricsMap[subSys]
|
||||
if !found {
|
||||
subsysMetrics = ResourceMetrics{}
|
||||
}
|
||||
|
||||
key := getResourceKey(name, labels)
|
||||
metric, found := subsysMetrics[key]
|
||||
if !found {
|
||||
metric = ResourceMetric{
|
||||
|
@ -35,6 +35,7 @@ type metricsCache struct {
|
||||
esetHealthResult *cachevalue.Cache[HealthResult]
|
||||
driveMetrics *cachevalue.Cache[storageMetrics]
|
||||
memoryMetrics *cachevalue.Cache[madmin.MemInfo]
|
||||
cpuMetrics *cachevalue.Cache[madmin.CPUMetrics]
|
||||
clusterDriveMetrics *cachevalue.Cache[storageMetrics]
|
||||
nodesUpDown *cachevalue.Cache[nodesOnline]
|
||||
}
|
||||
@ -45,6 +46,7 @@ func newMetricsCache() *metricsCache {
|
||||
esetHealthResult: newESetHealthResultCache(),
|
||||
driveMetrics: newDriveMetricsCache(),
|
||||
memoryMetrics: newMemoryMetricsCache(),
|
||||
cpuMetrics: newCPUMetricsCache(),
|
||||
clusterDriveMetrics: newClusterStorageInfoCache(),
|
||||
nodesUpDown: newNodesUpDownCache(),
|
||||
}
|
||||
@ -200,6 +202,31 @@ func newDriveMetricsCache() *cachevalue.Cache[storageMetrics] {
|
||||
loadDriveMetrics)
|
||||
}
|
||||
|
||||
func newCPUMetricsCache() *cachevalue.Cache[madmin.CPUMetrics] {
|
||||
loadCPUMetrics := func() (v madmin.CPUMetrics, err error) {
|
||||
var types madmin.MetricType = madmin.MetricsCPU
|
||||
|
||||
m := collectLocalMetrics(types, collectMetricsOpts{
|
||||
hosts: map[string]struct{}{
|
||||
globalLocalNodeName: {},
|
||||
},
|
||||
})
|
||||
|
||||
for _, hm := range m.ByHost {
|
||||
if hm.CPU != nil {
|
||||
v = *hm.CPU
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
return cachevalue.NewFromFunc(1*time.Minute,
|
||||
cachevalue.Opts{ReturnLastGood: true},
|
||||
loadCPUMetrics)
|
||||
}
|
||||
|
||||
func newMemoryMetricsCache() *cachevalue.Cache[madmin.MemInfo] {
|
||||
loadMemoryMetrics := func() (v madmin.MemInfo, err error) {
|
||||
var types madmin.MetricType = madmin.MetricsMem
|
||||
|
82
cmd/metrics-v3-system-cpu.go
Normal file
82
cmd/metrics-v3-system-cpu.go
Normal file
@ -0,0 +1,82 @@
|
||||
// Copyright (c) 2015-2024 MinIO, Inc.
|
||||
//
|
||||
// # This file is part of MinIO Object Storage stack
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"math"
|
||||
)
|
||||
|
||||
// Names of the metrics in the system CPU group. The commit description and
// the metrics documentation list them exported as `minio_system_cpu_<name>`.
const (
|
||||
// Average CPU idle value.
sysCPUAvgIdle = "avg_idle"
|
||||
// Average CPU IOWait value.
sysCPUAvgIOWait = "avg_iowait"
|
||||
// 1-minute CPU load average.
sysCPULoad = "load"
|
||||
// 1-minute CPU load average expressed as a percentage.
sysCPULoadPerc = "load_perc"
|
||||
// CPU nice time.
sysCPUNice = "nice"
|
||||
// CPU steal time.
sysCPUSteal = "steal"
|
||||
// CPU system time.
sysCPUSystem = "system"
|
||||
// CPU user time.
sysCPUUser = "user"
|
||||
)
|
||||
|
||||
// Descriptors for the system CPU metrics, one per metric name above. Each is
// built with NewGaugeMD from the metric name and its help text; the help text
// matches the `/system/cpu` table in the metrics documentation.
var (
|
||||
sysCPUAvgIdleMD = NewGaugeMD(sysCPUAvgIdle, "Average CPU idle time")
|
||||
sysCPUAvgIOWaitMD = NewGaugeMD(sysCPUAvgIOWait, "Average CPU IOWait time")
|
||||
sysCPULoadMD = NewGaugeMD(sysCPULoad, "CPU load average 1min")
|
||||
sysCPULoadPercMD = NewGaugeMD(sysCPULoadPerc, "CPU load average 1min (percentage)")
|
||||
sysCPUNiceMD = NewGaugeMD(sysCPUNice, "CPU nice time")
|
||||
sysCPUStealMD = NewGaugeMD(sysCPUSteal, "CPU steal time")
|
||||
sysCPUSystemMD = NewGaugeMD(sysCPUSystem, "CPU system time")
|
||||
sysCPUUserMD = NewGaugeMD(sysCPUUser, "CPU user time")
|
||||
)
|
||||
|
||||
// loadCPUMetrics - `MetricsLoaderFn` for system CPU metrics.
|
||||
func loadCPUMetrics(ctx context.Context, m MetricValues, c *metricsCache) error {
|
||||
cpuMetrics, _ := c.cpuMetrics.Get()
|
||||
|
||||
if cpuMetrics.LoadStat != nil {
|
||||
m.Set(sysCPULoad, cpuMetrics.LoadStat.Load1)
|
||||
perc := cpuMetrics.LoadStat.Load1 * 100 / float64(cpuMetrics.CPUCount)
|
||||
m.Set(sysCPULoadPerc, math.Round(perc*100)/100)
|
||||
}
|
||||
|
||||
ts := cpuMetrics.TimesStat
|
||||
tot := ts.User + ts.System + ts.Idle + ts.Iowait + ts.Nice + ts.Steal
|
||||
cpuUserVal := math.Round(ts.User/tot*100*100) / 100
|
||||
m.Set(sysCPUUser, cpuUserVal)
|
||||
cpuSystemVal := math.Round(ts.System/tot*100*100) / 100
|
||||
m.Set(sysCPUSystem, cpuSystemVal)
|
||||
cpuNiceVal := math.Round(ts.Nice/tot*100*100) / 100
|
||||
m.Set(sysCPUNice, cpuNiceVal)
|
||||
cpuStealVal := math.Round(ts.Steal/tot*100*100) / 100
|
||||
m.Set(sysCPUSteal, cpuStealVal)
|
||||
|
||||
// metrics-resource.go runs a job to collect resource metrics including their Avg values and
|
||||
// stores them in resourceMetricsMap. We can use it to get the Avg values of CPU idle and IOWait.
|
||||
cpuResourceMetrics, found := resourceMetricsMap[cpuSubsystem]
|
||||
if found {
|
||||
if cpuIdleMetric, ok := cpuResourceMetrics[getResourceKey(cpuIdle, nil)]; ok {
|
||||
avgVal := math.Round(cpuIdleMetric.Avg*100) / 100
|
||||
m.Set(sysCPUAvgIdle, avgVal)
|
||||
}
|
||||
if cpuIOWaitMetric, ok := cpuResourceMetrics[getResourceKey(cpuIOWait, nil)]; ok {
|
||||
avgVal := math.Round(cpuIOWaitMetric.Avg*100) / 100
|
||||
m.Set(sysCPUAvgIOWait, avgVal)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
@ -36,6 +36,7 @@ const (
|
||||
systemNetworkInternodeCollectorPath collectorPath = "/system/network/internode"
|
||||
systemDriveCollectorPath collectorPath = "/system/drive"
|
||||
systemMemoryCollectorPath collectorPath = "/system/memory"
|
||||
systemCPUCollectorPath collectorPath = "/system/cpu"
|
||||
systemProcessCollectorPath collectorPath = "/system/process"
|
||||
systemGoCollectorPath collectorPath = "/system/go"
|
||||
|
||||
@ -128,6 +129,20 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
||||
loadMemoryMetrics,
|
||||
)
|
||||
|
||||
systemCPUMG := NewMetricsGroup(systemCPUCollectorPath,
|
||||
[]MetricDescriptor{
|
||||
sysCPUAvgIdleMD,
|
||||
sysCPUAvgIOWaitMD,
|
||||
sysCPULoadMD,
|
||||
sysCPULoadPercMD,
|
||||
sysCPUNiceMD,
|
||||
sysCPUStealMD,
|
||||
sysCPUSystemMD,
|
||||
sysCPUUserMD,
|
||||
},
|
||||
loadCPUMetrics,
|
||||
)
|
||||
|
||||
systemDriveMG := NewMetricsGroup(systemDriveCollectorPath,
|
||||
[]MetricDescriptor{
|
||||
driveUsedBytesMD,
|
||||
@ -235,6 +250,7 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
||||
systemNetworkInternodeMG,
|
||||
systemDriveMG,
|
||||
systemMemoryMG,
|
||||
systemCPUMG,
|
||||
|
||||
clusterHealthMG,
|
||||
clusterUsageObjectsMG,
|
||||
|
@ -139,6 +139,18 @@ The standard metrics groups for ProcessCollector and GoCollector are not shown b
|
||||
| `minio_system_memory_shared` | `gauge` | Shared memory on the node | `server` |
|
||||
| `minio_system_memory_available` | `gauge` | Available memory on the node | `server` |
|
||||
|
||||
### `/system/cpu`
|
||||
|
||||
| Name | Type | Help | Labels |
|
||||
|-------------------------------|---------|------------------------------------|----------|
|
||||
| `minio_system_cpu_avg_idle` | `gauge` | Average CPU idle time | `server` |
|
||||
| `minio_system_cpu_avg_iowait` | `gauge` | Average CPU IOWait time | `server` |
|
||||
| `minio_system_cpu_load` | `gauge` | CPU load average 1min | `server` |
|
||||
| `minio_system_cpu_load_perc` | `gauge` | CPU load average 1min (percentage) | `server` |
|
||||
| `minio_system_cpu_nice` | `gauge` | CPU nice time | `server` |
|
||||
| `minio_system_cpu_steal` | `gauge` | CPU steal time | `server` |
|
||||
| `minio_system_cpu_system` | `gauge` | CPU system time | `server` |
|
||||
| `minio_system_cpu_user` | `gauge` | CPU user time | `server` |
|
||||
|
||||
### `/system/network/internode`
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user