mirror of
https://github.com/minio/minio.git
synced 2025-04-03 19:30:29 -04:00
Add system CPU metrics to metrics-v3 (#19560)
endpoint: /minio/metrics/v3/system/cpu

metrics:
- minio_system_cpu_avg_idle
- minio_system_cpu_avg_iowait
- minio_system_cpu_load
- minio_system_cpu_load_perc
- minio_system_cpu_nice
- minio_system_cpu_steal
- minio_system_cpu_system
- minio_system_cpu_user
This commit is contained in:
parent
9693c382a8
commit
f7b665347e
@ -162,14 +162,7 @@ func init() {
|
|||||||
resourceCollector = newMinioResourceCollector(resourceMetricsGroups)
|
resourceCollector = newMinioResourceCollector(resourceMetricsGroups)
|
||||||
}
|
}
|
||||||
|
|
||||||
func updateResourceMetrics(subSys MetricSubsystem, name MetricName, val float64, labels map[string]string, isCumulative bool) {
|
func getResourceKey(name MetricName, labels map[string]string) string {
|
||||||
resourceMetricsMapMu.Lock()
|
|
||||||
defer resourceMetricsMapMu.Unlock()
|
|
||||||
subsysMetrics, found := resourceMetricsMap[subSys]
|
|
||||||
if !found {
|
|
||||||
subsysMetrics = ResourceMetrics{}
|
|
||||||
}
|
|
||||||
|
|
||||||
// labels are used to uniquely identify a metric
|
// labels are used to uniquely identify a metric
|
||||||
// e.g. reads_per_sec_{drive} inside the map
|
// e.g. reads_per_sec_{drive} inside the map
|
||||||
sfx := ""
|
sfx := ""
|
||||||
@ -180,7 +173,18 @@ func updateResourceMetrics(subSys MetricSubsystem, name MetricName, val float64,
|
|||||||
sfx += v
|
sfx += v
|
||||||
}
|
}
|
||||||
|
|
||||||
key := string(name) + "_" + sfx
|
return string(name) + "_" + sfx
|
||||||
|
}
|
||||||
|
|
||||||
|
func updateResourceMetrics(subSys MetricSubsystem, name MetricName, val float64, labels map[string]string, isCumulative bool) {
|
||||||
|
resourceMetricsMapMu.Lock()
|
||||||
|
defer resourceMetricsMapMu.Unlock()
|
||||||
|
subsysMetrics, found := resourceMetricsMap[subSys]
|
||||||
|
if !found {
|
||||||
|
subsysMetrics = ResourceMetrics{}
|
||||||
|
}
|
||||||
|
|
||||||
|
key := getResourceKey(name, labels)
|
||||||
metric, found := subsysMetrics[key]
|
metric, found := subsysMetrics[key]
|
||||||
if !found {
|
if !found {
|
||||||
metric = ResourceMetric{
|
metric = ResourceMetric{
|
||||||
|
@ -35,6 +35,7 @@ type metricsCache struct {
|
|||||||
esetHealthResult *cachevalue.Cache[HealthResult]
|
esetHealthResult *cachevalue.Cache[HealthResult]
|
||||||
driveMetrics *cachevalue.Cache[storageMetrics]
|
driveMetrics *cachevalue.Cache[storageMetrics]
|
||||||
memoryMetrics *cachevalue.Cache[madmin.MemInfo]
|
memoryMetrics *cachevalue.Cache[madmin.MemInfo]
|
||||||
|
cpuMetrics *cachevalue.Cache[madmin.CPUMetrics]
|
||||||
clusterDriveMetrics *cachevalue.Cache[storageMetrics]
|
clusterDriveMetrics *cachevalue.Cache[storageMetrics]
|
||||||
nodesUpDown *cachevalue.Cache[nodesOnline]
|
nodesUpDown *cachevalue.Cache[nodesOnline]
|
||||||
}
|
}
|
||||||
@ -45,6 +46,7 @@ func newMetricsCache() *metricsCache {
|
|||||||
esetHealthResult: newESetHealthResultCache(),
|
esetHealthResult: newESetHealthResultCache(),
|
||||||
driveMetrics: newDriveMetricsCache(),
|
driveMetrics: newDriveMetricsCache(),
|
||||||
memoryMetrics: newMemoryMetricsCache(),
|
memoryMetrics: newMemoryMetricsCache(),
|
||||||
|
cpuMetrics: newCPUMetricsCache(),
|
||||||
clusterDriveMetrics: newClusterStorageInfoCache(),
|
clusterDriveMetrics: newClusterStorageInfoCache(),
|
||||||
nodesUpDown: newNodesUpDownCache(),
|
nodesUpDown: newNodesUpDownCache(),
|
||||||
}
|
}
|
||||||
@ -200,6 +202,31 @@ func newDriveMetricsCache() *cachevalue.Cache[storageMetrics] {
|
|||||||
loadDriveMetrics)
|
loadDriveMetrics)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func newCPUMetricsCache() *cachevalue.Cache[madmin.CPUMetrics] {
|
||||||
|
loadCPUMetrics := func() (v madmin.CPUMetrics, err error) {
|
||||||
|
var types madmin.MetricType = madmin.MetricsCPU
|
||||||
|
|
||||||
|
m := collectLocalMetrics(types, collectMetricsOpts{
|
||||||
|
hosts: map[string]struct{}{
|
||||||
|
globalLocalNodeName: {},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
for _, hm := range m.ByHost {
|
||||||
|
if hm.CPU != nil {
|
||||||
|
v = *hm.CPU
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
return cachevalue.NewFromFunc(1*time.Minute,
|
||||||
|
cachevalue.Opts{ReturnLastGood: true},
|
||||||
|
loadCPUMetrics)
|
||||||
|
}
|
||||||
|
|
||||||
func newMemoryMetricsCache() *cachevalue.Cache[madmin.MemInfo] {
|
func newMemoryMetricsCache() *cachevalue.Cache[madmin.MemInfo] {
|
||||||
loadMemoryMetrics := func() (v madmin.MemInfo, err error) {
|
loadMemoryMetrics := func() (v madmin.MemInfo, err error) {
|
||||||
var types madmin.MetricType = madmin.MetricsMem
|
var types madmin.MetricType = madmin.MetricsMem
|
||||||
|
82
cmd/metrics-v3-system-cpu.go
Normal file
82
cmd/metrics-v3-system-cpu.go
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
// Copyright (c) 2015-2024 MinIO, Inc.
|
||||||
|
//
|
||||||
|
// # This file is part of MinIO Object Storage stack
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU Affero General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Affero General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
package cmd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"math"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
sysCPUAvgIdle = "avg_idle"
|
||||||
|
sysCPUAvgIOWait = "avg_iowait"
|
||||||
|
sysCPULoad = "load"
|
||||||
|
sysCPULoadPerc = "load_perc"
|
||||||
|
sysCPUNice = "nice"
|
||||||
|
sysCPUSteal = "steal"
|
||||||
|
sysCPUSystem = "system"
|
||||||
|
sysCPUUser = "user"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
sysCPUAvgIdleMD = NewGaugeMD(sysCPUAvgIdle, "Average CPU idle time")
|
||||||
|
sysCPUAvgIOWaitMD = NewGaugeMD(sysCPUAvgIOWait, "Average CPU IOWait time")
|
||||||
|
sysCPULoadMD = NewGaugeMD(sysCPULoad, "CPU load average 1min")
|
||||||
|
sysCPULoadPercMD = NewGaugeMD(sysCPULoadPerc, "CPU load average 1min (percentage)")
|
||||||
|
sysCPUNiceMD = NewGaugeMD(sysCPUNice, "CPU nice time")
|
||||||
|
sysCPUStealMD = NewGaugeMD(sysCPUSteal, "CPU steal time")
|
||||||
|
sysCPUSystemMD = NewGaugeMD(sysCPUSystem, "CPU system time")
|
||||||
|
sysCPUUserMD = NewGaugeMD(sysCPUUser, "CPU user time")
|
||||||
|
)
|
||||||
|
|
||||||
|
// loadCPUMetrics - `MetricsLoaderFn` for system CPU metrics.
|
||||||
|
func loadCPUMetrics(ctx context.Context, m MetricValues, c *metricsCache) error {
|
||||||
|
cpuMetrics, _ := c.cpuMetrics.Get()
|
||||||
|
|
||||||
|
if cpuMetrics.LoadStat != nil {
|
||||||
|
m.Set(sysCPULoad, cpuMetrics.LoadStat.Load1)
|
||||||
|
perc := cpuMetrics.LoadStat.Load1 * 100 / float64(cpuMetrics.CPUCount)
|
||||||
|
m.Set(sysCPULoadPerc, math.Round(perc*100)/100)
|
||||||
|
}
|
||||||
|
|
||||||
|
ts := cpuMetrics.TimesStat
|
||||||
|
tot := ts.User + ts.System + ts.Idle + ts.Iowait + ts.Nice + ts.Steal
|
||||||
|
cpuUserVal := math.Round(ts.User/tot*100*100) / 100
|
||||||
|
m.Set(sysCPUUser, cpuUserVal)
|
||||||
|
cpuSystemVal := math.Round(ts.System/tot*100*100) / 100
|
||||||
|
m.Set(sysCPUSystem, cpuSystemVal)
|
||||||
|
cpuNiceVal := math.Round(ts.Nice/tot*100*100) / 100
|
||||||
|
m.Set(sysCPUNice, cpuNiceVal)
|
||||||
|
cpuStealVal := math.Round(ts.Steal/tot*100*100) / 100
|
||||||
|
m.Set(sysCPUSteal, cpuStealVal)
|
||||||
|
|
||||||
|
// metrics-resource.go runs a job to collect resource metrics including their Avg values and
|
||||||
|
// stores them in resourceMetricsMap. We can use it to get the Avg values of CPU idle and IOWait.
|
||||||
|
cpuResourceMetrics, found := resourceMetricsMap[cpuSubsystem]
|
||||||
|
if found {
|
||||||
|
if cpuIdleMetric, ok := cpuResourceMetrics[getResourceKey(cpuIdle, nil)]; ok {
|
||||||
|
avgVal := math.Round(cpuIdleMetric.Avg*100) / 100
|
||||||
|
m.Set(sysCPUAvgIdle, avgVal)
|
||||||
|
}
|
||||||
|
if cpuIOWaitMetric, ok := cpuResourceMetrics[getResourceKey(cpuIOWait, nil)]; ok {
|
||||||
|
avgVal := math.Round(cpuIOWaitMetric.Avg*100) / 100
|
||||||
|
m.Set(sysCPUAvgIOWait, avgVal)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
@ -36,6 +36,7 @@ const (
|
|||||||
systemNetworkInternodeCollectorPath collectorPath = "/system/network/internode"
|
systemNetworkInternodeCollectorPath collectorPath = "/system/network/internode"
|
||||||
systemDriveCollectorPath collectorPath = "/system/drive"
|
systemDriveCollectorPath collectorPath = "/system/drive"
|
||||||
systemMemoryCollectorPath collectorPath = "/system/memory"
|
systemMemoryCollectorPath collectorPath = "/system/memory"
|
||||||
|
systemCPUCollectorPath collectorPath = "/system/cpu"
|
||||||
systemProcessCollectorPath collectorPath = "/system/process"
|
systemProcessCollectorPath collectorPath = "/system/process"
|
||||||
systemGoCollectorPath collectorPath = "/system/go"
|
systemGoCollectorPath collectorPath = "/system/go"
|
||||||
|
|
||||||
@ -128,6 +129,20 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
|||||||
loadMemoryMetrics,
|
loadMemoryMetrics,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
systemCPUMG := NewMetricsGroup(systemCPUCollectorPath,
|
||||||
|
[]MetricDescriptor{
|
||||||
|
sysCPUAvgIdleMD,
|
||||||
|
sysCPUAvgIOWaitMD,
|
||||||
|
sysCPULoadMD,
|
||||||
|
sysCPULoadPercMD,
|
||||||
|
sysCPUNiceMD,
|
||||||
|
sysCPUStealMD,
|
||||||
|
sysCPUSystemMD,
|
||||||
|
sysCPUUserMD,
|
||||||
|
},
|
||||||
|
loadCPUMetrics,
|
||||||
|
)
|
||||||
|
|
||||||
systemDriveMG := NewMetricsGroup(systemDriveCollectorPath,
|
systemDriveMG := NewMetricsGroup(systemDriveCollectorPath,
|
||||||
[]MetricDescriptor{
|
[]MetricDescriptor{
|
||||||
driveUsedBytesMD,
|
driveUsedBytesMD,
|
||||||
@ -235,6 +250,7 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
|||||||
systemNetworkInternodeMG,
|
systemNetworkInternodeMG,
|
||||||
systemDriveMG,
|
systemDriveMG,
|
||||||
systemMemoryMG,
|
systemMemoryMG,
|
||||||
|
systemCPUMG,
|
||||||
|
|
||||||
clusterHealthMG,
|
clusterHealthMG,
|
||||||
clusterUsageObjectsMG,
|
clusterUsageObjectsMG,
|
||||||
|
@ -139,6 +139,18 @@ The standard metrics groups for ProcessCollector and GoCollector are not shown b
|
|||||||
| `minio_system_memory_shared` | `gauge` | Shared memory on the node | `server` |
|
| `minio_system_memory_shared` | `gauge` | Shared memory on the node | `server` |
|
||||||
| `minio_system_memory_available` | `gauge` | Available memory on the node | `server` |
|
| `minio_system_memory_available` | `gauge` | Available memory on the node | `server` |
|
||||||
|
|
||||||
|
### `/system/cpu`
|
||||||
|
|
||||||
|
| Name | Type | Help | Labels |
|
||||||
|
|-------------------------------|---------|------------------------------------|----------|
|
||||||
|
| `minio_system_cpu_avg_idle` | `gauge` | Average CPU idle time | `server` |
|
||||||
|
| `minio_system_cpu_avg_iowait` | `gauge` | Average CPU IOWait time | `server` |
|
||||||
|
| `minio_system_cpu_load` | `gauge` | CPU load average 1min | `server` |
|
||||||
|
| `minio_system_cpu_load_perc` | `gauge` | CPU load average 1min (percentage) | `server` |
|
||||||
|
| `minio_system_cpu_nice` | `gauge` | CPU nice time | `server` |
|
||||||
|
| `minio_system_cpu_steal` | `gauge` | CPU steal time | `server` |
|
||||||
|
| `minio_system_cpu_system` | `gauge` | CPU system time | `server` |
|
||||||
|
| `minio_system_cpu_user` | `gauge` | CPU user time | `server` |
|
||||||
|
|
||||||
### `/system/network/internode`
|
### `/system/network/internode`
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user