mirror of
https://github.com/minio/minio.git
synced 2024-12-24 22:25:54 -05:00
Add process metrics in metrics-v3
(#19612)
endpoint: /minio/metrics/v3/system/process metrics: - locks_read_total - locks_write_total - cpu_total_seconds - go_routine_total - io_rchar_bytes - io_read_bytes - io_wchar_bytes - io_write_bytes - start_time_seconds - uptime_seconds - file_descriptor_limit_total - file_descriptor_open_total - syscall_read_total - syscall_write_total - resident_memory_bytes - virtual_memory_bytes - virtual_memory_max_bytes Since the standard process collector implements only a subset of these metrics, remove it and implement our own custom process collector that captures all the process metrics we need.
This commit is contained in:
parent
a658b976f5
commit
4caa3422bd
172
cmd/metrics-v3-system-process.go
Normal file
172
cmd/metrics-v3-system-process.go
Normal file
@ -0,0 +1,172 @@
|
||||
// Copyright (c) 2015-2024 MinIO, Inc.
|
||||
//
|
||||
// # This file is part of MinIO Object Storage stack
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"runtime"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/procfs"
|
||||
)
|
||||
|
||||
// Names of the metrics exposed under the /system/process endpoint, grouped
// by where the loader functions in this file obtain their values.
const (
	// Populated by loadProcessMetrics from the Go runtime, the server boot
	// time and the lock server statistics.
	processGoRoutineTotal  = "go_routine_total"
	processUptimeSeconds   = "uptime_seconds"
	processLocksReadTotal  = "locks_read_total"
	processLocksWriteTotal = "locks_write_total"

	// Populated by loadProcStatMetrics from the process stat snapshot.
	processCPUTotalSeconds     = "cpu_total_seconds"
	processStartTimeSeconds    = "start_time_seconds"
	processResidentMemoryBytes = "resident_memory_bytes"
	processVirtualMemoryBytes  = "virtual_memory_bytes"

	// Populated by loadProcIOMetrics from the process I/O counters.
	processIORCharBytes      = "io_rchar_bytes"
	processIOReadBytes       = "io_read_bytes"
	processIOWCharBytes      = "io_wchar_bytes"
	processIOWriteBytes      = "io_write_bytes"
	processSyscallReadTotal  = "syscall_read_total"
	processSyscallWriteTotal = "syscall_write_total"

	// Populated by loadProcFSMetrics from the process limits and the count
	// of open file descriptors.
	processFileDescriptorLimitTotal = "file_descriptor_limit_total"
	processFileDescriptorOpenTotal  = "file_descriptor_open_total"
	processVirtualMemoryMaxBytes    = "virtual_memory_max_bytes"
)
|
||||
|
||||
var (
|
||||
processLocksReadTotalMD = NewGaugeMD(processLocksReadTotal, "Number of current READ locks on this peer")
|
||||
processLocksWriteTotalMD = NewGaugeMD(processLocksWriteTotal, "Number of current WRITE locks on this peer")
|
||||
processCPUTotalSecondsMD = NewCounterMD(processCPUTotalSeconds, "Total user and system CPU time spent in seconds")
|
||||
processGoRoutineTotalMD = NewGaugeMD(processGoRoutineTotal, "Total number of go routines running")
|
||||
processIORCharBytesMD = NewCounterMD(processIORCharBytes, "Total bytes read by the process from the underlying storage system including cache, /proc/[pid]/io rchar")
|
||||
processIOReadBytesMD = NewCounterMD(processIOReadBytes, "Total bytes read by the process from the underlying storage system, /proc/[pid]/io read_bytes")
|
||||
processIOWCharBytesMD = NewCounterMD(processIOWCharBytes, "Total bytes written by the process to the underlying storage system including page cache, /proc/[pid]/io wchar")
|
||||
processIOWriteBytesMD = NewCounterMD(processIOWriteBytes, "Total bytes written by the process to the underlying storage system, /proc/[pid]/io write_bytes")
|
||||
processStarttimeSecondsMD = NewGaugeMD(processStartTimeSeconds, "Start time for MinIO process in seconds since Unix epoc")
|
||||
processUptimeSecondsMD = NewGaugeMD(processUptimeSeconds, "Uptime for MinIO process in seconds")
|
||||
processFileDescriptorLimitTotalMD = NewGaugeMD(processFileDescriptorLimitTotal, "Limit on total number of open file descriptors for the MinIO Server process")
|
||||
processFileDescriptorOpenTotalMD = NewGaugeMD(processFileDescriptorOpenTotal, "Total number of open file descriptors by the MinIO Server process")
|
||||
processSyscallReadTotalMD = NewCounterMD(processSyscallReadTotal, "Total read SysCalls to the kernel. /proc/[pid]/io syscr")
|
||||
processSyscallWriteTotalMD = NewCounterMD(processSyscallWriteTotal, "Total write SysCalls to the kernel. /proc/[pid]/io syscw")
|
||||
processResidentMemoryBytesMD = NewGaugeMD(processResidentMemoryBytes, "Resident memory size in bytes")
|
||||
processVirtualMemoryBytesMD = NewGaugeMD(processVirtualMemoryBytes, "Virtual memory size in bytes")
|
||||
processVirtualMemoryMaxBytesMD = NewGaugeMD(processVirtualMemoryMaxBytes, "Maximum virtual memory size in bytes")
|
||||
)
|
||||
|
||||
func loadProcStatMetrics(ctx context.Context, stat procfs.ProcStat, m MetricValues) {
|
||||
if stat.CPUTime() > 0 {
|
||||
m.Set(processCPUTotalSeconds, float64(stat.CPUTime()))
|
||||
}
|
||||
|
||||
if stat.ResidentMemory() > 0 {
|
||||
m.Set(processResidentMemoryBytes, float64(stat.ResidentMemory()))
|
||||
}
|
||||
|
||||
if stat.VirtualMemory() > 0 {
|
||||
m.Set(processVirtualMemoryBytes, float64(stat.VirtualMemory()))
|
||||
}
|
||||
|
||||
startTime, err := stat.StartTime()
|
||||
if err != nil {
|
||||
metricsLogIf(ctx, err)
|
||||
} else if startTime > 0 {
|
||||
m.Set(processStartTimeSeconds, float64(startTime))
|
||||
}
|
||||
}
|
||||
|
||||
func loadProcIOMetrics(ctx context.Context, io procfs.ProcIO, m MetricValues) {
|
||||
if io.RChar > 0 {
|
||||
m.Set(processIORCharBytes, float64(io.RChar))
|
||||
}
|
||||
|
||||
if io.ReadBytes > 0 {
|
||||
m.Set(processIOReadBytes, float64(io.ReadBytes))
|
||||
}
|
||||
|
||||
if io.WChar > 0 {
|
||||
m.Set(processIOWCharBytes, float64(io.WChar))
|
||||
}
|
||||
|
||||
if io.WriteBytes > 0 {
|
||||
m.Set(processIOWriteBytes, float64(io.WriteBytes))
|
||||
}
|
||||
|
||||
if io.SyscR > 0 {
|
||||
m.Set(processSyscallReadTotal, float64(io.SyscR))
|
||||
}
|
||||
|
||||
if io.SyscW > 0 {
|
||||
m.Set(processSyscallWriteTotal, float64(io.SyscW))
|
||||
}
|
||||
}
|
||||
|
||||
func loadProcFSMetrics(ctx context.Context, p procfs.Proc, m MetricValues) {
|
||||
stat, err := p.Stat()
|
||||
if err != nil {
|
||||
metricsLogIf(ctx, err)
|
||||
} else {
|
||||
loadProcStatMetrics(ctx, stat, m)
|
||||
}
|
||||
|
||||
io, err := p.IO()
|
||||
if err != nil {
|
||||
metricsLogIf(ctx, err)
|
||||
} else {
|
||||
loadProcIOMetrics(ctx, io, m)
|
||||
}
|
||||
|
||||
l, err := p.Limits()
|
||||
if err != nil {
|
||||
metricsLogIf(ctx, err)
|
||||
} else {
|
||||
if l.OpenFiles > 0 {
|
||||
m.Set(processFileDescriptorLimitTotal, float64(l.OpenFiles))
|
||||
}
|
||||
|
||||
if l.AddressSpace > 0 {
|
||||
m.Set(processVirtualMemoryMaxBytes, float64(l.AddressSpace))
|
||||
}
|
||||
}
|
||||
|
||||
openFDs, err := p.FileDescriptorsLen()
|
||||
if err != nil {
|
||||
metricsLogIf(ctx, err)
|
||||
} else if openFDs > 0 {
|
||||
m.Set(processFileDescriptorOpenTotal, float64(openFDs))
|
||||
}
|
||||
}
|
||||
|
||||
// loadProcessMetrics - `MetricsLoaderFn` for process metrics
|
||||
func loadProcessMetrics(ctx context.Context, m MetricValues, c *metricsCache) error {
|
||||
m.Set(processGoRoutineTotal, float64(runtime.NumGoroutine()))
|
||||
|
||||
if !globalBootTime.IsZero() {
|
||||
m.Set(processUptimeSeconds, time.Since(globalBootTime).Seconds())
|
||||
}
|
||||
|
||||
p, err := procfs.Self()
|
||||
if err != nil {
|
||||
metricsLogIf(ctx, err)
|
||||
} else {
|
||||
loadProcFSMetrics(ctx, p, m)
|
||||
}
|
||||
|
||||
if globalIsDistErasure && globalLockServer != nil {
|
||||
st := globalLockServer.stats()
|
||||
m.Set(processLocksReadTotal, float64(st.Reads))
|
||||
m.Set(processLocksWriteTotal, float64(st.Writes))
|
||||
}
|
||||
return nil
|
||||
}
|
@ -144,6 +144,29 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
||||
loadCPUMetrics,
|
||||
)
|
||||
|
||||
systemProcessMG := NewMetricsGroup(systemProcessCollectorPath,
|
||||
[]MetricDescriptor{
|
||||
processLocksReadTotalMD,
|
||||
processLocksWriteTotalMD,
|
||||
processCPUTotalSecondsMD,
|
||||
processGoRoutineTotalMD,
|
||||
processIORCharBytesMD,
|
||||
processIOReadBytesMD,
|
||||
processIOWCharBytesMD,
|
||||
processIOWriteBytesMD,
|
||||
processStarttimeSecondsMD,
|
||||
processUptimeSecondsMD,
|
||||
processFileDescriptorLimitTotalMD,
|
||||
processFileDescriptorOpenTotalMD,
|
||||
processSyscallReadTotalMD,
|
||||
processSyscallWriteTotalMD,
|
||||
processResidentMemoryBytesMD,
|
||||
processVirtualMemoryBytesMD,
|
||||
processVirtualMemoryMaxBytesMD,
|
||||
},
|
||||
loadProcessMetrics,
|
||||
)
|
||||
|
||||
systemDriveMG := NewMetricsGroup(systemDriveCollectorPath,
|
||||
[]MetricDescriptor{
|
||||
driveUsedBytesMD,
|
||||
@ -263,6 +286,7 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
||||
systemDriveMG,
|
||||
systemMemoryMG,
|
||||
systemCPUMG,
|
||||
systemProcessMG,
|
||||
|
||||
clusterHealthMG,
|
||||
clusterUsageObjectsMG,
|
||||
@ -299,13 +323,10 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
||||
}
|
||||
|
||||
// Prepare to register the collectors. Other than `MetricGroup` collectors,
|
||||
// we also have standard collectors like `ProcessCollector` and `GoCollector`.
|
||||
// we also have standard collectors like `GoCollector`.
|
||||
|
||||
// Create all Non-`MetricGroup` collectors here.
|
||||
collectors := map[collectorPath]prometheus.Collector{
|
||||
systemProcessCollectorPath: collectors.NewProcessCollector(collectors.ProcessCollectorOpts{
|
||||
ReportErrors: true,
|
||||
}),
|
||||
systemGoCollectorPath: collectors.NewGoCollector(),
|
||||
}
|
||||
|
||||
|
@ -64,7 +64,7 @@ These present metrics about the whole MinIO cluster.
|
||||
|
||||
Each of the following sub-sections list metrics returned by each of the endpoints.
|
||||
|
||||
The standard metrics groups for ProcessCollector and GoCollector are not shown below.
|
||||
The standard metrics group for GoCollector is not shown below.
|
||||
|
||||
### `/api/requests`
|
||||
|
||||
@ -163,6 +163,28 @@ The standard metrics groups for ProcessCollector and GoCollector are not shown b
|
||||
| `minio_system_network_internode_sent_bytes_total` | `counter` | Total number of bytes sent to other peer nodes | `server,pool_index` |
|
||||
| `minio_system_network_internode_recv_bytes_total` | `counter` | Total number of bytes received from other peer nodes | `server,pool_index` |
|
||||
|
||||
### `/system/process`
|
||||
|
||||
| Name                                               | Type      | Help                                                                                                           | Labels   |
|----------------------------------------------------|-----------|----------------------------------------------------------------------------------------------------------------|----------|
| `minio_system_process_locks_read_total`            | `gauge`   | Number of current READ locks on this peer                                                                      | `server` |
| `minio_system_process_locks_write_total`           | `gauge`   | Number of current WRITE locks on this peer                                                                     | `server` |
| `minio_system_process_cpu_total_seconds`           | `counter` | Total user and system CPU time spent in seconds                                                                | `server` |
| `minio_system_process_go_routine_total`            | `gauge`   | Total number of go routines running                                                                            | `server` |
| `minio_system_process_io_rchar_bytes`              | `counter` | Total bytes read by the process from the underlying storage system including cache, /proc/[pid]/io rchar       | `server` |
| `minio_system_process_io_read_bytes`               | `counter` | Total bytes read by the process from the underlying storage system, /proc/[pid]/io read_bytes                  | `server` |
| `minio_system_process_io_wchar_bytes`              | `counter` | Total bytes written by the process to the underlying storage system including page cache, /proc/[pid]/io wchar | `server` |
| `minio_system_process_io_write_bytes`              | `counter` | Total bytes written by the process to the underlying storage system, /proc/[pid]/io write_bytes                | `server` |
| `minio_system_process_start_time_seconds`          | `gauge`   | Start time for MinIO process in seconds since Unix epoch                                                       | `server` |
| `minio_system_process_uptime_seconds`              | `gauge`   | Uptime for MinIO process in seconds                                                                            | `server` |
| `minio_system_process_file_descriptor_limit_total` | `gauge`   | Limit on total number of open file descriptors for the MinIO Server process                                    | `server` |
| `minio_system_process_file_descriptor_open_total`  | `gauge`   | Total number of open file descriptors by the MinIO Server process                                              | `server` |
| `minio_system_process_syscall_read_total`          | `counter` | Total read SysCalls to the kernel. /proc/[pid]/io syscr                                                        | `server` |
| `minio_system_process_syscall_write_total`         | `counter` | Total write SysCalls to the kernel. /proc/[pid]/io syscw                                                       | `server` |
| `minio_system_process_resident_memory_bytes`       | `gauge`   | Resident memory size in bytes                                                                                  | `server` |
| `minio_system_process_virtual_memory_bytes`        | `gauge`   | Virtual memory size in bytes                                                                                   | `server` |
| `minio_system_process_virtual_memory_max_bytes`    | `gauge`   | Maximum virtual memory size in bytes                                                                           | `server` |
|
||||
|
||||
### `/cluster/health`
|
||||
|
||||
| Name | Type | Help | Labels |
|
||||
|
Loading…
Reference in New Issue
Block a user