Add system memory metrics in v3 (#19486)

The following memory metrics are added under /system/memory:

- available
- buffers
- cache
- free
- shared
- total
- used
- used_perc
This commit is contained in:
Shireesh Anjal 2024-04-17 10:40:25 +05:30 committed by GitHub
parent f65dd3e5a2
commit 6df76ca73c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 123 additions and 9 deletions

View File

@ -50,15 +50,6 @@ const (
interfaceTxBytes MetricName = "tx_bytes" interfaceTxBytes MetricName = "tx_bytes"
interfaceTxErrors MetricName = "tx_errors" interfaceTxErrors MetricName = "tx_errors"
// memory stats
memUsed MetricName = "used"
memUsedPerc MetricName = "used_perc"
memFree MetricName = "free"
memShared MetricName = "shared"
memBuffers MetricName = "buffers"
memCache MetricName = "cache"
memAvailable MetricName = "available"
// cpu stats // cpu stats
cpuUser MetricName = "user" cpuUser MetricName = "user"
cpuSystem MetricName = "system" cpuSystem MetricName = "system"

View File

@ -34,6 +34,7 @@ type metricsCache struct {
dataUsageInfo *cachevalue.Cache[DataUsageInfo] dataUsageInfo *cachevalue.Cache[DataUsageInfo]
esetHealthResult *cachevalue.Cache[HealthResult] esetHealthResult *cachevalue.Cache[HealthResult]
driveMetrics *cachevalue.Cache[storageMetrics] driveMetrics *cachevalue.Cache[storageMetrics]
memoryMetrics *cachevalue.Cache[madmin.MemInfo]
clusterDriveMetrics *cachevalue.Cache[storageMetrics] clusterDriveMetrics *cachevalue.Cache[storageMetrics]
nodesUpDown *cachevalue.Cache[nodesOnline] nodesUpDown *cachevalue.Cache[nodesOnline]
} }
@ -43,6 +44,7 @@ func newMetricsCache() *metricsCache {
dataUsageInfo: newDataUsageInfoCache(), dataUsageInfo: newDataUsageInfoCache(),
esetHealthResult: newESetHealthResultCache(), esetHealthResult: newESetHealthResultCache(),
driveMetrics: newDriveMetricsCache(), driveMetrics: newDriveMetricsCache(),
memoryMetrics: newMemoryMetricsCache(),
clusterDriveMetrics: newClusterStorageInfoCache(), clusterDriveMetrics: newClusterStorageInfoCache(),
nodesUpDown: newNodesUpDownCache(), nodesUpDown: newNodesUpDownCache(),
} }
@ -198,6 +200,31 @@ func newDriveMetricsCache() *cachevalue.Cache[storageMetrics] {
loadDriveMetrics) loadDriveMetrics)
} }
// newMemoryMetricsCache builds the cache holding the local node's memory
// information. The cache is created with a one-minute duration and the
// ReturnLastGood option.
func newMemoryMetricsCache() *cachevalue.Cache[madmin.MemInfo] {
	// fetchLocalMemInfo collects memory metrics with the hosts option set to
	// the local node name and returns the first per-host entry that carries
	// memory info with a non-empty address. When no entry qualifies it yields
	// a zero-value MemInfo together with a nil error.
	fetchLocalMemInfo := func() (madmin.MemInfo, error) {
		localOnly := collectMetricsOpts{
			hosts: map[string]struct{}{
				globalLocalNodeName: {},
			},
		}
		collected := collectLocalMetrics(madmin.MetricsMem, localOnly)
		for _, hostMetrics := range collected.ByHost {
			if hostMetrics.Mem == nil || len(hostMetrics.Mem.Info.Addr) == 0 {
				continue
			}
			return hostMetrics.Mem.Info, nil
		}
		return madmin.MemInfo{}, nil
	}

	cacheOpts := cachevalue.Opts{ReturnLastGood: true}
	return cachevalue.NewFromFunc(time.Minute, cacheOpts, fetchLocalMemInfo)
}
func newClusterStorageInfoCache() *cachevalue.Cache[storageMetrics] { func newClusterStorageInfoCache() *cachevalue.Cache[storageMetrics] {
loadStorageInfo := func() (v storageMetrics, err error) { loadStorageInfo := func() (v storageMetrics, err error) {
objLayer := newObjectLayerFn() objLayer := newObjectLayerFn()

View File

@ -0,0 +1,65 @@
// Copyright (c) 2015-2024 MinIO, Inc.
//
// # This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package cmd
import (
"context"
)
// Names of the node memory metrics exposed under /system/memory, listed in
// the same order as their descriptors.
const (
	memTotal     = "total"
	memUsed      = "used"
	memUsedPerc  = "used_perc"
	memFree      = "free"
	memBuffers   = "buffers"
	memCache     = "cache"
	memShared    = "shared"
	memAvailable = "available"
)
// Metric descriptors for the node memory metrics. Each one is created with
// NewGaugeMD from one of the mem* name constants and a help string.
// NOTE(review): the units of these values are not stated here - presumably
// bytes, with memUsedPercMD being a percentage; confirm against the source
// of the memory info.
var (
memTotalMD = NewGaugeMD(memTotal, "Total memory on the node")
memUsedMD = NewGaugeMD(memUsed, "Used memory on the node")
memUsedPercMD = NewGaugeMD(memUsedPerc, "Used memory percentage on the node")
memFreeMD = NewGaugeMD(memFree, "Free memory on the node")
memBuffersMD = NewGaugeMD(memBuffers, "Buffers memory on the node")
memCacheMD = NewGaugeMD(memCache, "Cache memory on the node")
memSharedMD = NewGaugeMD(memShared, "Shared memory on the node")
memAvailableMD = NewGaugeMD(memAvailable, "Available memory on the node")
)
// loadMemoryMetrics - `MetricsLoaderFn` for node memory metrics.
//
// It reads the memory information from c.memoryMetrics and publishes each
// field on m under the matching mem* metric name. If the cache lookup fails,
// the error is logged via metricsLogIf and returned without setting any
// metric value.
func loadMemoryMetrics(ctx context.Context, m MetricValues, c *metricsCache) error {
	memMetrics, err := c.memoryMetrics.Get()
	if err != nil {
		metricsLogIf(ctx, err)
		return err
	}

	m.Set(memTotal, float64(memMetrics.Total))
	m.Set(memUsed, float64(memMetrics.Used))

	// Total can be zero when no memory information was collected (the cached
	// value is then a zero-value MemInfo). Floating-point division by zero
	// would publish NaN or +Inf, so report 0 for the percentage instead.
	var usedPerc float64
	if memMetrics.Total > 0 {
		usedPerc = float64(memMetrics.Used) * 100 / float64(memMetrics.Total)
	}
	m.Set(memUsedPerc, usedPerc)

	m.Set(memFree, float64(memMetrics.Free))
	m.Set(memBuffers, float64(memMetrics.Buffers))
	m.Set(memCache, float64(memMetrics.Cache))
	m.Set(memShared, float64(memMetrics.Shared))
	m.Set(memAvailable, float64(memMetrics.Available))

	return nil
}

View File

@ -35,6 +35,7 @@ const (
systemNetworkInternodeCollectorPath collectorPath = "/system/network/internode" systemNetworkInternodeCollectorPath collectorPath = "/system/network/internode"
systemDriveCollectorPath collectorPath = "/system/drive" systemDriveCollectorPath collectorPath = "/system/drive"
systemMemoryCollectorPath collectorPath = "/system/memory"
systemProcessCollectorPath collectorPath = "/system/process" systemProcessCollectorPath collectorPath = "/system/process"
systemGoCollectorPath collectorPath = "/system/go" systemGoCollectorPath collectorPath = "/system/go"
@ -112,6 +113,20 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
loadNetworkInternodeMetrics, loadNetworkInternodeMetrics,
) )
systemMemoryMG := NewMetricsGroup(systemMemoryCollectorPath,
[]MetricDescriptor{
memTotalMD,
memUsedMD,
memFreeMD,
memAvailableMD,
memBuffersMD,
memCacheMD,
memSharedMD,
memUsedPercMD,
},
loadMemoryMetrics,
)
systemDriveMG := NewMetricsGroup(systemDriveCollectorPath, systemDriveMG := NewMetricsGroup(systemDriveCollectorPath,
[]MetricDescriptor{ []MetricDescriptor{
driveUsedBytesMD, driveUsedBytesMD,
@ -209,6 +224,7 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
systemNetworkInternodeMG, systemNetworkInternodeMG,
systemDriveMG, systemDriveMG,
systemMemoryMG,
clusterHealthMG, clusterHealthMG,
clusterUsageObjectsMG, clusterUsageObjectsMG,

View File

@ -42,6 +42,7 @@ These are metrics about the minio process and the node.
| Path | Description | | Path | Description |
|-----------------------------|---------------------------------------------------| |-----------------------------|---------------------------------------------------|
| `/system/drive` | Metrics about drives on the system | | `/system/drive` | Metrics about drives on the system |
| `/system/memory` | Metrics about memory on the system |
| `/system/network/internode` | Metrics about internode requests made by the node | | `/system/network/internode` | Metrics about internode requests made by the node |
| `/system/process` | Standard process metrics | | `/system/process` | Standard process metrics |
| `/system/go` | Standard Go lang metrics | | `/system/go` | Standard Go lang metrics |
@ -125,6 +126,20 @@ The standard metrics groups for ProcessCollector and GoCollector are not shown b
| `minio_system_drive_writes_await` | `gauge` | Average time for write requests served on a drive | `drive,set_index,drive_index,pool_index,server` | | `minio_system_drive_writes_await` | `gauge` | Average time for write requests served on a drive | `drive,set_index,drive_index,pool_index,server` |
| `minio_system_drive_perc_util` | `gauge` | Percentage of time the disk was busy | `drive,set_index,drive_index,pool_index,server` | | `minio_system_drive_perc_util` | `gauge` | Percentage of time the disk was busy | `drive,set_index,drive_index,pool_index,server` |
### `/system/memory`
| Name | Type | Help | Labels |
|----------------------------------|---------|------------------------------------|----------|
| `minio_system_memory_used` | `gauge` | Used memory on the node | `server` |
| `minio_system_memory_used_perc` | `gauge` | Used memory percentage on the node | `server` |
| `minio_system_memory_free` | `gauge` | Free memory on the node | `server` |
| `minio_system_memory_total` | `gauge` | Total memory on the node | `server` |
| `minio_system_memory_buffers` | `gauge` | Buffers memory on the node | `server` |
| `minio_system_memory_cache` | `gauge` | Cache memory on the node | `server` |
| `minio_system_memory_shared` | `gauge` | Shared memory on the node | `server` |
| `minio_system_memory_available` | `gauge` | Available memory on the node | `server` |
### `/system/network/internode` ### `/system/network/internode`
| Name | Type | Help | Labels | | Name | Type | Help | Labels |