diff --git a/cmd/metrics-resource.go b/cmd/metrics-resource.go index 9392231f5..eaa07ba5c 100644 --- a/cmd/metrics-resource.go +++ b/cmd/metrics-resource.go @@ -50,15 +50,6 @@ const ( interfaceTxBytes MetricName = "tx_bytes" interfaceTxErrors MetricName = "tx_errors" - // memory stats - memUsed MetricName = "used" - memUsedPerc MetricName = "used_perc" - memFree MetricName = "free" - memShared MetricName = "shared" - memBuffers MetricName = "buffers" - memCache MetricName = "cache" - memAvailable MetricName = "available" - // cpu stats cpuUser MetricName = "user" cpuSystem MetricName = "system" diff --git a/cmd/metrics-v3-cache.go b/cmd/metrics-v3-cache.go index 8b1c7fd1c..3f178f8e9 100644 --- a/cmd/metrics-v3-cache.go +++ b/cmd/metrics-v3-cache.go @@ -34,6 +34,7 @@ type metricsCache struct { dataUsageInfo *cachevalue.Cache[DataUsageInfo] esetHealthResult *cachevalue.Cache[HealthResult] driveMetrics *cachevalue.Cache[storageMetrics] + memoryMetrics *cachevalue.Cache[madmin.MemInfo] clusterDriveMetrics *cachevalue.Cache[storageMetrics] nodesUpDown *cachevalue.Cache[nodesOnline] } @@ -43,6 +44,7 @@ func newMetricsCache() *metricsCache { dataUsageInfo: newDataUsageInfoCache(), esetHealthResult: newESetHealthResultCache(), driveMetrics: newDriveMetricsCache(), + memoryMetrics: newMemoryMetricsCache(), clusterDriveMetrics: newClusterStorageInfoCache(), nodesUpDown: newNodesUpDownCache(), } @@ -198,6 +200,31 @@ func newDriveMetricsCache() *cachevalue.Cache[storageMetrics] { loadDriveMetrics) }
+// newMemoryMetricsCache - returns a cache of the local node's memory info.
+// The loader collects `madmin.MetricsMem` metrics restricted to
+// `globalLocalNodeName` and yields the first per-host entry whose memory info
+// has a non-empty address; with no such entry it yields the zero value and a
+// nil error.
+func newMemoryMetricsCache() *cachevalue.Cache[madmin.MemInfo] {
+	loader := func() (madmin.MemInfo, error) {
+		opts := collectMetricsOpts{
+			hosts: map[string]struct{}{globalLocalNodeName: {}},
+		}
+		collected := collectLocalMetrics(madmin.MetricsMem, opts)
+
+		for _, hostMetrics := range collected.ByHost {
+			if hostMetrics.Mem == nil || len(hostMetrics.Mem.Info.Addr) == 0 {
+				continue
+			}
+			return hostMetrics.Mem.Info, nil
+		}
+		return madmin.MemInfo{}, nil
+	}
+
+	interval := 1 * time.Minute
+	return cachevalue.NewFromFunc(interval, cachevalue.Opts{ReturnLastGood: true}, loader)
+}
+
 func newClusterStorageInfoCache() *cachevalue.Cache[storageMetrics] { loadStorageInfo := func() (v storageMetrics, err error) { objLayer := newObjectLayerFn() diff --git a/cmd/metrics-v3-system-memory.go b/cmd/metrics-v3-system-memory.go new file mode 100644 index 000000000..f304631bc --- /dev/null +++ b/cmd/metrics-v3-system-memory.go @@ -0,0 +1,65 @@ +// Copyright (c) 2015-2024 MinIO, Inc. +// +// # This file is part of MinIO Object Storage stack +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>.
+ +package cmd + +import ( + "context" +) + +const ( + memTotal = "total" + memUsed = "used" + memFree = "free" + memBuffers = "buffers" + memCache = "cache" + memUsedPerc = "used_perc" + memShared = "shared" + memAvailable = "available" +) + +var ( + memTotalMD = NewGaugeMD(memTotal, "Total memory on the node") + memUsedMD = NewGaugeMD(memUsed, "Used memory on the node") + memUsedPercMD = NewGaugeMD(memUsedPerc, "Used memory percentage on the node") + memFreeMD = NewGaugeMD(memFree, "Free memory on the node") + memBuffersMD = NewGaugeMD(memBuffers, "Buffers memory on the node") + memCacheMD = NewGaugeMD(memCache, "Cache memory on the node") + memSharedMD = NewGaugeMD(memShared, "Shared memory on the node") + memAvailableMD = NewGaugeMD(memAvailable, "Available memory on the node") +) + +// loadMemoryMetrics - `MetricsLoaderFn` for node memory metrics. +func loadMemoryMetrics(ctx context.Context, m MetricValues, c *metricsCache) error { + memMetrics, err := c.memoryMetrics.Get() + if err != nil { + metricsLogIf(ctx, err) + return err + } + + m.Set(memTotal, float64(memMetrics.Total)) + m.Set(memUsed, float64(memMetrics.Used)) + usedPerc := float64(memMetrics.Used) * 100 / float64(memMetrics.Total) + m.Set(memUsedPerc, usedPerc) + m.Set(memFree, float64(memMetrics.Free)) + m.Set(memBuffers, float64(memMetrics.Buffers)) + m.Set(memCache, float64(memMetrics.Cache)) + m.Set(memShared, float64(memMetrics.Shared)) + m.Set(memAvailable, float64(memMetrics.Available)) + + return nil +} diff --git a/cmd/metrics-v3.go b/cmd/metrics-v3.go index a8353882d..c7650faca 100644 --- a/cmd/metrics-v3.go +++ b/cmd/metrics-v3.go @@ -35,6 +35,7 @@ const ( systemNetworkInternodeCollectorPath collectorPath = "/system/network/internode" systemDriveCollectorPath collectorPath = "/system/drive" + systemMemoryCollectorPath collectorPath = "/system/memory" systemProcessCollectorPath collectorPath = "/system/process" systemGoCollectorPath collectorPath = "/system/go" @@ -112,6 +113,20 @@ func 
newMetricGroups(r *prometheus.Registry) *metricsV3Collection { loadNetworkInternodeMetrics, ) + systemMemoryMG := NewMetricsGroup(systemMemoryCollectorPath, + []MetricDescriptor{ + memTotalMD, + memUsedMD, + memFreeMD, + memAvailableMD, + memBuffersMD, + memCacheMD, + memSharedMD, + memUsedPercMD, + }, + loadMemoryMetrics, + ) + systemDriveMG := NewMetricsGroup(systemDriveCollectorPath, []MetricDescriptor{ driveUsedBytesMD, @@ -209,6 +224,7 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection { systemNetworkInternodeMG, systemDriveMG, + systemMemoryMG, clusterHealthMG, clusterUsageObjectsMG, diff --git a/docs/metrics/v3.md b/docs/metrics/v3.md index 185b9c4ef..f197fd95d 100644 --- a/docs/metrics/v3.md +++ b/docs/metrics/v3.md @@ -42,6 +42,7 @@ These are metrics about the minio process and the node. | Path | Description | |-----------------------------|---------------------------------------------------| | `/system/drive` | Metrics about drives on the system | +| `/system/memory` | Metrics about memory on the system | | `/system/network/internode` | Metrics about internode requests made by the node | | `/system/process` | Standard process metrics | | `/system/go` | Standard Go lang metrics | @@ -125,6 +126,20 @@ The standard metrics groups for ProcessCollector and GoCollector are not shown b | `minio_system_drive_writes_await` | `gauge` | Average time for write requests served on a drive | `drive,set_index,drive_index,pool_index,server` | | `minio_system_drive_perc_util` | `gauge` | Percentage of time the disk was busy | `drive,set_index,drive_index,pool_index,server` | +### `/system/memory` + +| Name | Type | Help | Labels | +|----------------------------------|---------|------------------------------------|----------| +| `minio_system_memory_used` | `gauge` | Used memory on the node | `server` | +| `minio_system_memory_used_perc` | `gauge` | Used memory percentage on the node | `server` | +| `minio_system_memory_free` | `gauge` | Free memory on the 
node | `server` | +| `minio_system_memory_total` | `gauge` | Total memory on the node | `server` | +| `minio_system_memory_buffers` | `gauge` | Buffers memory on the node | `server` | +| `minio_system_memory_cache` | `gauge` | Cache memory on the node | `server` | +| `minio_system_memory_shared` | `gauge` | Shared memory on the node | `server` | +| `minio_system_memory_available` | `gauge` | Available memory on the node | `server` | + + ### `/system/network/internode` | Name | Type | Help | Labels |