minio/cmd/metrics-v3-cache.go
Aditya Manthramurthy b2c5b75efa
feat: Add Metrics V3 API (#19068)
Metrics v3 is mainly a reorganization of metrics into smaller groups of
metrics and the removal of internal aggregation of metrics received from
peer nodes in a MinIO cluster.

This change adds the endpoint `/minio/metrics/v3` as the top-level metrics
endpoint and under this, various sub-endpoints are implemented. These
are currently documented in `docs/metrics/v3.md`

The handler will serve metrics at any path
`/minio/metrics/v3/PATH`, as follows:

when PATH is a sub-endpoint listed above => serves the group of
metrics under that path; or when PATH is a (non-empty) parent 
directory of the sub-endpoints listed above => serves metrics
from each child sub-endpoint of PATH. otherwise, returns a no 
resource found error

All available metrics are listed in the `docs/metrics/v3.md`. More will
be added subsequently.
2024-03-10 01:15:15 -08:00

146 lines
4.2 KiB
Go

// Copyright (c) 2015-2024 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package cmd
import (
"time"
"github.com/minio/madmin-go/v3"
"github.com/minio/minio/internal/cachevalue"
)
// metricsCache - cache for metrics.
//
// When serving metrics, this cache is passed to the MetricsLoaderFn.
//
// This cache is used for metrics that would result in network/storage calls.
type metricsCache struct {
dataUsageInfo *cachevalue.Cache[DataUsageInfo]
esetHealthResult *cachevalue.Cache[HealthResult]
driveMetrics *cachevalue.Cache[storageMetrics]
clusterDriveMetrics *cachevalue.Cache[storageMetrics]
nodesUpDown *cachevalue.Cache[nodesOnline]
}
func newMetricsCache() *metricsCache {
return &metricsCache{
dataUsageInfo: newDataUsageInfoCache(),
esetHealthResult: newESetHealthResultCache(),
driveMetrics: newDriveMetricsCache(),
clusterDriveMetrics: newClusterStorageInfoCache(),
nodesUpDown: newNodesUpDownCache(),
}
}
type nodesOnline struct {
Online, Offline int
}
func newNodesUpDownCache() *cachevalue.Cache[nodesOnline] {
loadNodesUpDown := func() (v nodesOnline, err error) {
v.Online, v.Offline = globalNotificationSys.GetPeerOnlineCount()
return
}
return cachevalue.NewFromFunc(1*time.Minute,
cachevalue.Opts{ReturnLastGood: true},
loadNodesUpDown)
}
type storageMetrics struct {
storageInfo madmin.StorageInfo
onlineDrives, offlineDrives, totalDrives int
}
func newDataUsageInfoCache() *cachevalue.Cache[DataUsageInfo] {
loadDataUsage := func() (u DataUsageInfo, err error) {
objLayer := newObjectLayerFn()
if objLayer == nil {
return
}
// Collect cluster level object metrics.
u, err = loadDataUsageFromBackend(GlobalContext, objLayer)
return
}
return cachevalue.NewFromFunc(1*time.Minute,
cachevalue.Opts{ReturnLastGood: true},
loadDataUsage)
}
func newESetHealthResultCache() *cachevalue.Cache[HealthResult] {
loadHealth := func() (r HealthResult, err error) {
objLayer := newObjectLayerFn()
if objLayer == nil {
return
}
r = objLayer.Health(GlobalContext, HealthOptions{})
return
}
return cachevalue.NewFromFunc(1*time.Minute,
cachevalue.Opts{ReturnLastGood: true},
loadHealth,
)
}
func newDriveMetricsCache() *cachevalue.Cache[storageMetrics] {
loadDriveMetrics := func() (v storageMetrics, err error) {
objLayer := newObjectLayerFn()
if objLayer == nil {
return
}
storageInfo := objLayer.LocalStorageInfo(GlobalContext, true)
onlineDrives, offlineDrives := getOnlineOfflineDisksStats(storageInfo.Disks)
totalDrives := onlineDrives.Merge(offlineDrives)
v = storageMetrics{
storageInfo: storageInfo,
onlineDrives: onlineDrives.Sum(),
offlineDrives: offlineDrives.Sum(),
totalDrives: totalDrives.Sum(),
}
return
}
return cachevalue.NewFromFunc(1*time.Minute,
cachevalue.Opts{ReturnLastGood: true},
loadDriveMetrics)
}
func newClusterStorageInfoCache() *cachevalue.Cache[storageMetrics] {
loadStorageInfo := func() (v storageMetrics, err error) {
objLayer := newObjectLayerFn()
if objLayer == nil {
return storageMetrics{}, nil
}
storageInfo := objLayer.StorageInfo(GlobalContext, true)
onlineDrives, offlineDrives := getOnlineOfflineDisksStats(storageInfo.Disks)
totalDrives := onlineDrives.Merge(offlineDrives)
v = storageMetrics{
storageInfo: storageInfo,
onlineDrives: onlineDrives.Sum(),
offlineDrives: offlineDrives.Sum(),
totalDrives: totalDrives.Sum(),
}
return
}
return cachevalue.NewFromFunc(1*time.Minute,
cachevalue.Opts{ReturnLastGood: true},
loadStorageInfo,
)
}