mirror of
https://github.com/minio/minio.git
synced 2025-11-07 12:52:58 -05:00
feat: Add Metrics V3 API (#19068)
Metrics v3 is mainly a reorganization of metrics into smaller groups of metrics and the removal of internal aggregation of metrics received from peer nodes in a MinIO cluster. This change adds the endpoint `/minio/metrics/v3` as the top-level metrics endpoint, and under this, various sub-endpoints are implemented. These are currently documented in `docs/metrics/v3.md`. The handler will serve metrics at any path `/minio/metrics/v3/PATH`, as follows: when PATH is a sub-endpoint listed above => serves the group of metrics under that path; or when PATH is a (non-empty) parent directory of the sub-endpoints listed above => serves metrics from each child sub-endpoint of PATH; otherwise, returns a no resource found error. All available metrics are listed in `docs/metrics/v3.md`. More will be added subsequently.
This commit is contained in:
committed by
GitHub
parent
2dfa9adc5d
commit
b2c5b75efa
272
cmd/metrics-v3.go
Normal file
272
cmd/metrics-v3.go
Normal file
@@ -0,0 +1,272 @@
|
||||
// Copyright (c) 2015-2024 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/collectors"
|
||||
)
|
||||
|
||||
// Collector paths.
|
||||
//
|
||||
// These are paths under the top-level /minio/metrics/v3 metrics endpoint. Each
|
||||
// of these paths returns a set of V3 metrics.
|
||||
const (
|
||||
apiRequestsCollectorPath collectorPath = "/api/requests"
|
||||
apiBucketCollectorPath collectorPath = "/api/bucket"
|
||||
|
||||
systemNetworkInternodeCollectorPath collectorPath = "/system/network/internode"
|
||||
systemDriveCollectorPath collectorPath = "/system/drive"
|
||||
systemProcessCollectorPath collectorPath = "/system/process"
|
||||
systemGoCollectorPath collectorPath = "/system/go"
|
||||
|
||||
clusterHealthCollectorPath collectorPath = "/cluster/health"
|
||||
clusterUsageObjectsCollectorPath collectorPath = "/cluster/usage/objects"
|
||||
clusterUsageBucketsCollectorPath collectorPath = "/cluster/usage/buckets"
|
||||
clusterErasureSetCollectorPath collectorPath = "/cluster/erasure-set"
|
||||
)
|
||||
|
||||
// clusterBasePath is the path prefix shared by the cluster collector paths.
// newMetricGroups checks it to decide which metrics groups receive the
// serverName label: groups whose path starts with this prefix do not.
const clusterBasePath = "/cluster"
|
||||
|
||||
type metricsV3Collection struct {
|
||||
mgMap map[collectorPath]*MetricsGroup
|
||||
bucketMGMap map[collectorPath]*MetricsGroup
|
||||
|
||||
// Gatherers for non-bucket MetricsGroup's
|
||||
mgGatherers map[collectorPath]prometheus.Gatherer
|
||||
|
||||
collectorPaths []collectorPath
|
||||
}
|
||||
|
||||
func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
||||
// Create all metric groups.
|
||||
apiRequestsMG := NewMetricsGroup(apiRequestsCollectorPath,
|
||||
[]MetricDescriptor{
|
||||
apiRejectedAuthTotalMD,
|
||||
apiRejectedHeaderTotalMD,
|
||||
apiRejectedTimestampTotalMD,
|
||||
apiRejectedInvalidTotalMD,
|
||||
|
||||
apiRequestsWaitingTotalMD,
|
||||
apiRequestsIncomingTotalMD,
|
||||
apiRequestsInFlightTotalMD,
|
||||
apiRequestsTotalMD,
|
||||
apiRequestsErrorsTotalMD,
|
||||
apiRequests5xxErrorsTotalMD,
|
||||
apiRequests4xxErrorsTotalMD,
|
||||
apiRequestsCanceledTotalMD,
|
||||
|
||||
apiRequestsTTFBSecondsDistributionMD,
|
||||
|
||||
apiTrafficSentBytesMD,
|
||||
apiTrafficRecvBytesMD,
|
||||
},
|
||||
JoinLoaders(loadAPIRequestsHTTPMetrics, loadAPIRequestsTTFBMetrics,
|
||||
loadAPIRequestsNetworkMetrics),
|
||||
)
|
||||
|
||||
apiBucketMG := NewBucketMetricsGroup(apiBucketCollectorPath,
|
||||
[]MetricDescriptor{
|
||||
apiBucketTrafficRecvBytesMD,
|
||||
apiBucketTrafficSentBytesMD,
|
||||
|
||||
apiBucketRequestsInFlightMD,
|
||||
apiBucketRequestsTotalMD,
|
||||
apiBucketRequestsCanceledMD,
|
||||
apiBucketRequests4xxErrorsMD,
|
||||
apiBucketRequests5xxErrorsMD,
|
||||
|
||||
apiBucketRequestsTTFBSecondsDistributionMD,
|
||||
},
|
||||
JoinBucketLoaders(loadAPIBucketHTTPMetrics, loadAPIBucketTTFBMetrics),
|
||||
)
|
||||
|
||||
systemNetworkInternodeMG := NewMetricsGroup(systemNetworkInternodeCollectorPath,
|
||||
[]MetricDescriptor{
|
||||
internodeErrorsTotalMD,
|
||||
internodeDialedErrorsTotalMD,
|
||||
internodeDialAvgTimeNanosMD,
|
||||
internodeSentBytesTotalMD,
|
||||
internodeRecvBytesTotalMD,
|
||||
},
|
||||
loadNetworkInternodeMetrics,
|
||||
)
|
||||
|
||||
systemDriveMG := NewMetricsGroup(systemDriveCollectorPath,
|
||||
[]MetricDescriptor{
|
||||
driveUsedBytesMD,
|
||||
driveFreeBytesMD,
|
||||
driveTotalBytesMD,
|
||||
driveFreeInodesMD,
|
||||
driveTimeoutErrorsMD,
|
||||
driveAvailabilityErrorsMD,
|
||||
driveWaitingIOMD,
|
||||
driveAPILatencyMD,
|
||||
|
||||
driveOfflineCountMD,
|
||||
driveOnlineCountMD,
|
||||
driveCountMD,
|
||||
},
|
||||
loadDriveMetrics,
|
||||
)
|
||||
|
||||
clusterHealthMG := NewMetricsGroup(clusterHealthCollectorPath,
|
||||
[]MetricDescriptor{
|
||||
healthDrivesOfflineCountMD,
|
||||
healthDrivesOnlineCountMD,
|
||||
healthDrivesCountMD,
|
||||
|
||||
healthNodesOfflineCountMD,
|
||||
healthNodesOnlineCountMD,
|
||||
|
||||
healthCapacityRawTotalBytesMD,
|
||||
healthCapacityRawFreeBytesMD,
|
||||
healthCapacityUsableTotalBytesMD,
|
||||
healthCapacityUsableFreeBytesMD,
|
||||
},
|
||||
JoinLoaders(loadClusterHealthDriveMetrics,
|
||||
loadClusterHealthNodeMetrics,
|
||||
loadClusterHealthCapacityMetrics),
|
||||
)
|
||||
|
||||
clusterUsageObjectsMG := NewMetricsGroup(clusterUsageObjectsCollectorPath,
|
||||
[]MetricDescriptor{
|
||||
usageSinceLastUpdateSecondsMD,
|
||||
usageTotalBytesMD,
|
||||
usageObjectsCountMD,
|
||||
usageVersionsCountMD,
|
||||
usageDeleteMarkersCountMD,
|
||||
usageBucketsCountMD,
|
||||
usageObjectsDistributionMD,
|
||||
usageVersionsDistributionMD,
|
||||
},
|
||||
loadClusterUsageObjectMetrics,
|
||||
)
|
||||
|
||||
clusterUsageBucketsMG := NewBucketMetricsGroup(clusterUsageBucketsCollectorPath,
|
||||
[]MetricDescriptor{
|
||||
usageSinceLastUpdateSecondsMD,
|
||||
usageBucketTotalBytesMD,
|
||||
usageBucketObjectsTotalMD,
|
||||
usageBucketVersionsCountMD,
|
||||
usageBucketDeleteMarkersCountMD,
|
||||
usageBucketQuotaTotalBytesMD,
|
||||
usageBucketObjectSizeDistributionMD,
|
||||
usageBucketObjectVersionCountDistributionMD,
|
||||
},
|
||||
loadClusterUsageBucketMetrics,
|
||||
)
|
||||
|
||||
clusterErasureSetMG := NewMetricsGroup(clusterErasureSetCollectorPath,
|
||||
[]MetricDescriptor{
|
||||
erasureSetOverallWriteQuorumMD,
|
||||
erasureSetOverallHealthMD,
|
||||
erasureSetReadQuorumMD,
|
||||
erasureSetWriteQuorumMD,
|
||||
erasureSetOnlineDrivesCountMD,
|
||||
erasureSetHealingDrivesCountMD,
|
||||
erasureSetHealthMD,
|
||||
},
|
||||
loadClusterErasureSetMetrics,
|
||||
)
|
||||
|
||||
allMetricGroups := []*MetricsGroup{
|
||||
apiRequestsMG,
|
||||
apiBucketMG,
|
||||
|
||||
systemNetworkInternodeMG,
|
||||
systemDriveMG,
|
||||
|
||||
clusterHealthMG,
|
||||
clusterUsageObjectsMG,
|
||||
clusterUsageBucketsMG,
|
||||
clusterErasureSetMG,
|
||||
}
|
||||
|
||||
// Bucket metrics are special, they always include the bucket label. These
|
||||
// metrics required a list of buckets to be passed to the loader, and the list
|
||||
// of buckets is not known until the request is made. So we keep a separate
|
||||
// map for bucket metrics and handle them specially.
|
||||
|
||||
// Add the serverName and poolIndex labels to all non-cluster metrics.
|
||||
//
|
||||
// Also create metric group maps and set the cache.
|
||||
metricsCache := newMetricsCache()
|
||||
mgMap := make(map[collectorPath]*MetricsGroup)
|
||||
bucketMGMap := make(map[collectorPath]*MetricsGroup)
|
||||
for _, mg := range allMetricGroups {
|
||||
if !strings.HasPrefix(string(mg.CollectorPath), clusterBasePath) {
|
||||
mg.AddExtraLabels(
|
||||
serverName, globalLocalNodeName,
|
||||
// poolIndex, strconv.Itoa(globalLocalPoolIdx),
|
||||
)
|
||||
}
|
||||
mg.SetCache(metricsCache)
|
||||
if mg.IsBucketMetricsGroup() {
|
||||
bucketMGMap[mg.CollectorPath] = mg
|
||||
} else {
|
||||
mgMap[mg.CollectorPath] = mg
|
||||
}
|
||||
}
|
||||
|
||||
// Prepare to register the collectors. Other than `MetricGroup` collectors,
|
||||
// we also have standard collectors like `ProcessCollector` and `GoCollector`.
|
||||
|
||||
// Create all Non-`MetricGroup` collectors here.
|
||||
collectors := map[collectorPath]prometheus.Collector{
|
||||
systemProcessCollectorPath: collectors.NewProcessCollector(collectors.ProcessCollectorOpts{
|
||||
ReportErrors: true,
|
||||
}),
|
||||
systemGoCollectorPath: collectors.NewGoCollector(),
|
||||
}
|
||||
|
||||
// Add all `MetricGroup` collectors to the map.
|
||||
for _, mg := range allMetricGroups {
|
||||
collectors[mg.CollectorPath] = mg
|
||||
}
|
||||
|
||||
// Helper function to register a collector and return a gatherer for it.
|
||||
mustRegister := func(c ...prometheus.Collector) prometheus.Gatherer {
|
||||
subRegistry := prometheus.NewRegistry()
|
||||
for _, col := range c {
|
||||
subRegistry.MustRegister(col)
|
||||
}
|
||||
r.MustRegister(subRegistry)
|
||||
return subRegistry
|
||||
}
|
||||
|
||||
// Register all collectors and create gatherers for them.
|
||||
gatherers := make(map[collectorPath]prometheus.Gatherer, len(collectors))
|
||||
collectorPaths := make([]collectorPath, 0, len(collectors))
|
||||
for path, collector := range collectors {
|
||||
gatherers[path] = mustRegister(collector)
|
||||
collectorPaths = append(collectorPaths, path)
|
||||
}
|
||||
slices.Sort(collectorPaths)
|
||||
return &metricsV3Collection{
|
||||
mgMap: mgMap,
|
||||
bucketMGMap: bucketMGMap,
|
||||
mgGatherers: gatherers,
|
||||
collectorPaths: collectorPaths,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user