mirror of
https://github.com/minio/minio.git
synced 2024-12-24 22:25:54 -05:00
74c047cb03
also adding missing recent_backlog_count metric to v3 metrics
488 lines
14 KiB
Go
488 lines
14 KiB
Go
// Copyright (c) 2015-2024 MinIO, Inc.
|
|
//
|
|
// This file is part of MinIO Object Storage stack
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package cmd
|
|
|
|
import (
|
|
"slices"
|
|
"strings"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/collectors"
|
|
)
|
|
|
|
// Collector paths.
|
|
//
|
|
// These are paths under the top-level /minio/metrics/v3 metrics endpoint. Each
|
|
// of these paths returns a set of V3 metrics.
|
|
//
|
|
// Per-bucket metrics endpoints always start with /bucket and the bucket name is
|
|
// appended to the path. e.g. if the collector path is /bucket/api, the endpoint
|
|
// for the bucket "mybucket" would be /minio/metrics/v3/bucket/api/mybucket
|
|
const (
|
|
apiRequestsCollectorPath collectorPath = "/api/requests"
|
|
|
|
bucketAPICollectorPath collectorPath = "/bucket/api"
|
|
bucketReplicationCollectorPath collectorPath = "/bucket/replication"
|
|
|
|
systemNetworkInternodeCollectorPath collectorPath = "/system/network/internode"
|
|
systemDriveCollectorPath collectorPath = "/system/drive"
|
|
systemMemoryCollectorPath collectorPath = "/system/memory"
|
|
systemCPUCollectorPath collectorPath = "/system/cpu"
|
|
systemProcessCollectorPath collectorPath = "/system/process"
|
|
|
|
debugGoCollectorPath collectorPath = "/debug/go"
|
|
|
|
clusterHealthCollectorPath collectorPath = "/cluster/health"
|
|
clusterUsageObjectsCollectorPath collectorPath = "/cluster/usage/objects"
|
|
clusterUsageBucketsCollectorPath collectorPath = "/cluster/usage/buckets"
|
|
clusterErasureSetCollectorPath collectorPath = "/cluster/erasure-set"
|
|
clusterIAMCollectorPath collectorPath = "/cluster/iam"
|
|
clusterConfigCollectorPath collectorPath = "/cluster/config"
|
|
|
|
ilmCollectorPath collectorPath = "/ilm"
|
|
auditCollectorPath collectorPath = "/audit"
|
|
loggerWebhookCollectorPath collectorPath = "/logger/webhook"
|
|
replicationCollectorPath collectorPath = "/replication"
|
|
notificationCollectorPath collectorPath = "/notification"
|
|
scannerCollectorPath collectorPath = "/scanner"
|
|
)
|
|
|
|
// clusterBasePath is the path prefix newMetricGroups checks for when deciding
// whether a metrics group gets the serverName label: groups whose collector
// path starts with this prefix are left without it.
const clusterBasePath = "/cluster"
|
|
|
|
type metricsV3Collection struct {
|
|
mgMap map[collectorPath]*MetricsGroup
|
|
bucketMGMap map[collectorPath]*MetricsGroup
|
|
|
|
// Gatherers for non-bucket MetricsGroup's
|
|
mgGatherers map[collectorPath]prometheus.Gatherer
|
|
|
|
collectorPaths []collectorPath
|
|
}
|
|
|
|
func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
|
// Create all metric groups.
|
|
apiRequestsMG := NewMetricsGroup(apiRequestsCollectorPath,
|
|
[]MetricDescriptor{
|
|
apiRejectedAuthTotalMD,
|
|
apiRejectedHeaderTotalMD,
|
|
apiRejectedTimestampTotalMD,
|
|
apiRejectedInvalidTotalMD,
|
|
|
|
apiRequestsWaitingTotalMD,
|
|
apiRequestsIncomingTotalMD,
|
|
apiRequestsInFlightTotalMD,
|
|
apiRequestsTotalMD,
|
|
apiRequestsErrorsTotalMD,
|
|
apiRequests5xxErrorsTotalMD,
|
|
apiRequests4xxErrorsTotalMD,
|
|
apiRequestsCanceledTotalMD,
|
|
|
|
apiRequestsTTFBSecondsDistributionMD,
|
|
|
|
apiTrafficSentBytesMD,
|
|
apiTrafficRecvBytesMD,
|
|
},
|
|
JoinLoaders(loadAPIRequestsHTTPMetrics, loadAPIRequestsTTFBMetrics,
|
|
loadAPIRequestsNetworkMetrics),
|
|
)
|
|
|
|
bucketAPIMG := NewBucketMetricsGroup(bucketAPICollectorPath,
|
|
[]MetricDescriptor{
|
|
bucketAPITrafficRecvBytesMD,
|
|
bucketAPITrafficSentBytesMD,
|
|
|
|
bucketAPIRequestsInFlightMD,
|
|
bucketAPIRequestsTotalMD,
|
|
bucketAPIRequestsCanceledMD,
|
|
bucketAPIRequests4xxErrorsMD,
|
|
bucketAPIRequests5xxErrorsMD,
|
|
|
|
bucketAPIRequestsTTFBSecondsDistributionMD,
|
|
},
|
|
JoinBucketLoaders(loadBucketAPIHTTPMetrics, loadBucketAPITTFBMetrics),
|
|
)
|
|
|
|
bucketReplicationMG := NewBucketMetricsGroup(bucketReplicationCollectorPath,
|
|
[]MetricDescriptor{
|
|
bucketReplLastHrFailedBytesMD,
|
|
bucketReplLastHrFailedCountMD,
|
|
bucketReplLastMinFailedBytesMD,
|
|
bucketReplLastMinFailedCountMD,
|
|
bucketReplLatencyMsMD,
|
|
bucketReplProxiedDeleteTaggingRequestsTotalMD,
|
|
bucketReplProxiedGetRequestsFailuresMD,
|
|
bucketReplProxiedGetRequestsTotalMD,
|
|
bucketReplProxiedGetTaggingRequestsFailuresMD,
|
|
bucketReplProxiedGetTaggingRequestsTotalMD,
|
|
bucketReplProxiedHeadRequestsFailuresMD,
|
|
bucketReplProxiedHeadRequestsTotalMD,
|
|
bucketReplProxiedPutTaggingRequestsFailuresMD,
|
|
bucketReplProxiedPutTaggingRequestsTotalMD,
|
|
bucketReplSentBytesMD,
|
|
bucketReplSentCountMD,
|
|
bucketReplTotalFailedBytesMD,
|
|
bucketReplTotalFailedCountMD,
|
|
bucketReplProxiedDeleteTaggingRequestsFailuresMD,
|
|
},
|
|
loadBucketReplicationMetrics,
|
|
)
|
|
|
|
systemNetworkInternodeMG := NewMetricsGroup(systemNetworkInternodeCollectorPath,
|
|
[]MetricDescriptor{
|
|
internodeErrorsTotalMD,
|
|
internodeDialedErrorsTotalMD,
|
|
internodeDialAvgTimeNanosMD,
|
|
internodeSentBytesTotalMD,
|
|
internodeRecvBytesTotalMD,
|
|
},
|
|
loadNetworkInternodeMetrics,
|
|
)
|
|
|
|
systemMemoryMG := NewMetricsGroup(systemMemoryCollectorPath,
|
|
[]MetricDescriptor{
|
|
memTotalMD,
|
|
memUsedMD,
|
|
memFreeMD,
|
|
memAvailableMD,
|
|
memBuffersMD,
|
|
memCacheMD,
|
|
memSharedMD,
|
|
memUsedPercMD,
|
|
},
|
|
loadMemoryMetrics,
|
|
)
|
|
|
|
systemCPUMG := NewMetricsGroup(systemCPUCollectorPath,
|
|
[]MetricDescriptor{
|
|
sysCPUAvgIdleMD,
|
|
sysCPUAvgIOWaitMD,
|
|
sysCPULoadMD,
|
|
sysCPULoadPercMD,
|
|
sysCPUNiceMD,
|
|
sysCPUStealMD,
|
|
sysCPUSystemMD,
|
|
sysCPUUserMD,
|
|
},
|
|
loadCPUMetrics,
|
|
)
|
|
|
|
systemProcessMG := NewMetricsGroup(systemProcessCollectorPath,
|
|
[]MetricDescriptor{
|
|
processLocksReadTotalMD,
|
|
processLocksWriteTotalMD,
|
|
processCPUTotalSecondsMD,
|
|
processGoRoutineTotalMD,
|
|
processIORCharBytesMD,
|
|
processIOReadBytesMD,
|
|
processIOWCharBytesMD,
|
|
processIOWriteBytesMD,
|
|
processStarttimeSecondsMD,
|
|
processUptimeSecondsMD,
|
|
processFileDescriptorLimitTotalMD,
|
|
processFileDescriptorOpenTotalMD,
|
|
processSyscallReadTotalMD,
|
|
processSyscallWriteTotalMD,
|
|
processResidentMemoryBytesMD,
|
|
processVirtualMemoryBytesMD,
|
|
processVirtualMemoryMaxBytesMD,
|
|
},
|
|
loadProcessMetrics,
|
|
)
|
|
|
|
systemDriveMG := NewMetricsGroup(systemDriveCollectorPath,
|
|
[]MetricDescriptor{
|
|
driveUsedBytesMD,
|
|
driveFreeBytesMD,
|
|
driveTotalBytesMD,
|
|
driveUsedInodesMD,
|
|
driveFreeInodesMD,
|
|
driveTotalInodesMD,
|
|
driveTimeoutErrorsMD,
|
|
driveIOErrorsMD,
|
|
driveAvailabilityErrorsMD,
|
|
driveWaitingIOMD,
|
|
driveAPILatencyMD,
|
|
driveHealthMD,
|
|
|
|
driveOfflineCountMD,
|
|
driveOnlineCountMD,
|
|
driveCountMD,
|
|
|
|
// iostat related
|
|
driveReadsPerSecMD,
|
|
driveReadsKBPerSecMD,
|
|
driveReadsAwaitMD,
|
|
driveWritesPerSecMD,
|
|
driveWritesKBPerSecMD,
|
|
driveWritesAwaitMD,
|
|
drivePercUtilMD,
|
|
},
|
|
loadDriveMetrics,
|
|
)
|
|
|
|
clusterHealthMG := NewMetricsGroup(clusterHealthCollectorPath,
|
|
[]MetricDescriptor{
|
|
healthDrivesOfflineCountMD,
|
|
healthDrivesOnlineCountMD,
|
|
healthDrivesCountMD,
|
|
|
|
healthNodesOfflineCountMD,
|
|
healthNodesOnlineCountMD,
|
|
|
|
healthCapacityRawTotalBytesMD,
|
|
healthCapacityRawFreeBytesMD,
|
|
healthCapacityUsableTotalBytesMD,
|
|
healthCapacityUsableFreeBytesMD,
|
|
},
|
|
JoinLoaders(loadClusterHealthDriveMetrics,
|
|
loadClusterHealthNodeMetrics,
|
|
loadClusterHealthCapacityMetrics),
|
|
)
|
|
|
|
clusterUsageObjectsMG := NewMetricsGroup(clusterUsageObjectsCollectorPath,
|
|
[]MetricDescriptor{
|
|
usageSinceLastUpdateSecondsMD,
|
|
usageTotalBytesMD,
|
|
usageObjectsCountMD,
|
|
usageVersionsCountMD,
|
|
usageDeleteMarkersCountMD,
|
|
usageBucketsCountMD,
|
|
usageObjectsDistributionMD,
|
|
usageVersionsDistributionMD,
|
|
},
|
|
loadClusterUsageObjectMetrics,
|
|
)
|
|
|
|
clusterUsageBucketsMG := NewMetricsGroup(clusterUsageBucketsCollectorPath,
|
|
[]MetricDescriptor{
|
|
usageSinceLastUpdateSecondsMD,
|
|
usageBucketTotalBytesMD,
|
|
usageBucketObjectsTotalMD,
|
|
usageBucketVersionsCountMD,
|
|
usageBucketDeleteMarkersCountMD,
|
|
usageBucketQuotaTotalBytesMD,
|
|
usageBucketObjectSizeDistributionMD,
|
|
usageBucketObjectVersionCountDistributionMD,
|
|
},
|
|
loadClusterUsageBucketMetrics,
|
|
)
|
|
|
|
clusterErasureSetMG := NewMetricsGroup(clusterErasureSetCollectorPath,
|
|
[]MetricDescriptor{
|
|
erasureSetOverallWriteQuorumMD,
|
|
erasureSetOverallHealthMD,
|
|
erasureSetReadQuorumMD,
|
|
erasureSetWriteQuorumMD,
|
|
erasureSetOnlineDrivesCountMD,
|
|
erasureSetHealingDrivesCountMD,
|
|
erasureSetHealthMD,
|
|
erasureSetReadToleranceMD,
|
|
erasureSetWriteToleranceMD,
|
|
erasureSetReadHealthMD,
|
|
erasureSetWriteHealthMD,
|
|
},
|
|
loadClusterErasureSetMetrics,
|
|
)
|
|
|
|
clusterNotificationMG := NewMetricsGroup(notificationCollectorPath,
|
|
[]MetricDescriptor{
|
|
notificationCurrentSendInProgressMD,
|
|
notificationEventsErrorsTotalMD,
|
|
notificationEventsSentTotalMD,
|
|
notificationEventsSkippedTotalMD,
|
|
},
|
|
loadClusterNotificationMetrics,
|
|
)
|
|
|
|
clusterIAMMG := NewMetricsGroup(clusterIAMCollectorPath,
|
|
[]MetricDescriptor{
|
|
lastSyncDurationMillisMD,
|
|
pluginAuthnServiceFailedRequestsMinuteMD,
|
|
pluginAuthnServiceLastFailSecondsMD,
|
|
pluginAuthnServiceLastSuccSecondsMD,
|
|
pluginAuthnServiceSuccAvgRttMsMinuteMD,
|
|
pluginAuthnServiceSuccMaxRttMsMinuteMD,
|
|
pluginAuthnServiceTotalRequestsMinuteMD,
|
|
sinceLastSyncMillisMD,
|
|
syncFailuresMD,
|
|
syncSuccessesMD,
|
|
},
|
|
loadClusterIAMMetrics,
|
|
)
|
|
|
|
clusterReplicationMG := NewMetricsGroup(replicationCollectorPath,
|
|
[]MetricDescriptor{
|
|
replicationAverageActiveWorkersMD,
|
|
replicationAverageQueuedBytesMD,
|
|
replicationAverageQueuedCountMD,
|
|
replicationAverageDataTransferRateMD,
|
|
replicationCurrentActiveWorkersMD,
|
|
replicationCurrentDataTransferRateMD,
|
|
replicationLastMinuteQueuedBytesMD,
|
|
replicationLastMinuteQueuedCountMD,
|
|
replicationMaxActiveWorkersMD,
|
|
replicationMaxQueuedBytesMD,
|
|
replicationMaxQueuedCountMD,
|
|
replicationMaxDataTransferRateMD,
|
|
replicationRecentBacklogCountMD,
|
|
},
|
|
loadClusterReplicationMetrics,
|
|
)
|
|
|
|
clusterConfigMG := NewMetricsGroup(clusterConfigCollectorPath,
|
|
[]MetricDescriptor{
|
|
configRRSParityMD,
|
|
configStandardParityMD,
|
|
},
|
|
loadClusterConfigMetrics,
|
|
)
|
|
|
|
scannerMG := NewMetricsGroup(scannerCollectorPath,
|
|
[]MetricDescriptor{
|
|
scannerBucketScansFinishedMD,
|
|
scannerBucketScansStartedMD,
|
|
scannerDirectoriesScannedMD,
|
|
scannerObjectsScannedMD,
|
|
scannerVersionsScannedMD,
|
|
scannerLastActivitySecondsMD,
|
|
},
|
|
loadClusterScannerMetrics,
|
|
)
|
|
|
|
loggerWebhookMG := NewMetricsGroup(loggerWebhookCollectorPath,
|
|
[]MetricDescriptor{
|
|
webhookFailedMessagesMD,
|
|
webhookQueueLengthMD,
|
|
webhookTotalMessagesMD,
|
|
},
|
|
loadLoggerWebhookMetrics,
|
|
)
|
|
|
|
auditMG := NewMetricsGroup(auditCollectorPath,
|
|
[]MetricDescriptor{
|
|
auditFailedMessagesMD,
|
|
auditTargetQueueLengthMD,
|
|
auditTotalMessagesMD,
|
|
},
|
|
loadAuditMetrics,
|
|
)
|
|
|
|
ilmMG := NewMetricsGroup(ilmCollectorPath,
|
|
[]MetricDescriptor{
|
|
ilmExpiryPendingTasksMD,
|
|
ilmTransitionActiveTasksMD,
|
|
ilmTransitionPendingTasksMD,
|
|
ilmTransitionMissedImmediateTasksMD,
|
|
ilmVersionsScannedMD,
|
|
},
|
|
loadILMMetrics,
|
|
)
|
|
|
|
allMetricGroups := []*MetricsGroup{
|
|
apiRequestsMG,
|
|
bucketAPIMG,
|
|
bucketReplicationMG,
|
|
|
|
systemNetworkInternodeMG,
|
|
systemDriveMG,
|
|
systemMemoryMG,
|
|
systemCPUMG,
|
|
systemProcessMG,
|
|
|
|
clusterHealthMG,
|
|
clusterUsageObjectsMG,
|
|
clusterUsageBucketsMG,
|
|
clusterErasureSetMG,
|
|
clusterNotificationMG,
|
|
clusterIAMMG,
|
|
clusterReplicationMG,
|
|
clusterConfigMG,
|
|
|
|
ilmMG,
|
|
scannerMG,
|
|
auditMG,
|
|
loggerWebhookMG,
|
|
}
|
|
|
|
// Bucket metrics are special, they always include the bucket label. These
|
|
// metrics required a list of buckets to be passed to the loader, and the list
|
|
// of buckets is not known until the request is made. So we keep a separate
|
|
// map for bucket metrics and handle them specially.
|
|
|
|
// Add the serverName and poolIndex labels to all non-cluster metrics.
|
|
//
|
|
// Also create metric group maps and set the cache.
|
|
metricsCache := newMetricsCache()
|
|
mgMap := make(map[collectorPath]*MetricsGroup)
|
|
bucketMGMap := make(map[collectorPath]*MetricsGroup)
|
|
for _, mg := range allMetricGroups {
|
|
if !strings.HasPrefix(string(mg.CollectorPath), clusterBasePath) {
|
|
mg.AddExtraLabels(
|
|
serverName, globalLocalNodeName,
|
|
// poolIndex, strconv.Itoa(globalLocalPoolIdx),
|
|
)
|
|
}
|
|
mg.SetCache(metricsCache)
|
|
if mg.IsBucketMetricsGroup() {
|
|
bucketMGMap[mg.CollectorPath] = mg
|
|
} else {
|
|
mgMap[mg.CollectorPath] = mg
|
|
}
|
|
}
|
|
|
|
// Prepare to register the collectors. Other than `MetricGroup` collectors,
|
|
// we also have standard collectors like `GoCollector`.
|
|
|
|
// Create all Non-`MetricGroup` collectors here.
|
|
collectors := map[collectorPath]prometheus.Collector{
|
|
debugGoCollectorPath: collectors.NewGoCollector(),
|
|
}
|
|
|
|
// Add all `MetricGroup` collectors to the map.
|
|
for _, mg := range allMetricGroups {
|
|
collectors[mg.CollectorPath] = mg
|
|
}
|
|
|
|
// Helper function to register a collector and return a gatherer for it.
|
|
mustRegister := func(c ...prometheus.Collector) prometheus.Gatherer {
|
|
subRegistry := prometheus.NewRegistry()
|
|
for _, col := range c {
|
|
subRegistry.MustRegister(col)
|
|
}
|
|
r.MustRegister(subRegistry)
|
|
return subRegistry
|
|
}
|
|
|
|
// Register all collectors and create gatherers for them.
|
|
gatherers := make(map[collectorPath]prometheus.Gatherer, len(collectors))
|
|
collectorPaths := make([]collectorPath, 0, len(collectors))
|
|
for path, collector := range collectors {
|
|
gatherers[path] = mustRegister(collector)
|
|
collectorPaths = append(collectorPaths, path)
|
|
}
|
|
slices.Sort(collectorPaths)
|
|
return &metricsV3Collection{
|
|
mgMap: mgMap,
|
|
bucketMGMap: bucketMGMap,
|
|
mgGatherers: gatherers,
|
|
collectorPaths: collectorPaths,
|
|
}
|
|
}
|