Add cluster and bucket replication metrics in metrics-v3 (#19546)

endpoint: /minio/metrics/v3/replication
metrics:
- average_active_workers
- average_queued_bytes
- average_queued_count
- average_transfer_rate
- current_active_workers
- current_transfer_rate
- last_minute_queued_bytes
- last_minute_queued_count
- max_active_workers
- max_queued_bytes
- max_queued_count
- max_transfer_rate
- recent_backlog_count

endpoint: /minio/metrics/v3/bucket/replication
metrics:
- last_hour_failed_bytes
- last_hour_failed_count
- last_minute_failed_bytes
- last_minute_failed_count
- latency_ms
- proxied_delete_tagging_requests_total
- proxied_get_requests_failures
- proxied_get_requests_total
- proxied_get_tagging_requests_failures
- proxied_get_tagging_requests_total
- proxied_head_requests_failures
- proxied_head_requests_total
- proxied_put_tagging_requests_failures
- proxied_put_tagging_requests_total
- sent_bytes
- sent_count
- total_failed_bytes
- total_failed_count
- proxied_delete_tagging_requests_failures
This commit is contained in:
Shireesh Anjal
2024-05-23 13:11:18 +05:30
committed by GitHub
parent 6d5bc045bc
commit 7981509cc8
6 changed files with 395 additions and 45 deletions

View File

@@ -35,7 +35,9 @@ import (
// for the bucket "mybucket" would be /minio/metrics/v3/bucket/api/mybucket
const (
apiRequestsCollectorPath collectorPath = "/api/requests"
apiBucketCollectorPath collectorPath = "/bucket/api"
bucketAPICollectorPath collectorPath = "/bucket/api"
bucketReplicationCollectorPath collectorPath = "/bucket/replication"
systemNetworkInternodeCollectorPath collectorPath = "/system/network/internode"
systemDriveCollectorPath collectorPath = "/system/drive"
@@ -54,6 +56,7 @@ const (
auditCollectorPath collectorPath = "/audit"
loggerWebhookCollectorPath collectorPath = "/logger/webhook"
replicationCollectorPath collectorPath = "/replication"
)
const (
@@ -97,20 +100,45 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
loadAPIRequestsNetworkMetrics),
)
apiBucketMG := NewBucketMetricsGroup(apiBucketCollectorPath,
bucketAPIMG := NewBucketMetricsGroup(bucketAPICollectorPath,
[]MetricDescriptor{
apiBucketTrafficRecvBytesMD,
apiBucketTrafficSentBytesMD,
bucketAPITrafficRecvBytesMD,
bucketAPITrafficSentBytesMD,
apiBucketRequestsInFlightMD,
apiBucketRequestsTotalMD,
apiBucketRequestsCanceledMD,
apiBucketRequests4xxErrorsMD,
apiBucketRequests5xxErrorsMD,
bucketAPIRequestsInFlightMD,
bucketAPIRequestsTotalMD,
bucketAPIRequestsCanceledMD,
bucketAPIRequests4xxErrorsMD,
bucketAPIRequests5xxErrorsMD,
apiBucketRequestsTTFBSecondsDistributionMD,
bucketAPIRequestsTTFBSecondsDistributionMD,
},
JoinBucketLoaders(loadAPIBucketHTTPMetrics, loadAPIBucketTTFBMetrics),
JoinBucketLoaders(loadBucketAPIHTTPMetrics, loadBucketAPITTFBMetrics),
)
bucketReplicationMG := NewBucketMetricsGroup(bucketReplicationCollectorPath,
[]MetricDescriptor{
bucketReplLastHrFailedBytesMD,
bucketReplLastHrFailedCountMD,
bucketReplLastMinFailedBytesMD,
bucketReplLastMinFailedCountMD,
bucketReplLatencyMsMD,
bucketReplProxiedDeleteTaggingRequestsTotalMD,
bucketReplProxiedGetRequestsFailuresMD,
bucketReplProxiedGetRequestsTotalMD,
bucketReplProxiedGetTaggingRequestsFailuresMD,
bucketReplProxiedGetTaggingRequestsTotalMD,
bucketReplProxiedHeadRequestsFailuresMD,
bucketReplProxiedHeadRequestsTotalMD,
bucketReplProxiedPutTaggingRequestsFailuresMD,
bucketReplProxiedPutTaggingRequestsTotalMD,
bucketReplSentBytesMD,
bucketReplSentCountMD,
bucketReplTotalFailedBytesMD,
bucketReplTotalFailedCountMD,
bucketReplProxiedDeleteTaggingRequestsFailuresMD,
},
loadBucketReplicationMetrics,
)
systemNetworkInternodeMG := NewMetricsGroup(systemNetworkInternodeCollectorPath,
@@ -296,6 +324,24 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
loadClusterIAMMetrics,
)
clusterReplicationMG := NewMetricsGroup(replicationCollectorPath,
[]MetricDescriptor{
replicationAverageActiveWorkersMD,
replicationAverageQueuedBytesMD,
replicationAverageQueuedCountMD,
replicationAverageDataTransferRateMD,
replicationCurrentActiveWorkersMD,
replicationCurrentDataTransferRateMD,
replicationLastMinuteQueuedBytesMD,
replicationLastMinuteQueuedCountMD,
replicationMaxActiveWorkersMD,
replicationMaxQueuedBytesMD,
replicationMaxQueuedCountMD,
replicationMaxDataTransferRateMD,
},
loadClusterReplicationMetrics,
)
loggerWebhookMG := NewMetricsGroup(loggerWebhookCollectorPath,
[]MetricDescriptor{
webhookFailedMessagesMD,
@@ -316,7 +362,8 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
allMetricGroups := []*MetricsGroup{
apiRequestsMG,
apiBucketMG,
bucketAPIMG,
bucketReplicationMG,
systemNetworkInternodeMG,
systemDriveMG,
@@ -330,6 +377,7 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
clusterErasureSetMG,
clusterNotificationMG,
clusterIAMMG,
clusterReplicationMG,
auditMG,
loggerWebhookMG,