mirror of
https://github.com/minio/minio.git
synced 2025-05-22 18:11:50 -04:00
Add cluster and bucket replication metrics in metrics-v3 (#19546)
endpoint: /minio/metrics/v3/cluster/replication metrics: - average_active_workers - average_queued_bytes - average_queued_count - average_transfer_rate - current_active_workers - current_transfer_rate - last_minute_queued_bytes - last_minute_queued_count - max_active_workers - max_queued_bytes - max_queued_count - max_transfer_rate - recent_backlog_count endpoint: /minio/metrics/v3/api/bucket/replication metrics: - last_hour_failed_bytes - last_hour_failed_count - last_minute_failed_bytes - last_minute_failed_count - latency_ms - proxied_delete_tagging_requests_total - proxied_get_requests_failures - proxied_get_requests_total - proxied_get_tagging_requests_failures - proxied_get_tagging_requests_total - proxied_head_requests_failures - proxied_head_requests_total - proxied_put_tagging_requests_failures - proxied_put_tagging_requests_total - sent_bytes - sent_count - total_failed_bytes - total_failed_count - proxied_delete_tagging_requests_failures
This commit is contained in:
parent
6d5bc045bc
commit
7981509cc8
@ -144,33 +144,33 @@ func loadAPIRequestsNetworkMetrics(ctx context.Context, m MetricValues, _ *metri
|
|||||||
|
|
||||||
// Metric Descriptions for bucket level S3 metrics.
|
// Metric Descriptions for bucket level S3 metrics.
|
||||||
var (
|
var (
|
||||||
apiBucketTrafficSentBytesMD = NewCounterMD(apiTrafficSentBytes,
|
bucketAPITrafficSentBytesMD = NewCounterMD(apiTrafficSentBytes,
|
||||||
"Total number of bytes received for a bucket", "bucket", "type")
|
"Total number of bytes received for a bucket", "bucket", "type")
|
||||||
apiBucketTrafficRecvBytesMD = NewCounterMD(apiTrafficRecvBytes,
|
bucketAPITrafficRecvBytesMD = NewCounterMD(apiTrafficRecvBytes,
|
||||||
"Total number of bytes sent for a bucket", "bucket", "type")
|
"Total number of bytes sent for a bucket", "bucket", "type")
|
||||||
|
|
||||||
apiBucketRequestsInFlightMD = NewGaugeMD(apiRequestsInFlightTotal,
|
bucketAPIRequestsInFlightMD = NewGaugeMD(apiRequestsInFlightTotal,
|
||||||
"Total number of requests currently in flight for a bucket", "bucket", "name", "type")
|
"Total number of requests currently in flight for a bucket", "bucket", "name", "type")
|
||||||
apiBucketRequestsTotalMD = NewCounterMD(apiRequestsTotal,
|
bucketAPIRequestsTotalMD = NewCounterMD(apiRequestsTotal,
|
||||||
"Total number of requests for a bucket", "bucket", "name", "type")
|
"Total number of requests for a bucket", "bucket", "name", "type")
|
||||||
apiBucketRequestsCanceledMD = NewCounterMD(apiRequestsCanceledTotal,
|
bucketAPIRequestsCanceledMD = NewCounterMD(apiRequestsCanceledTotal,
|
||||||
"Total number of requests canceled by the client for a bucket", "bucket", "name", "type")
|
"Total number of requests canceled by the client for a bucket", "bucket", "name", "type")
|
||||||
apiBucketRequests4xxErrorsMD = NewCounterMD(apiRequests4xxErrorsTotal,
|
bucketAPIRequests4xxErrorsMD = NewCounterMD(apiRequests4xxErrorsTotal,
|
||||||
"Total number of requests with 4xx errors for a bucket", "bucket", "name", "type")
|
"Total number of requests with 4xx errors for a bucket", "bucket", "name", "type")
|
||||||
apiBucketRequests5xxErrorsMD = NewCounterMD(apiRequests5xxErrorsTotal,
|
bucketAPIRequests5xxErrorsMD = NewCounterMD(apiRequests5xxErrorsTotal,
|
||||||
"Total number of requests with 5xx errors for a bucket", "bucket", "name", "type")
|
"Total number of requests with 5xx errors for a bucket", "bucket", "name", "type")
|
||||||
|
|
||||||
apiBucketRequestsTTFBSecondsDistributionMD = NewCounterMD(apiRequestsTTFBSecondsDistribution,
|
bucketAPIRequestsTTFBSecondsDistributionMD = NewCounterMD(apiRequestsTTFBSecondsDistribution,
|
||||||
"Distribution of time to first byte across API calls for a bucket",
|
"Distribution of time to first byte across API calls for a bucket",
|
||||||
"bucket", "name", "le", "type")
|
"bucket", "name", "le", "type")
|
||||||
)
|
)
|
||||||
|
|
||||||
// loadAPIBucketHTTPMetrics - loads bucket level S3 HTTP metrics.
|
// loadBucketAPIHTTPMetrics - loads bucket level S3 HTTP metrics.
|
||||||
//
|
//
|
||||||
// This is a `MetricsLoaderFn`.
|
// This is a `MetricsLoaderFn`.
|
||||||
//
|
//
|
||||||
// This includes bucket level S3 HTTP metrics and S3 network in/out metrics.
|
// This includes bucket level S3 HTTP metrics and S3 network in/out metrics.
|
||||||
func loadAPIBucketHTTPMetrics(ctx context.Context, m MetricValues, _ *metricsCache, buckets []string) error {
|
func loadBucketAPIHTTPMetrics(ctx context.Context, m MetricValues, _ *metricsCache, buckets []string) error {
|
||||||
if len(buckets) == 0 {
|
if len(buckets) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -209,10 +209,10 @@ func loadAPIBucketHTTPMetrics(ctx context.Context, m MetricValues, _ *metricsCac
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// loadAPIBucketTTFBMetrics - loads bucket S3 TTFB metrics.
|
// loadBucketAPITTFBMetrics - loads bucket S3 TTFB metrics.
|
||||||
//
|
//
|
||||||
// This is a `MetricsLoaderFn`.
|
// This is a `MetricsLoaderFn`.
|
||||||
func loadAPIBucketTTFBMetrics(ctx context.Context, m MetricValues, _ *metricsCache, buckets []string) error {
|
func loadBucketAPITTFBMetrics(ctx context.Context, m MetricValues, _ *metricsCache, buckets []string) error {
|
||||||
renameLabels := map[string]string{"api": "name"}
|
renameLabels := map[string]string{"api": "name"}
|
||||||
m.SetHistogram(apiRequestsTTFBSecondsDistribution, bucketHTTPRequestsDuration, renameLabels,
|
m.SetHistogram(apiRequestsTTFBSecondsDistribution, bucketHTTPRequestsDuration, renameLabels,
|
||||||
buckets, "type", "s3")
|
buckets, "type", "s3")
|
||||||
|
155
cmd/metrics-v3-bucket-replication.go
Normal file
155
cmd/metrics-v3-bucket-replication.go
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
// Copyright (c) 2015-2024 MinIO, Inc.
|
||||||
|
//
|
||||||
|
// This file is part of MinIO Object Storage stack
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU Affero General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Affero General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
package cmd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
bucketReplLastHrFailedBytes = "last_hour_failed_bytes"
|
||||||
|
bucketReplLastHrFailedCount = "last_hour_failed_count"
|
||||||
|
bucketReplLastMinFailedBytes = "last_minute_failed_bytes"
|
||||||
|
bucketReplLastMinFailedCount = "last_minute_failed_count"
|
||||||
|
bucketReplLatencyMs = "latency_ms"
|
||||||
|
bucketReplProxiedDeleteTaggingRequestsTotal = "proxied_delete_tagging_requests_total"
|
||||||
|
bucketReplProxiedGetRequestsFailures = "proxied_get_requests_failures"
|
||||||
|
bucketReplProxiedGetRequestsTotal = "proxied_get_requests_total"
|
||||||
|
bucketReplProxiedGetTaggingRequestsFailures = "proxied_get_tagging_requests_failures"
|
||||||
|
bucketReplProxiedGetTaggingRequestsTotal = "proxied_get_tagging_requests_total"
|
||||||
|
bucketReplProxiedHeadRequestsFailures = "proxied_head_requests_failures"
|
||||||
|
bucketReplProxiedHeadRequestsTotal = "proxied_head_requests_total"
|
||||||
|
bucketReplProxiedPutTaggingRequestsFailures = "proxied_put_tagging_requests_failures"
|
||||||
|
bucketReplProxiedPutTaggingRequestsTotal = "proxied_put_tagging_requests_total"
|
||||||
|
bucketReplSentBytes = "sent_bytes"
|
||||||
|
bucketReplSentCount = "sent_count"
|
||||||
|
bucketReplTotalFailedBytes = "total_failed_bytes"
|
||||||
|
bucketReplTotalFailedCount = "total_failed_count"
|
||||||
|
bucketReplProxiedDeleteTaggingRequestsFailures = "proxied_delete_tagging_requests_failures"
|
||||||
|
bucketL = "bucket"
|
||||||
|
operationL = "operation"
|
||||||
|
targetArnL = "targetArn"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
bucketReplLastHrFailedBytesMD = NewGaugeMD(bucketReplLastHrFailedBytes,
|
||||||
|
"Total number of bytes failed at least once to replicate in the last hour on a bucket",
|
||||||
|
bucketL)
|
||||||
|
bucketReplLastHrFailedCountMD = NewGaugeMD(bucketReplLastHrFailedCount,
|
||||||
|
"Total number of objects which failed replication in the last hour on a bucket",
|
||||||
|
bucketL)
|
||||||
|
bucketReplLastMinFailedBytesMD = NewGaugeMD(bucketReplLastMinFailedBytes,
|
||||||
|
"Total number of bytes failed at least once to replicate in the last full minute on a bucket",
|
||||||
|
bucketL)
|
||||||
|
bucketReplLastMinFailedCountMD = NewGaugeMD(bucketReplLastMinFailedCount,
|
||||||
|
"Total number of objects which failed replication in the last full minute on a bucket",
|
||||||
|
bucketL)
|
||||||
|
bucketReplLatencyMsMD = NewGaugeMD(bucketReplLatencyMs,
|
||||||
|
"Replication latency on a bucket in milliseconds",
|
||||||
|
bucketL, operationL, rangeL, targetArnL)
|
||||||
|
bucketReplProxiedDeleteTaggingRequestsTotalMD = NewCounterMD(bucketReplProxiedDeleteTaggingRequestsTotal,
|
||||||
|
"Number of DELETE tagging requests proxied to replication target",
|
||||||
|
bucketL)
|
||||||
|
bucketReplProxiedGetRequestsFailuresMD = NewCounterMD(bucketReplProxiedGetRequestsFailures,
|
||||||
|
"Number of failures in GET requests proxied to replication target",
|
||||||
|
bucketL)
|
||||||
|
bucketReplProxiedGetRequestsTotalMD = NewCounterMD(bucketReplProxiedGetRequestsTotal,
|
||||||
|
"Number of GET requests proxied to replication target",
|
||||||
|
bucketL)
|
||||||
|
bucketReplProxiedGetTaggingRequestsFailuresMD = NewCounterMD(bucketReplProxiedGetTaggingRequestsFailures,
|
||||||
|
"Number of failures in GET tagging requests proxied to replication target",
|
||||||
|
bucketL)
|
||||||
|
bucketReplProxiedGetTaggingRequestsTotalMD = NewCounterMD(bucketReplProxiedGetTaggingRequestsTotal,
|
||||||
|
"Number of GET tagging requests proxied to replication target",
|
||||||
|
bucketL)
|
||||||
|
bucketReplProxiedHeadRequestsFailuresMD = NewCounterMD(bucketReplProxiedHeadRequestsFailures,
|
||||||
|
"Number of failures in HEAD requests proxied to replication target",
|
||||||
|
bucketL)
|
||||||
|
bucketReplProxiedHeadRequestsTotalMD = NewCounterMD(bucketReplProxiedHeadRequestsTotal,
|
||||||
|
"Number of HEAD requests proxied to replication target",
|
||||||
|
bucketL)
|
||||||
|
bucketReplProxiedPutTaggingRequestsFailuresMD = NewCounterMD(bucketReplProxiedPutTaggingRequestsFailures,
|
||||||
|
"Number of failures in PUT tagging requests proxied to replication target",
|
||||||
|
bucketL)
|
||||||
|
bucketReplProxiedPutTaggingRequestsTotalMD = NewCounterMD(bucketReplProxiedPutTaggingRequestsTotal,
|
||||||
|
"Number of PUT tagging requests proxied to replication target",
|
||||||
|
bucketL)
|
||||||
|
bucketReplSentBytesMD = NewCounterMD(bucketReplSentBytes,
|
||||||
|
"Total number of bytes replicated to the target",
|
||||||
|
bucketL)
|
||||||
|
bucketReplSentCountMD = NewCounterMD(bucketReplSentCount,
|
||||||
|
"Total number of objects replicated to the target",
|
||||||
|
bucketL)
|
||||||
|
bucketReplTotalFailedBytesMD = NewCounterMD(bucketReplTotalFailedBytes,
|
||||||
|
"Total number of bytes failed at least once to replicate since server start",
|
||||||
|
bucketL)
|
||||||
|
bucketReplTotalFailedCountMD = NewCounterMD(bucketReplTotalFailedCount,
|
||||||
|
"Total number of objects which failed replication since server start",
|
||||||
|
bucketL)
|
||||||
|
bucketReplProxiedDeleteTaggingRequestsFailuresMD = NewCounterMD(bucketReplProxiedDeleteTaggingRequestsFailures,
|
||||||
|
"Number of failures in DELETE tagging requests proxied to replication target",
|
||||||
|
bucketL)
|
||||||
|
)
|
||||||
|
|
||||||
|
// loadBucketReplicationMetrics - `BucketMetricsLoaderFn` for bucket replication metrics
|
||||||
|
// such as latency and sent bytes.
|
||||||
|
func loadBucketReplicationMetrics(ctx context.Context, m MetricValues, c *metricsCache, buckets []string) error {
|
||||||
|
if globalSiteReplicationSys.isEnabled() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
dataUsageInfo, err := c.dataUsageInfo.Get()
|
||||||
|
if err != nil {
|
||||||
|
metricsLogIf(ctx, err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
bucketReplStats := globalReplicationStats.getAllLatest(dataUsageInfo.BucketsUsage)
|
||||||
|
for _, bucket := range buckets {
|
||||||
|
labels := []string{bucketL, bucket}
|
||||||
|
if s, ok := bucketReplStats[bucket]; ok {
|
||||||
|
stats := s.ReplicationStats
|
||||||
|
if stats.hasReplicationUsage() {
|
||||||
|
for arn, stat := range stats.Stats {
|
||||||
|
m.Set(bucketReplLastHrFailedBytes, float64(stat.Failed.LastHour.Bytes), labels...)
|
||||||
|
m.Set(bucketReplLastHrFailedCount, float64(stat.Failed.LastHour.Count), labels...)
|
||||||
|
m.Set(bucketReplLastMinFailedBytes, float64(stat.Failed.LastMinute.Bytes), labels...)
|
||||||
|
m.Set(bucketReplLastMinFailedCount, float64(stat.Failed.LastMinute.Count), labels...)
|
||||||
|
m.Set(bucketReplProxiedDeleteTaggingRequestsTotal, float64(s.ProxyStats.RmvTagTotal), labels...)
|
||||||
|
m.Set(bucketReplProxiedGetRequestsFailures, float64(s.ProxyStats.GetFailedTotal), labels...)
|
||||||
|
m.Set(bucketReplProxiedGetRequestsTotal, float64(s.ProxyStats.GetTotal), labels...)
|
||||||
|
m.Set(bucketReplProxiedGetTaggingRequestsFailures, float64(s.ProxyStats.GetTagFailedTotal), labels...)
|
||||||
|
m.Set(bucketReplProxiedGetTaggingRequestsTotal, float64(s.ProxyStats.GetTagTotal), labels...)
|
||||||
|
m.Set(bucketReplProxiedHeadRequestsFailures, float64(s.ProxyStats.HeadFailedTotal), labels...)
|
||||||
|
m.Set(bucketReplProxiedHeadRequestsTotal, float64(s.ProxyStats.HeadTotal), labels...)
|
||||||
|
m.Set(bucketReplProxiedPutTaggingRequestsFailures, float64(s.ProxyStats.PutTagFailedTotal), labels...)
|
||||||
|
m.Set(bucketReplProxiedPutTaggingRequestsTotal, float64(s.ProxyStats.PutTagTotal), labels...)
|
||||||
|
m.Set(bucketReplSentCount, float64(stat.ReplicatedCount), labels...)
|
||||||
|
m.Set(bucketReplTotalFailedBytes, float64(stat.Failed.Totals.Bytes), labels...)
|
||||||
|
m.Set(bucketReplTotalFailedCount, float64(stat.Failed.Totals.Count), labels...)
|
||||||
|
m.Set(bucketReplProxiedDeleteTaggingRequestsFailures, float64(s.ProxyStats.RmvTagFailedTotal), labels...)
|
||||||
|
m.Set(bucketReplSentBytes, float64(stat.ReplicatedSize), labels...)
|
||||||
|
|
||||||
|
SetHistogramValues(m, bucketReplLatencyMs, stat.Latency.getUploadLatency(), bucketL, bucket, operationL, "upload", targetArnL, arn)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
96
cmd/metrics-v3-replication.go
Normal file
96
cmd/metrics-v3-replication.go
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
// Copyright (c) 2015-2024 MinIO, Inc.
|
||||||
|
//
|
||||||
|
// This file is part of MinIO Object Storage stack
|
||||||
|
//
|
||||||
|
// This program is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Affero General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU Affero General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Affero General Public License
|
||||||
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
package cmd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
replicationAverageActiveWorkers = "average_active_workers"
|
||||||
|
replicationAverageQueuedBytes = "average_queued_bytes"
|
||||||
|
replicationAverageQueuedCount = "average_queued_count"
|
||||||
|
replicationAverageDataTransferRate = "average_data_transfer_rate"
|
||||||
|
replicationCurrentActiveWorkers = "current_active_workers"
|
||||||
|
replicationCurrentDataTransferRate = "current_data_transfer_rate"
|
||||||
|
replicationLastMinuteQueuedBytes = "last_minute_queued_bytes"
|
||||||
|
replicationLastMinuteQueuedCount = "last_minute_queued_count"
|
||||||
|
replicationMaxActiveWorkers = "max_active_workers"
|
||||||
|
replicationMaxQueuedBytes = "max_queued_bytes"
|
||||||
|
replicationMaxQueuedCount = "max_queued_count"
|
||||||
|
replicationMaxDataTransferRate = "max_data_transfer_rate"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
replicationAverageActiveWorkersMD = NewGaugeMD(replicationAverageActiveWorkers,
|
||||||
|
"Average number of active replication workers")
|
||||||
|
replicationAverageQueuedBytesMD = NewGaugeMD(replicationAverageQueuedBytes,
|
||||||
|
"Average number of bytes queued for replication since server start")
|
||||||
|
replicationAverageQueuedCountMD = NewGaugeMD(replicationAverageQueuedCount,
|
||||||
|
"Average number of objects queued for replication since server start")
|
||||||
|
replicationAverageDataTransferRateMD = NewGaugeMD(replicationAverageDataTransferRate,
|
||||||
|
"Average replication data transfer rate in bytes/sec")
|
||||||
|
replicationCurrentActiveWorkersMD = NewGaugeMD(replicationCurrentActiveWorkers,
|
||||||
|
"Total number of active replication workers")
|
||||||
|
replicationCurrentDataTransferRateMD = NewGaugeMD(replicationCurrentDataTransferRate,
|
||||||
|
"Current replication data transfer rate in bytes/sec")
|
||||||
|
replicationLastMinuteQueuedBytesMD = NewGaugeMD(replicationLastMinuteQueuedBytes,
|
||||||
|
"Number of bytes queued for replication in the last full minute")
|
||||||
|
replicationLastMinuteQueuedCountMD = NewGaugeMD(replicationLastMinuteQueuedCount,
|
||||||
|
"Number of objects queued for replication in the last full minute")
|
||||||
|
replicationMaxActiveWorkersMD = NewGaugeMD(replicationMaxActiveWorkers,
|
||||||
|
"Maximum number of active replication workers seen since server start")
|
||||||
|
replicationMaxQueuedBytesMD = NewGaugeMD(replicationMaxQueuedBytes,
|
||||||
|
"Maximum number of bytes queued for replication since server start")
|
||||||
|
replicationMaxQueuedCountMD = NewGaugeMD(replicationMaxQueuedCount,
|
||||||
|
"Maximum number of objects queued for replication since server start")
|
||||||
|
replicationMaxDataTransferRateMD = NewGaugeMD(replicationMaxDataTransferRate,
|
||||||
|
"Maximum replication data transfer rate in bytes/sec seen since server start")
|
||||||
|
)
|
||||||
|
|
||||||
|
// loadClusterReplicationMetrics - `MetricsLoaderFn` for cluster replication metrics
|
||||||
|
// such as transfer rate and objects queued.
|
||||||
|
func loadClusterReplicationMetrics(ctx context.Context, m MetricValues, c *metricsCache) error {
|
||||||
|
if globalReplicationStats == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
qs := globalReplicationStats.getNodeQueueStatsSummary()
|
||||||
|
|
||||||
|
qt := qs.QStats
|
||||||
|
m.Set(replicationAverageQueuedBytes, float64(qt.Avg.Bytes))
|
||||||
|
m.Set(replicationAverageQueuedCount, float64(qt.Avg.Count))
|
||||||
|
m.Set(replicationMaxQueuedBytes, float64(qt.Max.Bytes))
|
||||||
|
m.Set(replicationMaxQueuedCount, float64(qt.Max.Count))
|
||||||
|
m.Set(replicationLastMinuteQueuedBytes, float64(qt.Curr.Bytes))
|
||||||
|
m.Set(replicationLastMinuteQueuedCount, float64(qt.Curr.Count))
|
||||||
|
|
||||||
|
qa := qs.ActiveWorkers
|
||||||
|
m.Set(replicationAverageActiveWorkers, float64(qa.Avg))
|
||||||
|
m.Set(replicationCurrentActiveWorkers, float64(qa.Curr))
|
||||||
|
m.Set(replicationMaxActiveWorkers, float64(qa.Max))
|
||||||
|
|
||||||
|
if len(qs.XferStats) > 0 {
|
||||||
|
tots := qs.XferStats[Total]
|
||||||
|
m.Set(replicationAverageDataTransferRate, tots.Avg)
|
||||||
|
m.Set(replicationCurrentDataTransferRate, tots.Curr)
|
||||||
|
m.Set(replicationMaxDataTransferRate, tots.Peak)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
@ -72,6 +72,8 @@ const (
|
|||||||
GaugeMT
|
GaugeMT
|
||||||
// HistogramMT - represents a histogram metric.
|
// HistogramMT - represents a histogram metric.
|
||||||
HistogramMT
|
HistogramMT
|
||||||
|
// rangeL - represents a range label.
|
||||||
|
rangeL = "range"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (mt MetricType) String() string {
|
func (mt MetricType) String() string {
|
||||||
@ -225,7 +227,7 @@ func (m *MetricValues) Set(name MetricName, value float64, labels ...string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(labels)/2 != len(validLabels) {
|
if len(labels)/2 != len(validLabels) {
|
||||||
panic(fmt.Sprintf("not all labels were given values"))
|
panic("not all labels were given values")
|
||||||
}
|
}
|
||||||
|
|
||||||
v, ok := m.values[name]
|
v, ok := m.values[name]
|
||||||
@ -284,6 +286,14 @@ func (m *MetricValues) SetHistogram(name MetricName, hist *prometheus.HistogramV
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetHistogramValues - sets values for the given MetricName using the provided map of
|
||||||
|
// range to value.
|
||||||
|
func SetHistogramValues[V uint64 | int64 | float64](m MetricValues, name MetricName, values map[string]V, labels ...string) {
|
||||||
|
for rng, val := range values {
|
||||||
|
m.Set(name, float64(val), append(labels, rangeL, rng)...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// MetricsLoaderFn - represents a function to load metrics from the
|
// MetricsLoaderFn - represents a function to load metrics from the
|
||||||
// metricsCache.
|
// metricsCache.
|
||||||
//
|
//
|
||||||
|
@ -35,7 +35,9 @@ import (
|
|||||||
// for the bucket "mybucket" would be /minio/metrics/v3/bucket/api/mybucket
|
// for the bucket "mybucket" would be /minio/metrics/v3/bucket/api/mybucket
|
||||||
const (
|
const (
|
||||||
apiRequestsCollectorPath collectorPath = "/api/requests"
|
apiRequestsCollectorPath collectorPath = "/api/requests"
|
||||||
apiBucketCollectorPath collectorPath = "/bucket/api"
|
|
||||||
|
bucketAPICollectorPath collectorPath = "/bucket/api"
|
||||||
|
bucketReplicationCollectorPath collectorPath = "/bucket/replication"
|
||||||
|
|
||||||
systemNetworkInternodeCollectorPath collectorPath = "/system/network/internode"
|
systemNetworkInternodeCollectorPath collectorPath = "/system/network/internode"
|
||||||
systemDriveCollectorPath collectorPath = "/system/drive"
|
systemDriveCollectorPath collectorPath = "/system/drive"
|
||||||
@ -54,6 +56,7 @@ const (
|
|||||||
|
|
||||||
auditCollectorPath collectorPath = "/audit"
|
auditCollectorPath collectorPath = "/audit"
|
||||||
loggerWebhookCollectorPath collectorPath = "/logger/webhook"
|
loggerWebhookCollectorPath collectorPath = "/logger/webhook"
|
||||||
|
replicationCollectorPath collectorPath = "/replication"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -97,20 +100,45 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
|||||||
loadAPIRequestsNetworkMetrics),
|
loadAPIRequestsNetworkMetrics),
|
||||||
)
|
)
|
||||||
|
|
||||||
apiBucketMG := NewBucketMetricsGroup(apiBucketCollectorPath,
|
bucketAPIMG := NewBucketMetricsGroup(bucketAPICollectorPath,
|
||||||
[]MetricDescriptor{
|
[]MetricDescriptor{
|
||||||
apiBucketTrafficRecvBytesMD,
|
bucketAPITrafficRecvBytesMD,
|
||||||
apiBucketTrafficSentBytesMD,
|
bucketAPITrafficSentBytesMD,
|
||||||
|
|
||||||
apiBucketRequestsInFlightMD,
|
bucketAPIRequestsInFlightMD,
|
||||||
apiBucketRequestsTotalMD,
|
bucketAPIRequestsTotalMD,
|
||||||
apiBucketRequestsCanceledMD,
|
bucketAPIRequestsCanceledMD,
|
||||||
apiBucketRequests4xxErrorsMD,
|
bucketAPIRequests4xxErrorsMD,
|
||||||
apiBucketRequests5xxErrorsMD,
|
bucketAPIRequests5xxErrorsMD,
|
||||||
|
|
||||||
apiBucketRequestsTTFBSecondsDistributionMD,
|
bucketAPIRequestsTTFBSecondsDistributionMD,
|
||||||
},
|
},
|
||||||
JoinBucketLoaders(loadAPIBucketHTTPMetrics, loadAPIBucketTTFBMetrics),
|
JoinBucketLoaders(loadBucketAPIHTTPMetrics, loadBucketAPITTFBMetrics),
|
||||||
|
)
|
||||||
|
|
||||||
|
bucketReplicationMG := NewBucketMetricsGroup(bucketReplicationCollectorPath,
|
||||||
|
[]MetricDescriptor{
|
||||||
|
bucketReplLastHrFailedBytesMD,
|
||||||
|
bucketReplLastHrFailedCountMD,
|
||||||
|
bucketReplLastMinFailedBytesMD,
|
||||||
|
bucketReplLastMinFailedCountMD,
|
||||||
|
bucketReplLatencyMsMD,
|
||||||
|
bucketReplProxiedDeleteTaggingRequestsTotalMD,
|
||||||
|
bucketReplProxiedGetRequestsFailuresMD,
|
||||||
|
bucketReplProxiedGetRequestsTotalMD,
|
||||||
|
bucketReplProxiedGetTaggingRequestsFailuresMD,
|
||||||
|
bucketReplProxiedGetTaggingRequestsTotalMD,
|
||||||
|
bucketReplProxiedHeadRequestsFailuresMD,
|
||||||
|
bucketReplProxiedHeadRequestsTotalMD,
|
||||||
|
bucketReplProxiedPutTaggingRequestsFailuresMD,
|
||||||
|
bucketReplProxiedPutTaggingRequestsTotalMD,
|
||||||
|
bucketReplSentBytesMD,
|
||||||
|
bucketReplSentCountMD,
|
||||||
|
bucketReplTotalFailedBytesMD,
|
||||||
|
bucketReplTotalFailedCountMD,
|
||||||
|
bucketReplProxiedDeleteTaggingRequestsFailuresMD,
|
||||||
|
},
|
||||||
|
loadBucketReplicationMetrics,
|
||||||
)
|
)
|
||||||
|
|
||||||
systemNetworkInternodeMG := NewMetricsGroup(systemNetworkInternodeCollectorPath,
|
systemNetworkInternodeMG := NewMetricsGroup(systemNetworkInternodeCollectorPath,
|
||||||
@ -296,6 +324,24 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
|||||||
loadClusterIAMMetrics,
|
loadClusterIAMMetrics,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
clusterReplicationMG := NewMetricsGroup(replicationCollectorPath,
|
||||||
|
[]MetricDescriptor{
|
||||||
|
replicationAverageActiveWorkersMD,
|
||||||
|
replicationAverageQueuedBytesMD,
|
||||||
|
replicationAverageQueuedCountMD,
|
||||||
|
replicationAverageDataTransferRateMD,
|
||||||
|
replicationCurrentActiveWorkersMD,
|
||||||
|
replicationCurrentDataTransferRateMD,
|
||||||
|
replicationLastMinuteQueuedBytesMD,
|
||||||
|
replicationLastMinuteQueuedCountMD,
|
||||||
|
replicationMaxActiveWorkersMD,
|
||||||
|
replicationMaxQueuedBytesMD,
|
||||||
|
replicationMaxQueuedCountMD,
|
||||||
|
replicationMaxDataTransferRateMD,
|
||||||
|
},
|
||||||
|
loadClusterReplicationMetrics,
|
||||||
|
)
|
||||||
|
|
||||||
loggerWebhookMG := NewMetricsGroup(loggerWebhookCollectorPath,
|
loggerWebhookMG := NewMetricsGroup(loggerWebhookCollectorPath,
|
||||||
[]MetricDescriptor{
|
[]MetricDescriptor{
|
||||||
webhookFailedMessagesMD,
|
webhookFailedMessagesMD,
|
||||||
@ -316,7 +362,8 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
|||||||
|
|
||||||
allMetricGroups := []*MetricsGroup{
|
allMetricGroups := []*MetricsGroup{
|
||||||
apiRequestsMG,
|
apiRequestsMG,
|
||||||
apiBucketMG,
|
bucketAPIMG,
|
||||||
|
bucketReplicationMG,
|
||||||
|
|
||||||
systemNetworkInternodeMG,
|
systemNetworkInternodeMG,
|
||||||
systemDriveMG,
|
systemDriveMG,
|
||||||
@ -330,6 +377,7 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
|||||||
clusterErasureSetMG,
|
clusterErasureSetMG,
|
||||||
clusterNotificationMG,
|
clusterNotificationMG,
|
||||||
clusterIAMMG,
|
clusterIAMMG,
|
||||||
|
clusterReplicationMG,
|
||||||
|
|
||||||
auditMG,
|
auditMG,
|
||||||
loggerWebhookMG,
|
loggerWebhookMG,
|
||||||
|
@ -31,7 +31,7 @@ These are metrics about requests served by the (current) node.
|
|||||||
| Path | Description |
|
| Path | Description |
|
||||||
|-----------------|--------------------------------------------------|
|
|-----------------|--------------------------------------------------|
|
||||||
| `/api/requests` | Metrics over all requests |
|
| `/api/requests` | Metrics over all requests |
|
||||||
| `/api/bucket` | Metrics over all requests split by bucket labels |
|
| `/bucket/api` | Metrics over all requests for a given bucket |
|
||||||
| | |
|
| | |
|
||||||
|
|
||||||
### Audit metrics
|
### Audit metrics
|
||||||
@ -122,6 +122,30 @@ The standard metrics group for GoCollector is not shown below.
|
|||||||
| `minio_bucket_api_5xx_errors_total` | `counter` | Total number of requests with 5xx errors for a bucket | `bucket,name,type,server,pool_index` |
|
| `minio_bucket_api_5xx_errors_total` | `counter` | Total number of requests with 5xx errors for a bucket | `bucket,name,type,server,pool_index` |
|
||||||
| `minio_bucket_api_ttfb_seconds_distribution` | `counter` | Distribution of time to first byte across API calls for a bucket | `bucket,name,le,type,server,pool_index` |
|
| `minio_bucket_api_ttfb_seconds_distribution` | `counter` | Distribution of time to first byte across API calls for a bucket | `bucket,name,le,type,server,pool_index` |
|
||||||
|
|
||||||
|
### `/bucket/replication`
|
||||||
|
|
||||||
|
| Name | Type | Help | Labels |
|
||||||
|
|---------------------------------------------------------------------|-----------|---------------------------------------------------------------------------------------------|-------------------------------------------|
|
||||||
|
| `minio_bucket_replication_last_hour_failed_bytes` | `gauge` | Total number of bytes failed at least once to replicate in the last hour on a bucket | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_last_hour_failed_count` | `gauge` | Total number of objects which failed replication in the last hour on a bucket | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_last_minute_failed_bytes` | `gauge` | Total number of bytes failed at least once to replicate in the last full minute on a bucket | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_last_minute_failed_count` | `gauge` | Total number of objects which failed replication in the last full minute on a bucket | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_latency_ms` | `gauge` | Replication latency on a bucket in milliseconds | `bucket,operation,range,targetArn,server` |
|
||||||
|
| `minio_bucket_replication_proxied_delete_tagging_requests_total` | `counter` | Number of DELETE tagging requests proxied to replication target | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_proxied_get_requests_failures` | `counter` | Number of failures in GET requests proxied to replication target | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_proxied_get_requests_total` | `counter` | Number of GET requests proxied to replication target | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_proxied_get_tagging_requests_failures` | `counter` | Number of failures in GET tagging requests proxied to replication target | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_proxied_get_tagging_requests_total` | `counter` | Number of GET tagging requests proxied to replication target | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_proxied_head_requests_failures` | `counter` | Number of failures in HEAD requests proxied to replication target | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_proxied_head_requests_total` | `counter` | Number of HEAD requests proxied to replication target | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_proxied_put_tagging_requests_failures` | `counter` | Number of failures in PUT tagging requests proxied to replication target | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_proxied_put_tagging_requests_total` | `counter` | Number of PUT tagging requests proxied to replication target | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_sent_bytes` | `counter` | Total number of bytes replicated to the target | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_sent_count` | `counter` | Total number of objects replicated to the target | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_total_failed_bytes` | `counter` | Total number of bytes failed at least once to replicate since server start | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_total_failed_count` | `counter` | Total number of objects which failed replication since server start | `bucket,server` |
|
||||||
|
| `minio_bucket_replication_proxied_delete_tagging_requests_failures` | `counter` | Number of failures in DELETE tagging requests proxied to replication target | `bucket,server` |
|
||||||
|
|
||||||
### `/audit`
|
### `/audit`
|
||||||
|
|
||||||
| Name | Type | Help | Labels |
|
| Name | Type | Help | Labels |
|
||||||
@ -195,25 +219,25 @@ The standard metrics group for GoCollector is not shown below.
|
|||||||
|
|
||||||
### `/system/process`
|
### `/system/process`
|
||||||
|
|
||||||
| Name | Type | Help | Labels |
|
| Name | Type | Help | Labels |
|
||||||
|-------------------------------|-----------|----------------------------------------------------------------------------------------------------------------|----------|
|
|----------------------------------------------------|-----------|----------------------------------------------------------------------------------------------------------------|----------|
|
||||||
| `locks_read_total` | `gauge` | Number of current READ locks on this peer | `server` |
|
| `minio_system_process_locks_read_total` | `gauge` | Number of current READ locks on this peer | `server` |
|
||||||
| `locks_write_total` | `gauge` | Number of current WRITE locks on this peer | `server` |
|
| `minio_system_process_locks_write_total` | `gauge` | Number of current WRITE locks on this peer | `server` |
|
||||||
| `cpu_total_seconds` | `counter` | Total user and system CPU time spent in seconds | `server` |
|
| `minio_system_process_cpu_total_seconds` | `counter` | Total user and system CPU time spent in seconds | `server` |
|
||||||
| `go_routine_total` | `gauge` | Total number of go routines running | `server` |
|
| `minio_system_process_go_routine_total` | `gauge` | Total number of go routines running | `server` |
|
||||||
| `io_rchar_bytes` | `counter` | Total bytes read by the process from the underlying storage system including cache, /proc/[pid]/io rchar | `server` |
|
| `minio_system_process_io_rchar_bytes` | `counter` | Total bytes read by the process from the underlying storage system including cache, /proc/[pid]/io rchar | `server` |
|
||||||
| `io_read_bytes` | `counter` | Total bytes read by the process from the underlying storage system, /proc/[pid]/io read_bytes | `server` |
|
| `minio_system_process_io_read_bytes` | `counter` | Total bytes read by the process from the underlying storage system, /proc/[pid]/io read_bytes | `server` |
|
||||||
| `io_wchar_bytes` | `counter` | Total bytes written by the process to the underlying storage system including page cache, /proc/[pid]/io wchar | `server` |
|
| `minio_system_process_io_wchar_bytes` | `counter` | Total bytes written by the process to the underlying storage system including page cache, /proc/[pid]/io wchar | `server` |
|
||||||
| `io_write_bytes` | `counter` | Total bytes written by the process to the underlying storage system, /proc/[pid]/io write_bytes | `server` |
|
| `minio_system_process_io_write_bytes` | `counter` | Total bytes written by the process to the underlying storage system, /proc/[pid]/io write_bytes | `server` |
|
||||||
| `start_time_seconds` | `gauge` | Start time for MinIO process in seconds since Unix epoch | `server` |
|
| `minio_system_process_start_time_seconds` | `gauge` | Start time for MinIO process in seconds since Unix epoch | `server` |
|
||||||
| `uptime_seconds` | `gauge` | Uptime for MinIO process in seconds | `server` |
|
| `minio_system_process_uptime_seconds` | `gauge` | Uptime for MinIO process in seconds | `server` |
|
||||||
| `file_descriptor_limit_total` | `gauge` | Limit on total number of open file descriptors for the MinIO Server process | `server` |
|
| `minio_system_process_file_descriptor_limit_total` | `gauge` | Limit on total number of open file descriptors for the MinIO Server process | `server` |
|
||||||
| `file_descriptor_open_total` | `gauge` | Total number of open file descriptors by the MinIO Server process | `server` |
|
| `minio_system_process_file_descriptor_open_total` | `gauge` | Total number of open file descriptors by the MinIO Server process | `server` |
|
||||||
| `syscall_read_total` | `counter` | Total read SysCalls to the kernel. /proc/[pid]/io syscr | `server` |
|
| `minio_system_process_syscall_read_total` | `counter` | Total read SysCalls to the kernel. /proc/[pid]/io syscr | `server` |
|
||||||
| `syscall_write_total` | `counter` | Total write SysCalls to the kernel. /proc/[pid]/io syscw | `server` |
|
| `minio_system_process_syscall_write_total` | `counter` | Total write SysCalls to the kernel. /proc/[pid]/io syscw | `server` |
|
||||||
| `resident_memory_bytes` | `gauge` | Resident memory size in bytes | `server` |
|
| `minio_system_process_resident_memory_bytes` | `gauge` | Resident memory size in bytes | `server` |
|
||||||
| `virtual_memory_bytes` | `gauge` | Virtual memory size in bytes | `server` |
|
| `minio_system_process_virtual_memory_bytes` | `gauge` | Virtual memory size in bytes | `server` |
|
||||||
| `virtual_memory_max_bytes` | `gauge` | Maximum virtual memory size in bytes | `server` |
|
| `minio_system_process_virtual_memory_max_bytes` | `gauge` | Maximum virtual memory size in bytes | `server` |
|
||||||
|
|
||||||
### `/cluster/health`
|
### `/cluster/health`
|
||||||
|
|
||||||
@ -302,3 +326,20 @@ The standard metrics group for GoCollector is not shown below.
|
|||||||
| `minio_logger_webhook_failed_messages` | `counter` | Number of messages that failed to send | `server,name,endpoint` |
|
| `minio_logger_webhook_failed_messages` | `counter` | Number of messages that failed to send | `server,name,endpoint` |
|
||||||
| `minio_logger_webhook_queue_length` | `gauge` | Webhook queue length | `server,name,endpoint` |
|
| `minio_logger_webhook_queue_length` | `gauge` | Webhook queue length | `server,name,endpoint` |
|
||||||
| `minio_logger_webhook_total_message` | `counter` | Total number of messages sent to this target | `server,name,endpoint` |
|
| `minio_logger_webhook_total_message` | `counter` | Total number of messages sent to this target | `server,name,endpoint` |
|
||||||
|
|
||||||
|
### `/replication`
|
||||||
|
|
||||||
|
| Name | Type | Help | Labels |
|
||||||
|
|---------------------------------------------------|---------|-----------------------------------------------------------------------------|----------|
|
||||||
|
| `minio_replication_average_active_workers` | `gauge` | Average number of active replication workers | `server` |
|
||||||
|
| `minio_replication_average_queued_bytes` | `gauge` | Average number of bytes queued for replication since server start | `server` |
|
||||||
|
| `minio_replication_average_queued_count` | `gauge` | Average number of objects queued for replication since server start | `server` |
|
||||||
|
| `minio_replication_average_data_transfer_rate` | `gauge` | Average replication data transfer rate in bytes/sec | `server` |
|
||||||
|
| `minio_replication_current_active_workers` | `gauge` | Total number of active replication workers | `server` |
|
||||||
|
| `minio_replication_current_data_transfer_rate` | `gauge` | Current replication data transfer rate in bytes/sec | `server` |
|
||||||
|
| `minio_replication_last_minute_queued_bytes` | `gauge` | Number of bytes queued for replication in the last full minute | `server` |
|
||||||
|
| `minio_replication_last_minute_queued_count` | `gauge` | Number of objects queued for replication in the last full minute | `server` |
|
||||||
|
| `minio_replication_max_active_workers` | `gauge` | Maximum number of active replication workers seen since server start | `server` |
|
||||||
|
| `minio_replication_max_queued_bytes` | `gauge` | Maximum number of bytes queued for replication since server start | `server` |
|
||||||
|
| `minio_replication_max_queued_count` | `gauge` | Maximum number of objects queued for replication since server start | `server` |
|
||||||
|
| `minio_replication_max_data_transfer_rate` | `gauge` | Maximum replication data transfer rate in bytes/sec seen since server start | `server` |
|
||||||
|
Loading…
x
Reference in New Issue
Block a user