From 3bd3470d0b39fd624b2696431cf07e7bd3023c82 Mon Sep 17 00:00:00 2001
From: Shubhendu
Date: Fri, 14 Jun 2024 03:56:54 +0530
Subject: [PATCH] Corrected names of node replication metrics (#19932)

Signed-off-by: Shubhendu Ram Tripathi
---
 cmd/metrics-v2.go                            | 38 +++++++++---------
 .../replication/minio-replication-node.json | 40 +++++++++----------
 docs/metrics/prometheus/list.md              | 38 +++++++++---------
 3 files changed, 58 insertions(+), 58 deletions(-)

diff --git a/cmd/metrics-v2.go b/cmd/metrics-v2.go
index 818ff3722..73b9b4197 100644
--- a/cmd/metrics-v2.go
+++ b/cmd/metrics-v2.go
@@ -817,7 +817,7 @@ func getClusterObjectVersionsMD() MetricDescription {
 
 func getClusterRepLinkLatencyCurrMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      currLinkLatency,
 		Help:      "Replication current link latency in milliseconds",
@@ -827,7 +827,7 @@ func getClusterRepLinkLatencyCurrMD() MetricDescription {
 
 func getClusterRepLinkOnlineMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      linkOnline,
 		Help:      "Reports whether replication link is online (1) or offline(0)",
@@ -837,7 +837,7 @@ func getClusterRepLinkOnlineMD() MetricDescription {
 
 func getClusterRepLinkCurrOfflineDurationMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      linkOfflineDuration,
 		Help:      "Duration of replication link being offline in seconds since last offline event",
@@ -847,7 +847,7 @@ func getClusterRepLinkCurrOfflineDurationMD() MetricDescription {
 
 func getClusterRepLinkTotalOfflineDurationMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      linkDowntimeTotalDuration,
 		Help:      "Total downtime of replication link in seconds since server uptime",
@@ -975,7 +975,7 @@ func getRepReceivedOperationsMD(namespace MetricNamespace) MetricDescription {
 
 func getClusterReplMRFFailedOperationsMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      recentBacklogCount,
 		Help:      "Total number of objects seen in replication backlog in the last 5 minutes",
@@ -995,7 +995,7 @@ func getClusterRepCredentialErrorsMD(namespace MetricNamespace) MetricDescriptio
 
 func getClusterReplCurrQueuedOperationsMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      currInQueueCount,
 		Help:      "Total number of objects queued for replication in the last full minute",
@@ -1005,7 +1005,7 @@ func getClusterReplCurrQueuedOperationsMD() MetricDescription {
 
 func getClusterReplCurrQueuedBytesMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      currInQueueBytes,
 		Help:      "Total number of bytes queued for replication in the last full minute",
@@ -1015,7 +1015,7 @@ func getClusterReplCurrQueuedBytesMD() MetricDescription {
 
 func getClusterReplActiveWorkersCountMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      currActiveWorkers,
 		Help:      "Total number of active replication workers",
@@ -1025,7 +1025,7 @@ func getClusterReplActiveWorkersCountMD() MetricDescription {
 
 func getClusterReplAvgActiveWorkersCountMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      avgActiveWorkers,
 		Help:      "Average number of active replication workers",
@@ -1035,7 +1035,7 @@ func getClusterReplAvgActiveWorkersCountMD() MetricDescription {
 
 func getClusterReplMaxActiveWorkersCountMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      maxActiveWorkers,
 		Help:      "Maximum number of active replication workers seen since server uptime",
@@ -1045,7 +1045,7 @@ func getClusterReplMaxActiveWorkersCountMD() MetricDescription {
 
 func getClusterReplCurrentTransferRateMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      currTransferRate,
 		Help:      "Current replication transfer rate in bytes/sec",
@@ -1055,7 +1055,7 @@ func getClusterReplCurrentTransferRateMD() MetricDescription {
 
 func getClusterRepLinkLatencyMaxMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      maxLinkLatency,
 		Help:      "Maximum replication link latency in milliseconds seen since server uptime",
@@ -1065,7 +1065,7 @@ func getClusterRepLinkLatencyMaxMD() MetricDescription {
 
 func getClusterRepLinkLatencyAvgMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      avgLinkLatency,
 		Help:      "Average replication link latency in milliseconds",
@@ -1075,7 +1075,7 @@ func getClusterRepLinkLatencyAvgMD() MetricDescription {
 
 func getClusterReplAvgQueuedOperationsMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      avgInQueueCount,
 		Help:      "Average number of objects queued for replication since server uptime",
@@ -1085,7 +1085,7 @@ func getClusterReplAvgQueuedOperationsMD() MetricDescription {
 
 func getClusterReplAvgQueuedBytesMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      avgInQueueBytes,
 		Help:      "Average number of bytes queued for replication since server uptime",
@@ -1095,7 +1095,7 @@ func getClusterReplAvgQueuedBytesMD() MetricDescription {
 
 func getClusterReplMaxQueuedOperationsMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      maxInQueueCount,
 		Help:      "Maximum number of objects queued for replication since server uptime",
@@ -1105,7 +1105,7 @@ func getClusterReplMaxQueuedOperationsMD() MetricDescription {
 
 func getClusterReplMaxQueuedBytesMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      maxInQueueBytes,
 		Help:      "Maximum number of bytes queued for replication since server uptime",
@@ -1115,7 +1115,7 @@ func getClusterReplMaxQueuedBytesMD() MetricDescription {
 
 func getClusterReplAvgTransferRateMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      avgTransferRate,
 		Help:      "Average replication transfer rate in bytes/sec",
@@ -1125,7 +1125,7 @@ func getClusterReplAvgTransferRateMD() MetricDescription {
 
 func getClusterReplMaxTransferRateMD() MetricDescription {
 	return MetricDescription{
-		Namespace: clusterMetricNamespace,
+		Namespace: nodeMetricNamespace,
 		Subsystem: replicationSubsystem,
 		Name:      maxTransferRate,
 		Help:      "Maximum replication transfer rate in bytes/sec seen since server uptime",
diff --git a/docs/metrics/prometheus/grafana/replication/minio-replication-node.json b/docs/metrics/prometheus/grafana/replication/minio-replication-node.json
index 9feb5545c..c00c05b80 100644
--- a/docs/metrics/prometheus/grafana/replication/minio-replication-node.json
+++ b/docs/metrics/prometheus/grafana/replication/minio-replication-node.json
@@ -162,7 +162,7 @@
             "uid": "${DS_PROMETHEUS}"
           },
           "exemplar": true,
-          "expr": "sum by (server) (minio_cluster_replication_average_active_workers{job=\"$scrape_jobs\"})",
+          "expr": "sum by (server) (minio_node_replication_average_active_workers{job=\"$scrape_jobs\"})",
           "interval": "1m",
           "intervalFactor": 2,
           "legendFormat": "{{server}}",
@@ -290,7 +290,7 @@
             "uid": "${DS_PROMETHEUS}"
           },
           "exemplar": true,
-          "expr": "sum by (server, endpoint) (minio_cluster_replication_average_link_latency_ms{job=\"$scrape_jobs\"})",
+          "expr": "sum by (server, endpoint) (minio_node_replication_average_link_latency_ms{job=\"$scrape_jobs\"})",
           "interval": "1m",
           "intervalFactor": 2,
           "legendFormat": "{{server,endpoint}}",
@@ -418,7 +418,7 @@
             "uid": "${DS_PROMETHEUS}"
           },
           "exemplar": true,
-          "expr": "sum by (server) (minio_cluster_replication_average_queued_bytes{job=\"$scrape_jobs\"})",
+          "expr": "sum by (server) (minio_node_replication_average_queued_bytes{job=\"$scrape_jobs\"})",
           "interval": "1m",
           "intervalFactor": 2,
           "legendFormat": "{{server}}",
@@ -546,7 +546,7 @@
             "uid": "${DS_PROMETHEUS}"
           },
           "exemplar": true,
-          "expr": "sum by (server) (minio_cluster_replication_average_queued_count{job=\"$scrape_jobs\"})",
+          "expr": "sum by (server) (minio_node_replication_average_queued_count{job=\"$scrape_jobs\"})",
           "interval": "1m",
           "intervalFactor": 2,
           "legendFormat": "{{server}}",
@@ -674,7 +674,7 @@
             "uid": "${DS_PROMETHEUS}"
           },
           "exemplar": true,
-          "expr": "sum by (server) (minio_cluster_replication_average_transfer_rate{job=\"$scrape_jobs\"})",
+          "expr": "sum by (server) (minio_node_replication_average_transfer_rate{job=\"$scrape_jobs\"})",
           "interval": "1m",
           "intervalFactor": 2,
           "legendFormat": "{{server}}",
@@ -802,7 +802,7 @@
             "uid": "${DS_PROMETHEUS}"
          },
           "exemplar": true,
-          "expr": "sum by (server) (minio_cluster_replication_current_active_workers{job=\"$scrape_jobs\"})",
+          "expr": "sum by (server) (minio_node_replication_current_active_workers{job=\"$scrape_jobs\"})",
           "interval": "1m",
           "intervalFactor": 2,
           "legendFormat": "{{server}}",
@@ -930,7 +930,7 @@
             "uid": "${DS_PROMETHEUS}"
           },
           "exemplar": true,
-          "expr": "sum by (server,endpoint) (minio_cluster_replication_current_link_latency_ms{job=\"$scrape_jobs\"})",
+          "expr": "sum by (server,endpoint) (minio_node_replication_current_link_latency_ms{job=\"$scrape_jobs\"})",
           "interval": "1m",
           "intervalFactor": 2,
           "legendFormat": "{{server}}",
@@ -1058,7 +1058,7 @@
             "uid": "${DS_PROMETHEUS}"
           },
           "exemplar": true,
-          "expr": "sum by (server) (minio_cluster_replication_current_transfer_rate{job=\"$scrape_jobs\"})",
+          "expr": "sum by (server) (minio_node_replication_current_transfer_rate{job=\"$scrape_jobs\"})",
           "interval": "1m",
"intervalFactor": 2, "legendFormat": "{{server}}", @@ -1186,7 +1186,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "sum by (server) (minio_cluster_replication_last_minute_queued_bytes{job=\"$scrape_jobs\"})", + "expr": "sum by (server) (minio_node_replication_last_minute_queued_bytes{job=\"$scrape_jobs\"})", "interval": "1m", "intervalFactor": 2, "legendFormat": "{{server}}", @@ -1314,7 +1314,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "sum by (server) (minio_cluster_replication_last_minute_queued_count{job=\"$scrape_jobs\"})", + "expr": "sum by (server) (minio_node_replication_last_minute_queued_count{job=\"$scrape_jobs\"})", "interval": "1m", "intervalFactor": 2, "legendFormat": "{{bucket}}", @@ -1442,7 +1442,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "sum by (server,endpoint) (minio_cluster_replication_link_downtime_duration_seconds{job=\"$scrape_jobs\"})", + "expr": "sum by (server,endpoint) (minio_node_replication_link_downtime_duration_seconds{job=\"$scrape_jobs\"})", "interval": "1m", "intervalFactor": 2, "legendFormat": "{{server,endpoint}}", @@ -1540,7 +1540,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "sum by (server,endpoint) (minio_cluster_replication_link_offline_duration_seconds{job=\"$scrape_jobs\"})", + "expr": "sum by (server,endpoint) (minio_node_replication_link_offline_duration_seconds{job=\"$scrape_jobs\"})", "interval": "1m", "legendFormat": "{{server,endpoint}}", "refId": "A" @@ -1637,7 +1637,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "minio_cluster_replication_max_active_workers{job=\"$scrape_jobs\"}", + "expr": "minio_node_replication_max_active_workers{job=\"$scrape_jobs\"}", "interval": "1m", "legendFormat": "{{server}}", "refId": "A" @@ -1734,7 +1734,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "sum by (server,endpoibt) (minio_cluster_replication_max_link_latency_ms{job=\"$scrape_jobs\"})", + "expr": "sum by (server,endpoibt) (minio_node_replication_max_link_latency_ms{job=\"$scrape_jobs\"})", "interval": "1m", "legendFormat": "{{server,endpoint}}", "refId": "A" @@ -1831,7 +1831,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "minio_cluster_replication_max_queued_bytes{job=\"$scrape_jobs\"}", + "expr": "minio_node_replication_max_queued_bytes{job=\"$scrape_jobs\"}", "interval": "1m", "legendFormat": "{{server}}", "refId": "A" @@ -1928,7 +1928,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "minio_cluster_replication_max_queued_count{job=\"$scrape_jobs\"}", + "expr": "minio_node_replication_max_queued_count{job=\"$scrape_jobs\"}", "interval": "1m", "legendFormat": "{{server}}", "refId": "A" @@ -2025,7 +2025,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "minio_cluster_replication_max_transfer_rate{job=\"$scrape_jobs\"}", + "expr": "minio_node_replication_max_transfer_rate{job=\"$scrape_jobs\"}", "interval": "1m", "legendFormat": "{{server}}", "refId": "A" @@ -2122,7 +2122,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "minio_cluster_replication_recent_backlog_count{job=\"$scrape_jobs\"}", + "expr": "minio_node_replication_recent_backlog_count{job=\"$scrape_jobs\"}", "interval": "1m", "legendFormat": "{{server}}", "refId": "A" @@ -2219,7 +2219,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "sum by (server,endpoint) (minio_cluster_replication_link_online{job=\"$scrape_jobs\"})", + "expr": "sum by (server,endpoint) (minio_node_replication_link_online{job=\"$scrape_jobs\"})", "interval": 
"1m", "legendFormat": "{{endpoint}}", "refId": "A" @@ -2316,7 +2316,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "minio_cluster_replication_link_offline_duration_seconds{job=\"$scrape_jobs\"}", + "expr": "minio_node_replication_link_offline_duration_seconds{job=\"$scrape_jobs\"}", "interval": "1m", "legendFormat": "{{endpoint}}", "refId": "A" diff --git a/docs/metrics/prometheus/list.md b/docs/metrics/prometheus/list.md index 17c4bd771..51d3b897e 100644 --- a/docs/metrics/prometheus/list.md +++ b/docs/metrics/prometheus/list.md @@ -114,25 +114,25 @@ For deployments with [bucket](https://min.io/docs/minio/linux/administration/buc | Name | Description |:-----------------------------------------------------------|:---------------------------------------------------------------------------------------------------------| -| `minio_cluster_replication_current_active_workers` | Total number of active replication workers | -| `minio_cluster_replication_average_active_workers` | Average number of active replication workers | -| `minio_cluster_replication_max_active_workers` | Maximum number of active replication workers seen since server start | -| `minio_cluster_replication_link_online` | Reports whether the replication link is online (1) or offline (0). | -| `minio_cluster_replication_link_offline_duration_seconds` | Total duration of replication link being offline in seconds since last offline event | -| `minio_cluster_replication_link_downtime_duration_seconds` | Total downtime of replication link in seconds since server start | -| `minio_cluster_replication_average_link_latency_ms` | Average replication link latency in milliseconds | -| `minio_cluster_replication_max_link_latency_ms` | Maximum replication link latency in milliseconds seen since server start | -| `minio_cluster_replication_current_link_latency_ms` | Current replication link latency in milliseconds | -| `minio_cluster_replication_current_transfer_rate` | Current replication transfer rate in bytes/sec | -| `minio_cluster_replication_average_transfer_rate` | Average replication transfer rate in bytes/sec | -| `minio_cluster_replication_max_transfer_rate` | Maximum replication transfer rate in bytes/sec seen since server start | -| `minio_cluster_replication_last_minute_queued_count` | Total number of objects queued for replication in the last full minute | -| `minio_cluster_replication_last_minute_queued_bytes` | Total number of bytes queued for replication in the last full minute | -| `minio_cluster_replication_average_queued_count` | Average number of objects queued for replication since server start | -| `minio_cluster_replication_average_queued_bytes` | Average number of bytes queued for replication since server start | -| `minio_cluster_replication_max_queued_bytes` | Maximum number of bytes queued for replication seen since server start | -| `minio_cluster_replication_max_queued_count` | Maximum number of objects queued for replication seen since server start | -| `minio_cluster_replication_recent_backlog_count` | Total number of objects seen in replication backlog in the last 5 minutes | +| `minio_node_replication_current_active_workers` | Total number of active replication workers | +| `minio_node_replication_average_active_workers` | Average number of active replication workers | +| `minio_node_replication_max_active_workers` | Maximum number of active replication workers seen since server start | +| `minio_node_replication_link_online` | Reports whether the replication link is online (1) or offline (0). 
| +| `minio_node_replication_link_offline_duration_seconds` | Total duration of replication link being offline in seconds since last offline event | +| `minio_node_replication_link_downtime_duration_seconds` | Total downtime of replication link in seconds since server start | +| `minio_node_replication_average_link_latency_ms` | Average replication link latency in milliseconds | +| `minio_node_replication_max_link_latency_ms` | Maximum replication link latency in milliseconds seen since server start | +| `minio_node_replication_current_link_latency_ms` | Current replication link latency in milliseconds | +| `minio_node_replication_current_transfer_rate` | Current replication transfer rate in bytes/sec | +| `minio_node_replication_average_transfer_rate` | Average replication transfer rate in bytes/sec | +| `minio_node_replication_max_transfer_rate` | Maximum replication transfer rate in bytes/sec seen since server start | +| `minio_node_replication_last_minute_queued_count` | Total number of objects queued for replication in the last full minute | +| `minio_node_replication_last_minute_queued_bytes` | Total number of bytes queued for replication in the last full minute | +| `minio_node_replication_average_queued_count` | Average number of objects queued for replication since server start | +| `minio_node_replication_average_queued_bytes` | Average number of bytes queued for replication since server start | +| `minio_node_replication_max_queued_bytes` | Maximum number of bytes queued for replication seen since server start | +| `minio_node_replication_max_queued_count` | Maximum number of objects queued for replication seen since server start | +| `minio_node_replication_recent_backlog_count` | Total number of objects seen in replication backlog in the last 5 minutes | ## Healing Metrics