diff --git a/cmd/bucket-stats.go b/cmd/bucket-stats.go
index d691af93b..a51fbf41d 100644
--- a/cmd/bucket-stats.go
+++ b/cmd/bucket-stats.go
@@ -20,6 +20,7 @@ package cmd
import (
"fmt"
"math"
+ "sync/atomic"
"time"
"github.com/minio/madmin-go/v3"
@@ -127,8 +128,7 @@ func (l *ReplicationLastHour) getTotal() AccElem {
// forwardTo time t, clearing any entries in between.
func (l *ReplicationLastHour) forwardTo(t int64) {
- tMin := t / 60
- if l.LastMin >= tMin {
+ if l.LastMin >= t {
return
}
if t-l.LastMin >= 60 {
@@ -314,6 +314,9 @@ func (r *ReplicationStats) getNodeQueueStats(bucket string) (qs ReplQNodeStats)
qs.XferStats = make(map[RMetricName]XferStats)
qs.QStats = r.qCache.getBucketStats(bucket)
qs.TgtXferStats = make(map[string]map[RMetricName]XferStats)
+ qs.MRFStats = ReplicationMRFStats{
+ LastFailedCount: atomic.LoadUint64(&r.mrfStats.LastFailedCount),
+ }
r.RLock()
defer r.RUnlock()
@@ -402,7 +405,9 @@ func (r *ReplicationStats) getNodeQueueStatsSummary() (qs ReplQNodeStats) {
qs.ActiveWorkers = globalReplicationStats.ActiveWorkers()
qs.XferStats = make(map[RMetricName]XferStats)
qs.QStats = r.qCache.getSiteStats()
-
+ qs.MRFStats = ReplicationMRFStats{
+ LastFailedCount: atomic.LoadUint64(&r.mrfStats.LastFailedCount),
+ }
r.RLock()
defer r.RUnlock()
tx := newXferStats()
diff --git a/cmd/metrics-v3-replication.go b/cmd/metrics-v3-replication.go
index 1961c3304..da26e0956 100644
--- a/cmd/metrics-v3-replication.go
+++ b/cmd/metrics-v3-replication.go
@@ -34,6 +34,7 @@ const (
replicationMaxQueuedBytes = "max_queued_bytes"
replicationMaxQueuedCount = "max_queued_count"
replicationMaxDataTransferRate = "max_data_transfer_rate"
+ replicationRecentBacklogCount = "recent_backlog_count"
)
var (
@@ -61,6 +62,8 @@ var (
"Maximum number of objects queued for replication since server start")
replicationMaxDataTransferRateMD = NewGaugeMD(replicationMaxDataTransferRate,
"Maximum replication data transfer rate in bytes/sec seen since server start")
+ replicationRecentBacklogCountMD = NewGaugeMD(replicationRecentBacklogCount,
+ "Total number of objects seen in replication backlog in the last 5 minutes")
)
// loadClusterReplicationMetrics - `MetricsLoaderFn` for cluster replication metrics
@@ -91,6 +94,7 @@ func loadClusterReplicationMetrics(ctx context.Context, m MetricValues, c *metri
m.Set(replicationCurrentDataTransferRate, tots.Curr)
m.Set(replicationMaxDataTransferRate, tots.Peak)
}
+ m.Set(replicationRecentBacklogCount, float64(qs.MRFStats.LastFailedCount))
return nil
}
diff --git a/cmd/metrics-v3.go b/cmd/metrics-v3.go
index d00a447d3..93749258c 100644
--- a/cmd/metrics-v3.go
+++ b/cmd/metrics-v3.go
@@ -341,6 +341,7 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
replicationMaxQueuedBytesMD,
replicationMaxQueuedCountMD,
replicationMaxDataTransferRateMD,
+ replicationRecentBacklogCountMD,
},
loadClusterReplicationMetrics,
)
diff --git a/docs/metrics/v3.md b/docs/metrics/v3.md
index 8cbf46517..1805232de 100644
--- a/docs/metrics/v3.md
+++ b/docs/metrics/v3.md
@@ -275,7 +275,7 @@ Metrics about MinIO site and bucket replication.
| `minio_replication_max_queued_bytes` | Maximum number of bytes queued for replication since server start.
Type: gauge | `server` |
| `minio_replication_max_queued_count` | Maximum number of objects queued for replication since server start.
Type: gauge | `server` |
| `minio_replication_max_data_transfer_rate` | Maximum replication data transfer rate in bytes/sec since server start.
Type: gauge | `server` |
-
+| `minio_replication_recent_backlog_count` | Total number of objects seen in replication backlog in the last 5 minutes.
Type: gauge | `server` |
#### `/bucket/replication`
| Name | Description | Labels |