metrics: Add replication latency metrics (#13515)

Add a new Prometheus metric for bucket replication latency

e.g.:
minio_bucket_replication_latency_ns{
    bucket="testbucket",
    operation="upload",
    range="LESS_THAN_1_MiB",
    server="127.0.0.1:9001",
    targetArn="arn:minio:replication::45da043c-14f5-4da4-9316-aba5f77bf730:testbucket"} 2.2015663e+07

Co-authored-by: Klaus Post <klauspost@gmail.com>
This commit is contained in:
Anis Elleuch
2021-11-17 21:10:57 +01:00
committed by GitHub
parent 5b68f8ea6a
commit 4caed7cc0d
11 changed files with 1237 additions and 34 deletions

View File

@@ -19,10 +19,46 @@ package cmd
import (
"sync/atomic"
"time"
)
//go:generate msgp -file $GOFILE
// ReplicationLatency holds information of bucket operations latency, such us uploads
type ReplicationLatency struct {
// Single & Multipart PUTs latency
UploadHistogram LastMinuteLatencies
}
// Merge two replication latency into a new one
func (rl ReplicationLatency) merge(other ReplicationLatency) (newReplLatency ReplicationLatency) {
newReplLatency.UploadHistogram = rl.UploadHistogram.Merge(other.UploadHistogram)
return
}
// Get upload latency of each object size range
func (rl ReplicationLatency) getUploadLatency() (ret map[string]uint64) {
ret = make(map[string]uint64)
avg := rl.UploadHistogram.GetAvg()
for k, v := range avg {
// Convert nanoseconds to milliseconds
ret[sizeTagToString(k)] = v.avg() / uint64(time.Millisecond)
}
return
}
// Update replication upload latency with a new value
func (rl *ReplicationLatency) update(size int64, duration time.Duration) {
rl.UploadHistogram.Add(size, duration)
}
// Clone replication latency
func (rl ReplicationLatency) clone() ReplicationLatency {
return ReplicationLatency{
UploadHistogram: rl.UploadHistogram.Clone(),
}
}
// BucketStats bucket statistics
type BucketStats struct {
ReplicationStats BucketReplicationStats
@@ -65,6 +101,7 @@ func (brs BucketReplicationStats) Clone() BucketReplicationStats {
FailedCount: atomic.LoadInt64(&st.FailedCount),
PendingSize: atomic.LoadInt64(&st.PendingSize),
PendingCount: atomic.LoadInt64(&st.PendingCount),
Latency: st.Latency.clone(),
}
}
// update total counts across targets
@@ -93,6 +130,8 @@ type BucketReplicationStat struct {
PendingCount int64 `json:"pendingReplicationCount"`
// Total number of failed operations including metadata updates
FailedCount int64 `json:"failedReplicationCount"`
// Replication latency information
Latency ReplicationLatency `json:"replicationLatency"`
}
func (bs *BucketReplicationStat) hasReplicationUsage() bool {