Add additional info for replication metrics API (#17293)

to track the replication transfer rate across different nodes,
the number of active workers in use, and in-queue stats, giving
an idea of the current workload.

This PR also adds replication metrics to the site replication
status API. For site replication, Prometheus metrics are no
longer reported at the bucket level but at the cluster level.

Add a Prometheus metric to track credential errors since server startup.
This commit is contained in:
Poorna
2023-08-30 01:00:59 -07:00
committed by GitHub
parent cce90cb2b7
commit b48bbe08b2
31 changed files with 8779 additions and 743 deletions

View File

@@ -846,9 +846,11 @@ type sizeSummary struct {
versions uint64
deleteMarkers uint64
replicatedSize int64
replicatedCount int64
pendingSize int64
failedSize int64
replicaSize int64
replicaCount int64
pendingCount uint64
failedCount uint64
replTargetStats map[string]replTargetSizeSummary
@@ -857,11 +859,12 @@ type sizeSummary struct {
// replTargetSizeSummary holds summary of replication stats by target
type replTargetSizeSummary struct {
replicatedSize int64
pendingSize int64
failedSize int64
pendingCount uint64
failedCount uint64
replicatedSize int64
replicatedCount int64
pendingSize int64
failedSize int64
pendingCount uint64
failedCount uint64
}
type getSizeFn func(item scannerItem) (sizeSummary, error)
@@ -1286,13 +1289,16 @@ func (i *scannerItem) healReplication(ctx context.Context, o ObjectLayer, oi Obj
sizeS.failedCount++
case replication.Completed, replication.CompletedLegacy:
tgtSizeS.replicatedSize += oi.Size
tgtSizeS.replicatedCount++
sizeS.replicatedSize += oi.Size
sizeS.replicatedCount++
}
sizeS.replTargetStats[arn] = tgtSizeS
}
if oi.ReplicationStatus == replication.Replica {
sizeS.replicaSize += oi.Size
sizeS.replicaCount++
}
}