Add additional info for replication metrics API (#17293)

This adds tracking of the replication transfer rate across different
nodes, the number of active workers in use, and in-queue stats, to give
an idea of the current workload.

This PR also adds replication metrics to the site replication
status API. For site replication, prometheus metrics are
no longer at the bucket level - but at the cluster level.

Add a Prometheus metric to track credential errors since server start
This commit is contained in:
Poorna
2023-08-30 01:00:59 -07:00
committed by GitHub
parent cce90cb2b7
commit b48bbe08b2
31 changed files with 8779 additions and 743 deletions

View File

@@ -49,6 +49,9 @@ type replicatedTargetInfo struct {
VersionPurgeStatus VersionPurgeStatusType
ResyncTimestamp string
ReplicationResynced bool // true only if resync attempted for this target
endpoint string
secure bool
Err error // replication error if any
}
// Empty returns true for a target if arn is empty
@@ -320,7 +323,7 @@ func parseReplicateDecision(ctx context.Context, bucket, s string) (r ReplicateD
if err != nil {
return r, err
}
tgtClnt := globalBucketTargetSys.GetRemoteTargetClient(ctx, slc[0])
tgtClnt := globalBucketTargetSys.GetRemoteTargetClient(slc[0])
if tgtClnt == nil {
// Skip stale targets, if any, and log them as missing at least once.
logger.LogOnceIf(ctx, fmt.Errorf("failed to get target for bucket:%s arn:%s", bucket, slc[0]), slc[0])
@@ -800,6 +803,7 @@ type MRFReplicateEntry struct {
Object string `json:"object" msg:"o"`
versionID string `json:"-"`
RetryCount int `json:"retryCount" msg:"rc"`
sz int64 `json:"-"`
}
// MRFReplicateEntries has the map of MRF entries to save to disk
@@ -814,17 +818,7 @@ func (ri ReplicateObjectInfo) ToMRFEntry() MRFReplicateEntry {
Bucket: ri.Bucket,
Object: ri.Name,
versionID: ri.VersionID,
sz: ri.Size,
RetryCount: int(ri.RetryCount),
}
}
// getReplicationStatsPath builds the object path of the "replication.stats"
// file: bucketMetaPrefix, then replicationDir, then the file name, each
// joined with SlashSeparator.
func getReplicationStatsPath() string {
	statsDir := bucketMetaPrefix + SlashSeparator + replicationDir
	return statsDir + SlashSeparator + "replication.stats"
}
// Format/version markers and save cadence for replication stats.
const (
	// replStatsMetaFormat is the metadata format number for replication stats.
	replStatsMetaFormat = 1

	// replStatsVersionV1 identifies version 1 of the replication stats data.
	replStatsVersionV1 = 1
	// replStatsVersion is the version currently in use (V1).
	replStatsVersion = replStatsVersionV1

	// replStatsSaveInterval is a five-minute duration; presumably the period
	// between saves of replication stats (confirm against its users).
	replStatsSaveInterval = 5 * time.Minute
)