mirror of
https://github.com/minio/minio.git
synced 2025-11-07 21:02:58 -05:00
fix: make metrics endpoint responsive by reducing the chatter (#15055)
peerOnlineCounter was making NxN calls to many peers; this can take a very long time when random servers are going down. Instead, we should calculate online peers from the point of view of "self", performing a health check on each peer and reporting it as online or offline accordingly.
This commit is contained in:
@@ -1347,7 +1347,7 @@ func getNodeHealthMetrics() *MetricsGroup {
|
||||
return
|
||||
}
|
||||
metrics = make([]Metric, 0, 16)
|
||||
nodesUp, nodesDown := GetPeerOnlineCount()
|
||||
nodesUp, nodesDown := globalNotificationSys.GetPeerOnlineCount()
|
||||
metrics = append(metrics, Metric{
|
||||
Description: getNodeOnlineTotalMD(),
|
||||
Value: float64(nodesUp),
|
||||
@@ -1932,11 +1932,9 @@ func (c *minioClusterCollector) Collect(out chan<- prometheus.Metric) {
|
||||
}
|
||||
|
||||
// Call peer api to fetch metrics
|
||||
peerCh := globalNotificationSys.GetClusterMetrics(GlobalContext)
|
||||
selfCh := ReportMetrics(GlobalContext, c.metricsGroups)
|
||||
wg.Add(2)
|
||||
go publish(peerCh)
|
||||
go publish(selfCh)
|
||||
go publish(ReportMetrics(GlobalContext, c.metricsGroups))
|
||||
go publish(globalNotificationSys.GetClusterMetrics(GlobalContext))
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user