diff --git a/cmd/metrics-v2.go b/cmd/metrics-v2.go index 61f470f7e..535e272fa 100644 --- a/cmd/metrics-v2.go +++ b/cmd/metrics-v2.go @@ -1347,7 +1347,7 @@ func getNodeHealthMetrics() *MetricsGroup { return } metrics = make([]Metric, 0, 16) - nodesUp, nodesDown := GetPeerOnlineCount() + nodesUp, nodesDown := globalNotificationSys.GetPeerOnlineCount() metrics = append(metrics, Metric{ Description: getNodeOnlineTotalMD(), Value: float64(nodesUp), @@ -1932,11 +1932,9 @@ func (c *minioClusterCollector) Collect(out chan<- prometheus.Metric) { } // Call peer api to fetch metrics - peerCh := globalNotificationSys.GetClusterMetrics(GlobalContext) - selfCh := ReportMetrics(GlobalContext, c.metricsGroups) wg.Add(2) - go publish(peerCh) - go publish(selfCh) + go publish(ReportMetrics(GlobalContext, c.metricsGroups)) + go publish(globalNotificationSys.GetClusterMetrics(GlobalContext)) wg.Wait() } diff --git a/cmd/metrics.go b/cmd/metrics.go index 79225d363..80cd815e3 100644 --- a/cmd/metrics.go +++ b/cmd/metrics.go @@ -110,7 +110,7 @@ func nodeHealthMetricsPrometheus(ch chan<- prometheus.Metric) { return } - nodesUp, nodesDown := GetPeerOnlineCount() + nodesUp, nodesDown := globalNotificationSys.GetPeerOnlineCount() ch <- prometheus.MustNewConstMetric( prometheus.NewDesc( prometheus.BuildFQName(minioNamespace, "nodes", "online"), diff --git a/cmd/notification.go b/cmd/notification.go index 4566ae5ef..c697da8cb 100644 --- a/cmd/notification.go +++ b/cmd/notification.go @@ -1344,6 +1344,35 @@ func (sys *NotificationSys) restClientFromHash(s string) (client *peerRESTClient return peerClients[idx] } +// GetPeerOnlineCount gets the count of online and offline nodes. +func (sys *NotificationSys) GetPeerOnlineCount() (nodesOnline, nodesOffline int) { + nodesOnline = 1 // Self is always online. + nodesOffline = 0 + nodesOnlineIndex := make([]bool, len(sys.peerClients)) + var wg sync.WaitGroup + for idx, client := range sys.peerClients { + if client == nil { + continue + } + wg.Add(1) + go func(idx int, client *peerRESTClient) { + defer wg.Done() + nodesOnlineIndex[idx] = client.restClient.HealthCheckFn() + }(idx, client) + + } + wg.Wait() + + for _, online := range nodesOnlineIndex { + if online { + nodesOnline++ + } else { + nodesOffline++ + } + } + return +} + // NewNotificationSys - creates new notification system object. func NewNotificationSys(endpoints EndpointServerPools) *NotificationSys { // targetList/bucketRulesMap/bucketRemoteTargetRulesMap are populated by NotificationSys.Init() @@ -1358,21 +1387,6 @@ func NewNotificationSys(endpoints EndpointServerPools) *NotificationSys { } } -// GetPeerOnlineCount gets the count of online and offline nodes. -func GetPeerOnlineCount() (nodesOnline, nodesOffline int) { - nodesOnline = 1 // Self is always online. - nodesOffline = 0 - servers := globalNotificationSys.ServerInfo() - for _, s := range servers { - if s.State == string(madmin.ItemOnline) { - nodesOnline++ - continue - } - nodesOffline++ - } - return -} - type eventArgs struct { EventName event.Name BucketName string @@ -1524,7 +1538,7 @@ func (sys *NotificationSys) GetBandwidthReports(ctx context.Context, buckets ... } // GetClusterMetrics - gets the cluster metrics from all nodes excluding self. -func (sys *NotificationSys) GetClusterMetrics(ctx context.Context) chan Metric { +func (sys *NotificationSys) GetClusterMetrics(ctx context.Context) <-chan Metric { if sys == nil { return nil } @@ -1545,11 +1559,14 @@ func (sys *NotificationSys) GetClusterMetrics(ctx context.Context) chan Metric { ch := make(chan Metric) var wg sync.WaitGroup for index, err := range g.Wait() { - reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", - sys.peerClients[index].host.String()) - ctx := logger.SetReqInfo(ctx, reqInfo) if err != nil { - logger.LogOnceIf(ctx, err, sys.peerClients[index].host.String()) + if sys.peerClients[index] != nil { + reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", + sys.peerClients[index].host.String()) + logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), err, sys.peerClients[index].host.String()) + } else { + logger.LogOnceIf(ctx, err, "peer-offline") + } continue } wg.Add(1) diff --git a/cmd/peer-rest-server.go b/cmd/peer-rest-server.go index ea9f01223..0a551f201 100644 --- a/cmd/peer-rest-server.go +++ b/cmd/peer-rest-server.go @@ -1174,13 +1174,9 @@ func (s *peerRESTServer) GetPeerMetrics(w http.ResponseWriter, r *http.Request) s.writeErrorResponse(w, errors.New("invalid request")) } - doneCh := make(chan struct{}) - defer close(doneCh) - enc := gob.NewEncoder(w) - ch := ReportMetrics(r.Context(), peerMetricsGroups) - for m := range ch { + for m := range ReportMetrics(r.Context(), peerMetricsGroups) { if err := enc.Encode(m); err != nil { s.writeErrorResponse(w, errors.New("Encoding metric failed: "+err.Error())) return