mirror of
https://github.com/minio/minio.git
synced 2025-11-07 12:52:58 -05:00
Add support for resource metrics (#18057)
Add a new endpoint for "resource" metrics `/v2/metrics/resource` This should return system metrics related to drives, network, CPU and memory. Except for drives, other metrics should have corresponding "avg" and "max" values also. Reuse the real-time feature to capture the required data, introducing CPU and memory metrics in it. Collect the data every minute and keep updating the average and max values accordingly, returning the latest values when the API is called.
This commit is contained in:
@@ -783,6 +783,27 @@ func (sys *NotificationSys) GetMetrics(ctx context.Context, t madmin.MetricType,
|
||||
return reply
|
||||
}
|
||||
|
||||
// GetResourceMetrics - gets the resource metrics from all nodes excluding self.
|
||||
func (sys *NotificationSys) GetResourceMetrics(ctx context.Context) <-chan Metric {
|
||||
if sys == nil {
|
||||
return nil
|
||||
}
|
||||
g := errgroup.WithNErrs(len(sys.peerClients))
|
||||
peerChannels := make([]<-chan Metric, len(sys.peerClients))
|
||||
for index := range sys.peerClients {
|
||||
index := index
|
||||
g.Go(func() error {
|
||||
if sys.peerClients[index] == nil {
|
||||
return errPeerNotReachable
|
||||
}
|
||||
var err error
|
||||
peerChannels[index], err = sys.peerClients[index].GetResourceMetrics(ctx)
|
||||
return err
|
||||
}, index)
|
||||
}
|
||||
return sys.collectPeerMetrics(ctx, peerChannels, g)
|
||||
}
|
||||
|
||||
// GetSysConfig - Get information about system config
|
||||
// (only the config that are of concern to minio)
|
||||
func (sys *NotificationSys) GetSysConfig(ctx context.Context) []madmin.SysConfig {
|
||||
@@ -1122,25 +1143,7 @@ func (sys *NotificationSys) GetBandwidthReports(ctx context.Context, buckets ...
|
||||
return consolidatedReport
|
||||
}
|
||||
|
||||
// GetBucketMetrics - gets the cluster level bucket metrics from all nodes excluding self.
|
||||
func (sys *NotificationSys) GetBucketMetrics(ctx context.Context) <-chan Metric {
|
||||
if sys == nil {
|
||||
return nil
|
||||
}
|
||||
g := errgroup.WithNErrs(len(sys.peerClients))
|
||||
peerChannels := make([]<-chan Metric, len(sys.peerClients))
|
||||
for index := range sys.peerClients {
|
||||
index := index
|
||||
g.Go(func() error {
|
||||
if sys.peerClients[index] == nil {
|
||||
return errPeerNotReachable
|
||||
}
|
||||
var err error
|
||||
peerChannels[index], err = sys.peerClients[index].GetPeerBucketMetrics(ctx)
|
||||
return err
|
||||
}, index)
|
||||
}
|
||||
|
||||
func (sys *NotificationSys) collectPeerMetrics(ctx context.Context, peerChannels []<-chan Metric, g *errgroup.Group) <-chan Metric {
|
||||
ch := make(chan Metric)
|
||||
var wg sync.WaitGroup
|
||||
for index, err := range g.Wait() {
|
||||
@@ -1181,6 +1184,27 @@ func (sys *NotificationSys) GetBucketMetrics(ctx context.Context) <-chan Metric
|
||||
return ch
|
||||
}
|
||||
|
||||
// GetBucketMetrics - gets the cluster level bucket metrics from all nodes excluding self.
|
||||
func (sys *NotificationSys) GetBucketMetrics(ctx context.Context) <-chan Metric {
|
||||
if sys == nil {
|
||||
return nil
|
||||
}
|
||||
g := errgroup.WithNErrs(len(sys.peerClients))
|
||||
peerChannels := make([]<-chan Metric, len(sys.peerClients))
|
||||
for index := range sys.peerClients {
|
||||
index := index
|
||||
g.Go(func() error {
|
||||
if sys.peerClients[index] == nil {
|
||||
return errPeerNotReachable
|
||||
}
|
||||
var err error
|
||||
peerChannels[index], err = sys.peerClients[index].GetPeerBucketMetrics(ctx)
|
||||
return err
|
||||
}, index)
|
||||
}
|
||||
return sys.collectPeerMetrics(ctx, peerChannels, g)
|
||||
}
|
||||
|
||||
// GetClusterMetrics - gets the cluster metrics from all nodes excluding self.
|
||||
func (sys *NotificationSys) GetClusterMetrics(ctx context.Context) <-chan Metric {
|
||||
if sys == nil {
|
||||
@@ -1199,45 +1223,7 @@ func (sys *NotificationSys) GetClusterMetrics(ctx context.Context) <-chan Metric
|
||||
return err
|
||||
}, index)
|
||||
}
|
||||
|
||||
ch := make(chan Metric)
|
||||
var wg sync.WaitGroup
|
||||
for index, err := range g.Wait() {
|
||||
if err != nil {
|
||||
if sys.peerClients[index] != nil {
|
||||
reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress",
|
||||
sys.peerClients[index].host.String())
|
||||
logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), err, sys.peerClients[index].host.String())
|
||||
} else {
|
||||
logger.LogOnceIf(ctx, err, "peer-offline")
|
||||
}
|
||||
continue
|
||||
}
|
||||
wg.Add(1)
|
||||
go func(ctx context.Context, peerChannel <-chan Metric, wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
for {
|
||||
select {
|
||||
case m, ok := <-peerChannel:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
select {
|
||||
case ch <- m:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}(ctx, peerChannels[index], &wg)
|
||||
}
|
||||
go func(wg *sync.WaitGroup, ch chan Metric) {
|
||||
wg.Wait()
|
||||
close(ch)
|
||||
}(&wg, ch)
|
||||
return ch
|
||||
return sys.collectPeerMetrics(ctx, peerChannels, g)
|
||||
}
|
||||
|
||||
// ServiceFreeze freezes all S3 API calls when 'freeze' is true,
|
||||
|
||||
Reference in New Issue
Block a user