diff --git a/cmd/generic-handlers.go b/cmd/generic-handlers.go index 3d6d4daa7..6226081a6 100644 --- a/cmd/generic-handlers.go +++ b/cmd/generic-handlers.go @@ -231,7 +231,8 @@ func guessIsMetricsReq(req *http.Request) bool { req.URL.Path == minioReservedBucketPath+prometheusMetricsPathLegacy || req.URL.Path == minioReservedBucketPath+prometheusMetricsV2ClusterPath || req.URL.Path == minioReservedBucketPath+prometheusMetricsV2NodePath || - req.URL.Path == minioReservedBucketPath+prometheusMetricsV2BucketPath + req.URL.Path == minioReservedBucketPath+prometheusMetricsV2BucketPath || + req.URL.Path == minioReservedBucketPath+prometheusMetricsV2ResourcePath } // guessIsRPCReq - returns true if the request is for an RPC endpoint. diff --git a/cmd/metrics-realtime.go b/cmd/metrics-realtime.go index 2538487df..f8e639511 100644 --- a/cmd/metrics-realtime.go +++ b/cmd/metrics-realtime.go @@ -19,12 +19,15 @@ package cmd import ( "context" + "fmt" "net/http" "time" "github.com/minio/madmin-go/v3" "github.com/minio/minio/internal/disk" "github.com/minio/minio/internal/net" + c "github.com/shirou/gopsutil/v3/cpu" + "github.com/shirou/gopsutil/v3/load" ) type collectMetricsOpts struct { @@ -89,6 +92,34 @@ func collectLocalMetrics(types madmin.MetricType, opts collectMetricsOpts) (m ma m.Aggregated.Net.NetStats = netStats } } + if types.Contains(madmin.MetricsMem) { + m.Aggregated.Mem = &madmin.MemMetrics{ + CollectedAt: UTCNow(), + } + m.Aggregated.Mem.Info = madmin.GetMemInfo(GlobalContext, globalMinioAddr) + } + if types.Contains(madmin.MetricsCPU) { + m.Aggregated.CPU = &madmin.CPUMetrics{ + CollectedAt: UTCNow(), + } + cm, err := c.Times(false) + if err != nil { + m.Errors = append(m.Errors, err.Error()) + } else { + // not collecting per-cpu stats, so there will be only one element + if len(cm) == 1 { + m.Aggregated.CPU.TimesStat = &cm[0] + } else { + m.Errors = append(m.Errors, fmt.Sprintf("Expected one CPU stat, got %d", len(cm))) + } + } + loadStat, err := load.Avg() + if err != nil { + m.Errors = append(m.Errors, err.Error()) + } else { + m.Aggregated.CPU.LoadStat = loadStat + } + } // Add types... // ByHost is a shallow reference, so careful about sharing. diff --git a/cmd/metrics-resource.go b/cmd/metrics-resource.go new file mode 100644 index 000000000..d008db340 --- /dev/null +++ b/cmd/metrics-resource.go @@ -0,0 +1,453 @@ +// Copyright (c) 2015-2023 MinIO, Inc. +// +// This file is part of MinIO Object Storage stack +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +package cmd + +import ( + "context" + "fmt" + "math" + "net/http" + "sync" + "time" + + "github.com/minio/madmin-go/v3" + "github.com/prometheus/client_golang/prometheus" + "github.com/shirou/gopsutil/v3/host" +) + +const ( + resourceMetricsCollectionInterval = time.Minute + resourceMetricsCacheInterval = time.Minute + + // drive stats + totalInodes MetricName = "total_inodes" + readsPerSec MetricName = "reads_per_sec" + writesPerSec MetricName = "writes_per_sec" + readsKBPerSec MetricName = "reads_kb_per_sec" + writesKBPerSec MetricName = "writes_kb_per_sec" + readsAwait MetricName = "reads_await" + writesAwait MetricName = "writes_await" + percUtil MetricName = "perc_util" + usedInodes MetricName = "used_inodes" + + // network stats + interfaceRxBytes MetricName = "rx_bytes" + interfaceRxErrors MetricName = "rx_errors" + interfaceTxBytes MetricName = "tx_bytes" + interfaceTxErrors MetricName = "tx_errors" + + // memory stats + memUsed MetricName = "used" + memFree MetricName = "free" + memShared MetricName = "shared" + memBuffers MetricName = "buffers" + memCache MetricName = "cache" + memAvailable MetricName = "available" + + // cpu stats + cpuUser MetricName = "user" + cpuSystem MetricName = "system" + cpuIOWait MetricName = "iowait" + cpuIdle MetricName = "idle" + cpuNice MetricName = "nice" + cpuSteal MetricName = "steal" + cpuLoad1 MetricName = "load1" + cpuLoad5 MetricName = "load5" + cpuLoad15 MetricName = "load15" +) + +var ( + resourceCollector *minioResourceCollector + // resourceMetricsMap is a map of subsystem to its metrics + resourceMetricsMap map[MetricSubsystem]ResourceMetrics + // resourceMetricsHelpMap maps metric name to its help string + resourceMetricsHelpMap map[MetricName]string + resourceMetricsGroups []*MetricsGroup +) + +// PeerResourceMetrics represents the resource metrics +// retrieved from a peer, along with errors if any +type PeerResourceMetrics struct { + Metrics map[MetricSubsystem]ResourceMetrics + Errors []string +} + +// ResourceMetrics is a map of unique key identifying +// a resource metric (e.g. reads_per_sec_{node}_{drive}) +// to its data +type ResourceMetrics map[string]ResourceMetric + +// ResourceMetric represents a single resource metric +// The metrics are collected from all servers periodically +// and stored in the resource metrics map. +// It also maintains the count of number of times this metric +// was collected since the server started, and the sum, +// average and max values across the same. 
+type ResourceMetric struct { + Name MetricName + Labels map[string]string + + // value captured in current cycle + Current float64 + + // Used when system provides cumulative (since uptime) values + // helps in calculating the current value by comparing the new + // cumulative value with previous one + Cumulative float64 + + Max float64 + Avg float64 + Sum float64 + Count uint64 +} + +func init() { + interval := fmt.Sprintf("%ds", int(resourceMetricsCollectionInterval.Seconds())) + resourceMetricsHelpMap = map[MetricName]string{ + interfaceRxBytes: "Bytes received on the interface in " + interval, + interfaceRxErrors: "Receive errors in " + interval, + interfaceTxBytes: "Bytes transmitted in " + interval, + interfaceTxErrors: "Transmit errors in " + interval, + total: "Total memory on the node", + memUsed: "Used memory on the node", + memFree: "Free memory on the node", + memShared: "Shared memory on the node", + memBuffers: "Buffers memory on the node", + memCache: "Cache memory on the node", + memAvailable: "Available memory on the node", + readsPerSec: "Reads per second on a drive", + writesPerSec: "Writes per second on a drive", + readsKBPerSec: "Kilobytes read per second on a drive", + writesKBPerSec: "Kilobytes written per second on a drive", + readsAwait: "Average time for read requests to be served on a drive", + writesAwait: "Average time for write requests to be served on a drive", + percUtil: "Percentage of time the disk was busy since uptime", + usedBytes: "Used bytes on a drive", + totalBytes: "Total bytes on a drive", + usedInodes: "Total inodes used on a drive", + totalInodes: "Total inodes on a drive", + cpuUser: "CPU user time", + cpuSystem: "CPU system time", + cpuIdle: "CPU idle time", + cpuIOWait: "CPU ioWait time", + cpuSteal: "CPU steal time", + cpuNice: "CPU nice time", + cpuLoad1: "CPU load average 1min", + cpuLoad5: "CPU load average 5min", + cpuLoad15: "CPU load average 15min", + } + resourceMetricsGroups = []*MetricsGroup{ + getResourceMetrics(), + } + + resourceCollector = newMinioResourceCollector(resourceMetricsGroups) +} + +func updateResourceMetrics(subSys MetricSubsystem, name MetricName, val float64, labels map[string]string, isCumulative bool) { + subsysMetrics, found := resourceMetricsMap[subSys] + if !found { + subsysMetrics = ResourceMetrics{} + } + + // labels are used to uniquely identify a metric + // e.g. 
reads_per_sec_{drive} inside the map + sfx := "" + for _, v := range labels { + if len(sfx) > 0 { + sfx += "_" + } + sfx += v + } + + key := string(name) + "_" + sfx + metric, found := subsysMetrics[key] + if !found { + metric = ResourceMetric{ + Name: name, + Labels: labels, + } + } + + if isCumulative { + metric.Current = val - metric.Cumulative + metric.Cumulative = val + } else { + metric.Current = val + } + + if metric.Current > metric.Max { + metric.Max = val + } + + metric.Sum += metric.Current + metric.Count++ + + metric.Avg = metric.Sum / float64(metric.Count) + subsysMetrics[key] = metric + + resourceMetricsMap[subSys] = subsysMetrics +} + +func collectDriveMetrics(m madmin.RealtimeMetrics) { + upt, _ := host.Uptime() + kib := 1 << 10 + sectorSize := uint64(512) + + for d, dm := range m.ByDisk { + stats := dm.IOStats + labels := map[string]string{"drive": d} + updateResourceMetrics(driveSubsystem, readsPerSec, float64(stats.ReadIOs)/float64(upt), labels, false) + + readBytes := stats.ReadSectors * sectorSize + readKib := float64(readBytes) / float64(kib) + readKibPerSec := readKib / float64(upt) + updateResourceMetrics(driveSubsystem, readsKBPerSec, readKibPerSec, labels, false) + + updateResourceMetrics(driveSubsystem, writesPerSec, float64(stats.WriteIOs)/float64(upt), labels, false) + + writeBytes := stats.WriteSectors * sectorSize + writeKib := float64(writeBytes) / float64(kib) + writeKibPerSec := writeKib / float64(upt) + updateResourceMetrics(driveSubsystem, writesKBPerSec, writeKibPerSec, labels, false) + + rdAwait := 0.0 + if stats.ReadIOs > 0 { + rdAwait = float64(stats.ReadTicks) / float64(stats.ReadIOs) + } + updateResourceMetrics(driveSubsystem, readsAwait, rdAwait, labels, false) + + wrAwait := 0.0 + if stats.WriteIOs > 0 { + wrAwait = float64(stats.WriteTicks) / float64(stats.WriteIOs) + } + updateResourceMetrics(driveSubsystem, writesAwait, wrAwait, labels, false) + + updateResourceMetrics(driveSubsystem, percUtil, float64(stats.TotalTicks)/float64(upt*10), labels, false) + } + + globalLocalDrivesMu.RLock() + gld := globalLocalDrives + globalLocalDrivesMu.RUnlock() + + for _, d := range gld { + labels := map[string]string{"drive": d.Endpoint().RawPath} + di, err := d.DiskInfo(GlobalContext, false) + if err == nil { + updateResourceMetrics(driveSubsystem, usedBytes, float64(di.Used), labels, false) + updateResourceMetrics(driveSubsystem, totalBytes, float64(di.Total), labels, false) + updateResourceMetrics(driveSubsystem, usedInodes, float64(di.UsedInodes), labels, false) + updateResourceMetrics(driveSubsystem, totalInodes, float64(di.FreeInodes+di.UsedInodes), labels, false) + } + } +} + +func collectLocalResourceMetrics() { + var types madmin.MetricType = madmin.MetricsDisk | madmin.MetricNet | madmin.MetricsMem | madmin.MetricsCPU + + m := collectLocalMetrics(types, collectMetricsOpts{ + hosts: map[string]struct{}{ + globalLocalNodeName: {}, + }, + }) + + for host, hm := range m.ByHost { + if len(host) > 0 { + if hm.Net != nil && len(hm.Net.NetStats.Name) > 0 { + stats := hm.Net.NetStats + labels := map[string]string{"interface": stats.Name} + updateResourceMetrics(interfaceSubsystem, interfaceRxBytes, float64(stats.RxBytes), labels, true) + updateResourceMetrics(interfaceSubsystem, interfaceRxErrors, float64(stats.RxErrors), labels, true) + updateResourceMetrics(interfaceSubsystem, interfaceTxBytes, float64(stats.TxBytes), labels, true) + updateResourceMetrics(interfaceSubsystem, interfaceTxErrors, float64(stats.TxErrors), labels, true) + } + if hm.Mem != nil 
&& len(hm.Mem.Info.Addr) > 0 { + labels := map[string]string{} + stats := hm.Mem.Info + updateResourceMetrics(memSubsystem, total, float64(stats.Total), labels, false) + updateResourceMetrics(memSubsystem, memUsed, float64(stats.Used), labels, false) + updateResourceMetrics(memSubsystem, memFree, float64(stats.Free), labels, false) + updateResourceMetrics(memSubsystem, memShared, float64(stats.Shared), labels, false) + updateResourceMetrics(memSubsystem, memBuffers, float64(stats.Buffers), labels, false) + updateResourceMetrics(memSubsystem, memAvailable, float64(stats.Available), labels, false) + updateResourceMetrics(memSubsystem, memCache, float64(stats.Cache), labels, false) + } + if hm.CPU != nil { + labels := map[string]string{} + ts := hm.CPU.TimesStat + if ts != nil { + tot := ts.User + ts.System + ts.Idle + ts.Iowait + ts.Nice + ts.Steal + cpuUserVal := math.Round(ts.User/tot*100*100) / 100 + updateResourceMetrics(cpuSubsystem, cpuUser, cpuUserVal, labels, false) + cpuSystemVal := math.Round(ts.System/tot*100*100) / 100 + updateResourceMetrics(cpuSubsystem, cpuSystem, cpuSystemVal, labels, false) + cpuIdleVal := math.Round(ts.Idle/tot*100*100) / 100 + updateResourceMetrics(cpuSubsystem, cpuIdle, cpuIdleVal, labels, false) + cpuIOWaitVal := math.Round(ts.Iowait/tot*100*100) / 100 + updateResourceMetrics(cpuSubsystem, cpuIOWait, cpuIOWaitVal, labels, false) + cpuNiceVal := math.Round(ts.Nice/tot*100*100) / 100 + updateResourceMetrics(cpuSubsystem, cpuNice, cpuNiceVal, labels, false) + cpuStealVal := math.Round(ts.Steal/tot*100*100) / 100 + updateResourceMetrics(cpuSubsystem, cpuSteal, cpuStealVal, labels, false) + } + ls := hm.CPU.LoadStat + if ls != nil { + updateResourceMetrics(cpuSubsystem, cpuLoad1, ls.Load1, labels, false) + updateResourceMetrics(cpuSubsystem, cpuLoad5, ls.Load5, labels, false) + updateResourceMetrics(cpuSubsystem, cpuLoad15, ls.Load15, labels, false) + } + } + break // only one host expected + } + } + + collectDriveMetrics(m) +} + +// startResourceMetricsCollection - starts the job for collecting resource metrics +func startResourceMetricsCollection() { + resourceMetricsMap = map[MetricSubsystem]ResourceMetrics{} + metricsTimer := time.NewTimer(resourceMetricsCollectionInterval) + defer metricsTimer.Stop() + + collectLocalResourceMetrics() + + for { + select { + case <-GlobalContext.Done(): + return + case <-metricsTimer.C: + collectLocalResourceMetrics() + + // Reset the timer for next cycle. + metricsTimer.Reset(resourceMetricsCollectionInterval) + } + } +} + +// minioResourceCollector is the Collector for resource metrics +type minioResourceCollector struct { + metricsGroups []*MetricsGroup + desc *prometheus.Desc +} + +// Describe sends the super-set of all possible descriptors of metrics +func (c *minioResourceCollector) Describe(ch chan<- *prometheus.Desc) { + ch <- c.desc +} + +// Collect is called by the Prometheus registry when collecting metrics. 
+func (c *minioResourceCollector) Collect(out chan<- prometheus.Metric) { + var wg sync.WaitGroup + publish := func(in <-chan Metric) { + defer wg.Done() + for metric := range in { + labels, values := getOrderedLabelValueArrays(metric.VariableLabels) + collectMetric(metric, labels, values, "resource", out) + } + } + + // Call peer api to fetch metrics + wg.Add(2) + go publish(ReportMetrics(GlobalContext, c.metricsGroups)) + go publish(globalNotificationSys.GetResourceMetrics(GlobalContext)) + wg.Wait() +} + +// newMinioResourceCollector describes the collector +// and returns reference of minio resource Collector +// It creates the Prometheus Description which is used +// to define Metric and help string +func newMinioResourceCollector(metricsGroups []*MetricsGroup) *minioResourceCollector { + return &minioResourceCollector{ + metricsGroups: metricsGroups, + desc: prometheus.NewDesc("minio_resource_stats", "Resource statistics exposed by MinIO server", nil, nil), + } +} + +func prepareResourceMetrics(rm ResourceMetric, subSys MetricSubsystem, requireAvgMax bool) []Metric { + help := resourceMetricsHelpMap[rm.Name] + name := rm.Name + + metrics := []Metric{} + metrics = append(metrics, Metric{ + Description: getResourceMetricDescription(subSys, name, help), + Value: rm.Current, + VariableLabels: rm.Labels, + }) + + if requireAvgMax { + avgName := MetricName(fmt.Sprintf("%s_avg", name)) + avgHelp := fmt.Sprintf("%s (avg)", help) + metrics = append(metrics, Metric{ + Description: getResourceMetricDescription(subSys, avgName, avgHelp), + Value: math.Round(rm.Avg*100) / 100, + VariableLabels: rm.Labels, + }) + + maxName := MetricName(fmt.Sprintf("%s_max", name)) + maxHelp := fmt.Sprintf("%s (max)", help) + metrics = append(metrics, Metric{ + Description: getResourceMetricDescription(subSys, maxName, maxHelp), + Value: rm.Max, + VariableLabels: rm.Labels, + }) + } + + return metrics +} + +func getResourceMetricDescription(subSys MetricSubsystem, name MetricName, help string) MetricDescription { + return MetricDescription{ + Namespace: nodeMetricNamespace, + Subsystem: subSys, + Name: name, + Help: help, + Type: gaugeMetric, + } +} + +func getResourceMetrics() *MetricsGroup { + mg := &MetricsGroup{ + cacheInterval: resourceMetricsCacheInterval, + } + mg.RegisterRead(func(ctx context.Context) []Metric { + metrics := []Metric{} + + subSystems := []MetricSubsystem{interfaceSubsystem, memSubsystem, driveSubsystem, cpuSubsystem} + for _, subSys := range subSystems { + stats, found := resourceMetricsMap[subSys] + if found { + requireAvgMax := true + if subSys == driveSubsystem { + requireAvgMax = false + } + for _, m := range stats { + metrics = append(metrics, prepareResourceMetrics(m, subSys, requireAvgMax)...) 
+ } + } + } + + return metrics + }) + return mg +} + +// metricsResourceHandler is the prometheus handler for resource metrics +func metricsResourceHandler() http.Handler { + return metricsHTTPHandler(resourceCollector, "handler.MetricsResource") +} diff --git a/cmd/metrics-router.go b/cmd/metrics-router.go index a222192a0..e2cf23ced 100644 --- a/cmd/metrics-router.go +++ b/cmd/metrics-router.go @@ -25,10 +25,11 @@ import ( ) const ( - prometheusMetricsPathLegacy = "/prometheus/metrics" - prometheusMetricsV2ClusterPath = "/v2/metrics/cluster" - prometheusMetricsV2BucketPath = "/v2/metrics/bucket" - prometheusMetricsV2NodePath = "/v2/metrics/node" + prometheusMetricsPathLegacy = "/prometheus/metrics" + prometheusMetricsV2ClusterPath = "/v2/metrics/cluster" + prometheusMetricsV2BucketPath = "/v2/metrics/bucket" + prometheusMetricsV2NodePath = "/v2/metrics/node" + prometheusMetricsV2ResourcePath = "/v2/metrics/resource" ) // Standard env prometheus auth type @@ -57,4 +58,5 @@ func registerMetricsRouter(router *mux.Router) { metricsRouter.Handle(prometheusMetricsV2ClusterPath, auth(metricsServerHandler())) metricsRouter.Handle(prometheusMetricsV2BucketPath, auth(metricsBucketHandler())) metricsRouter.Handle(prometheusMetricsV2NodePath, auth(metricsNodeHandler())) + metricsRouter.Handle(prometheusMetricsV2ResourcePath, auth(metricsResourceHandler())) } diff --git a/cmd/metrics-v2.go b/cmd/metrics-v2.go index c5f26d66f..caf296738 100644 --- a/cmd/metrics-v2.go +++ b/cmd/metrics-v2.go @@ -132,6 +132,9 @@ const ( capacityRawSubsystem MetricSubsystem = "capacity_raw" capacityUsableSubsystem MetricSubsystem = "capacity_usable" driveSubsystem MetricSubsystem = "drive" + interfaceSubsystem MetricSubsystem = "if" + memSubsystem MetricSubsystem = "mem" + cpuSubsystem MetricSubsystem = "cpu_avg" storageClassSubsystem MetricSubsystem = "storage_class" fileDescriptorSubsystem MetricSubsystem = "file_descriptor" goRoutines MetricSubsystem = "go_routine" @@ -539,12 +542,12 @@ func getNodeRRSParityMD() MetricDescription { } } -func getNodeDrivesFreeInodes() MetricDescription { +func getNodeDrivesFreeInodesMD() MetricDescription { return MetricDescription{ Namespace: nodeMetricNamespace, Subsystem: driveSubsystem, Name: freeInodes, - Help: "Total free inodes", + Help: "Free inodes on a drive", Type: gaugeMetric, } } @@ -3207,7 +3210,7 @@ func getLocalStorageMetrics() *MetricsGroup { }) metrics = append(metrics, Metric{ - Description: getNodeDrivesFreeInodes(), + Description: getNodeDrivesFreeInodesMD(), Value: float64(disk.FreeInodes), VariableLabels: map[string]string{"drive": disk.DrivePath}, }) @@ -3546,6 +3549,61 @@ func getKMSMetrics() *MetricsGroup { return mg } +func collectMetric(metric Metric, labels []string, values []string, metricName string, out chan<- prometheus.Metric) { + if metric.Description.Type == histogramMetric { + if metric.Histogram == nil { + return + } + for k, v := range metric.Histogram { + pmetric, err := prometheus.NewConstMetric( + prometheus.NewDesc( + prometheus.BuildFQName(string(metric.Description.Namespace), + string(metric.Description.Subsystem), + string(metric.Description.Name)), + metric.Description.Help, + append(labels, metric.HistogramBucketLabel), + metric.StaticLabels, + ), + prometheus.GaugeValue, + float64(v), + append(values, k)...) 
+ if err != nil { + // Enable for debugging + if serverDebugLog { + logger.LogOnceIf(GlobalContext, fmt.Errorf("unable to validate prometheus metric (%w) %v+%v", err, values, metric.Histogram), metricName+"-metrics-histogram") + } + } else { + out <- pmetric + } + } + return + } + metricType := prometheus.GaugeValue + if metric.Description.Type == counterMetric { + metricType = prometheus.CounterValue + } + pmetric, err := prometheus.NewConstMetric( + prometheus.NewDesc( + prometheus.BuildFQName(string(metric.Description.Namespace), + string(metric.Description.Subsystem), + string(metric.Description.Name)), + metric.Description.Help, + labels, + metric.StaticLabels, + ), + metricType, + metric.Value, + values...) + if err != nil { + // Enable for debugging + if serverDebugLog { + logger.LogOnceIf(GlobalContext, fmt.Errorf("unable to validate prometheus metric (%w) %v", err, values), metricName+"-metrics") + } + } else { + out <- pmetric + } +} + type minioBucketCollector struct { metricsGroups []*MetricsGroup desc *prometheus.Desc @@ -3570,52 +3628,7 @@ func (c *minioBucketCollector) Collect(out chan<- prometheus.Metric) { defer wg.Done() for metric := range in { labels, values := getOrderedLabelValueArrays(metric.VariableLabels) - if metric.Description.Type == histogramMetric { - if metric.Histogram == nil { - continue - } - for k, v := range metric.Histogram { - pmetric, err := prometheus.NewConstMetric( - prometheus.NewDesc( - prometheus.BuildFQName(string(metric.Description.Namespace), - string(metric.Description.Subsystem), - string(metric.Description.Name)), - metric.Description.Help, - append(labels, metric.HistogramBucketLabel), - metric.StaticLabels, - ), - prometheus.GaugeValue, - float64(v), - append(values, k)...) - if err != nil { - logger.LogOnceIf(GlobalContext, fmt.Errorf("unable to validate prometheus metric (%w) %v+%v", err, values, metric.Histogram), "bucket-metrics-histogram") - } else { - out <- pmetric - } - } - continue - } - metricType := prometheus.GaugeValue - if metric.Description.Type == counterMetric { - metricType = prometheus.CounterValue - } - pmetric, err := prometheus.NewConstMetric( - prometheus.NewDesc( - prometheus.BuildFQName(string(metric.Description.Namespace), - string(metric.Description.Subsystem), - string(metric.Description.Name)), - metric.Description.Help, - labels, - metric.StaticLabels, - ), - metricType, - metric.Value, - values...) - if err != nil { - logger.LogOnceIf(GlobalContext, fmt.Errorf("unable to validate prometheus metric (%w) %v", err, values), "bucket-metrics") - } else { - out <- pmetric - } + collectMetric(metric, labels, values, "bucket", out) } } @@ -3650,59 +3663,7 @@ func (c *minioClusterCollector) Collect(out chan<- prometheus.Metric) { defer wg.Done() for metric := range in { labels, values := getOrderedLabelValueArrays(metric.VariableLabels) - if metric.Description.Type == histogramMetric { - if metric.Histogram == nil { - continue - } - for k, v := range metric.Histogram { - pmetric, err := prometheus.NewConstMetric( - prometheus.NewDesc( - prometheus.BuildFQName(string(metric.Description.Namespace), - string(metric.Description.Subsystem), - string(metric.Description.Name)), - metric.Description.Help, - append(labels, metric.HistogramBucketLabel), - metric.StaticLabels, - ), - prometheus.GaugeValue, - float64(v), - append(values, k)...) 
- - if err != nil { - // Enable for debugging - if serverDebugLog { - logger.LogOnceIf(GlobalContext, fmt.Errorf("unable to validate prometheus metric (%w) %v:%v", err, values, metric.Histogram), "cluster-metrics-histogram") - } - } else { - out <- pmetric - } - } - continue - } - metricType := prometheus.GaugeValue - if metric.Description.Type == counterMetric { - metricType = prometheus.CounterValue - } - pmetric, err := prometheus.NewConstMetric( - prometheus.NewDesc( - prometheus.BuildFQName(string(metric.Description.Namespace), - string(metric.Description.Subsystem), - string(metric.Description.Name)), - metric.Description.Help, - labels, - metric.StaticLabels, - ), - metricType, - metric.Value, - values...) - if err != nil { - // Enable for debugging - if serverDebugLog { - logger.LogOnceIf(GlobalContext, fmt.Errorf("unable to validate prometheus metric (%w) %v", err, values), "cluster-metrics") - } - } else { - out <- pmetric - } + collectMetric(metric, labels, values, "cluster", out) } } @@ -3835,11 +3796,11 @@ func newMinioCollectorNode(metricsGroups []*MetricsGroup) *minioNodeCollector { } } -func metricsBucketHandler() http.Handler { +func metricsHTTPHandler(c prometheus.Collector, funcName string) http.Handler { registry := prometheus.NewRegistry() // Report all other metrics - logger.CriticalIf(GlobalContext, registry.Register(bucketCollector)) + logger.CriticalIf(GlobalContext, registry.Register(c)) // DefaultGatherers include golang metrics and process metrics. gatherers := prometheus.Gatherers{ @@ -3849,16 +3810,14 @@ func metricsBucketHandler() http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { tc, ok := r.Context().Value(mcontext.ContextTraceKey).(*mcontext.TraceCtxt) if ok { - tc.FuncName = "handler.MetricsBucket" + tc.FuncName = funcName tc.ResponseRecorder.LogErrBody = true } mfs, err := gatherers.Gather() - if err != nil { - if len(mfs) == 0 { - writeErrorResponseJSON(r.Context(), w, toAdminAPIErr(r.Context(), err), r.URL) - return - } + if err != nil && len(mfs) == 0 { + writeErrorResponseJSON(r.Context(), w, toAdminAPIErr(r.Context(), err), r.URL) + return } contentType := expfmt.Negotiate(r.Header) @@ -3878,6 +3837,10 @@ func metricsBucketHandler() http.Handler { }) } +func metricsBucketHandler() http.Handler { + return metricsHTTPHandler(bucketCollector, "handler.MetricsBucket") +} + func metricsServerHandler() http.Handler { registry := prometheus.NewRegistry() diff --git a/cmd/notification.go b/cmd/notification.go index d4d57f2c0..57f9db544 100644 --- a/cmd/notification.go +++ b/cmd/notification.go @@ -783,6 +783,27 @@ func (sys *NotificationSys) GetMetrics(ctx context.Context, t madmin.MetricType, return reply } +// GetResourceMetrics - gets the resource metrics from all nodes excluding self. 
+func (sys *NotificationSys) GetResourceMetrics(ctx context.Context) <-chan Metric { + if sys == nil { + return nil + } + g := errgroup.WithNErrs(len(sys.peerClients)) + peerChannels := make([]<-chan Metric, len(sys.peerClients)) + for index := range sys.peerClients { + index := index + g.Go(func() error { + if sys.peerClients[index] == nil { + return errPeerNotReachable + } + var err error + peerChannels[index], err = sys.peerClients[index].GetResourceMetrics(ctx) + return err + }, index) + } + return sys.collectPeerMetrics(ctx, peerChannels, g) +} + // GetSysConfig - Get information about system config // (only the config that are of concern to minio) func (sys *NotificationSys) GetSysConfig(ctx context.Context) []madmin.SysConfig { @@ -1122,25 +1143,7 @@ func (sys *NotificationSys) GetBandwidthReports(ctx context.Context, buckets ... return consolidatedReport } -// GetBucketMetrics - gets the cluster level bucket metrics from all nodes excluding self. -func (sys *NotificationSys) GetBucketMetrics(ctx context.Context) <-chan Metric { - if sys == nil { - return nil - } - g := errgroup.WithNErrs(len(sys.peerClients)) - peerChannels := make([]<-chan Metric, len(sys.peerClients)) - for index := range sys.peerClients { - index := index - g.Go(func() error { - if sys.peerClients[index] == nil { - return errPeerNotReachable - } - var err error - peerChannels[index], err = sys.peerClients[index].GetPeerBucketMetrics(ctx) - return err - }, index) - } - +func (sys *NotificationSys) collectPeerMetrics(ctx context.Context, peerChannels []<-chan Metric, g *errgroup.Group) <-chan Metric { ch := make(chan Metric) var wg sync.WaitGroup for index, err := range g.Wait() { @@ -1181,6 +1184,27 @@ func (sys *NotificationSys) GetBucketMetrics(ctx context.Context) <-chan Metric return ch } +// GetBucketMetrics - gets the cluster level bucket metrics from all nodes excluding self. +func (sys *NotificationSys) GetBucketMetrics(ctx context.Context) <-chan Metric { + if sys == nil { + return nil + } + g := errgroup.WithNErrs(len(sys.peerClients)) + peerChannels := make([]<-chan Metric, len(sys.peerClients)) + for index := range sys.peerClients { + index := index + g.Go(func() error { + if sys.peerClients[index] == nil { + return errPeerNotReachable + } + var err error + peerChannels[index], err = sys.peerClients[index].GetPeerBucketMetrics(ctx) + return err + }, index) + } + return sys.collectPeerMetrics(ctx, peerChannels, g) +} + // GetClusterMetrics - gets the cluster metrics from all nodes excluding self. 
func (sys *NotificationSys) GetClusterMetrics(ctx context.Context) <-chan Metric { if sys == nil { @@ -1199,45 +1223,7 @@ func (sys *NotificationSys) GetClusterMetrics(ctx context.Context) <-chan Metric return err }, index) } - - ch := make(chan Metric) - var wg sync.WaitGroup - for index, err := range g.Wait() { - if err != nil { - if sys.peerClients[index] != nil { - reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", - sys.peerClients[index].host.String()) - logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), err, sys.peerClients[index].host.String()) - } else { - logger.LogOnceIf(ctx, err, "peer-offline") - } - continue - } - wg.Add(1) - go func(ctx context.Context, peerChannel <-chan Metric, wg *sync.WaitGroup) { - defer wg.Done() - for { - select { - case m, ok := <-peerChannel: - if !ok { - return - } - select { - case ch <- m: - case <-ctx.Done(): - return - } - case <-ctx.Done(): - return - } - } - }(ctx, peerChannels[index], &wg) - } - go func(wg *sync.WaitGroup, ch chan Metric) { - wg.Wait() - close(ch) - }(&wg, ch) - return ch + return sys.collectPeerMetrics(ctx, peerChannels, g) } // ServiceFreeze freezes all S3 API calls when 'freeze' is true, diff --git a/cmd/peer-rest-client.go b/cmd/peer-rest-client.go index 2ca276175..e30c028f7 100644 --- a/cmd/peer-rest-client.go +++ b/cmd/peer-rest-client.go @@ -229,6 +229,33 @@ func (client *peerRESTClient) GetMetrics(ctx context.Context, t madmin.MetricTyp return info, err } +func (client *peerRESTClient) GetResourceMetrics(ctx context.Context) (<-chan Metric, error) { + respBody, err := client.callWithContext(ctx, peerRESTMethodResourceMetrics, nil, nil, -1) + if err != nil { + return nil, err + } + dec := gob.NewDecoder(respBody) + ch := make(chan Metric) + go func(ch chan<- Metric) { + defer func() { + xhttp.DrainBody(respBody) + close(ch) + }() + for { + var metric Metric + if err := dec.Decode(&metric); err != nil { + return + } + select { + case <-ctx.Done(): + return + case ch <- metric: + } + } + }(ch) + return ch, nil +} + // GetProcInfo - fetch MinIO process information for a remote node. func (client *peerRESTClient) GetProcInfo(ctx context.Context) (info madmin.ProcInfo, err error) { respBody, err := client.callWithContext(ctx, peerRESTMethodProcInfo, nil, nil, -1) diff --git a/cmd/peer-rest-common.go b/cmd/peer-rest-common.go index f4b469d79..622384f29 100644 --- a/cmd/peer-rest-common.go +++ b/cmd/peer-rest-common.go @@ -77,6 +77,7 @@ const ( peerRESTMethodDevNull = "/devnull" peerRESTMethodNetperf = "/netperf" peerRESTMethodMetrics = "/metrics" + peerRESTMethodResourceMetrics = "/resourcemetrics" peerRESTMethodGetReplicationMRF = "/getreplicationmrf" peerRESTMethodGetSRMetrics = "/getsrmetrics" ) diff --git a/cmd/peer-rest-server.go b/cmd/peer-rest-server.go index 17ad85901..71d7c06d6 100644 --- a/cmd/peer-rest-server.go +++ b/cmd/peer-rest-server.go @@ -474,6 +474,22 @@ func (s *peerRESTServer) GetMetricsHandler(w http.ResponseWriter, r *http.Reques logger.LogIf(ctx, gob.NewEncoder(w).Encode(info)) } +func (s *peerRESTServer) GetResourceMetrics(w http.ResponseWriter, r *http.Request) { + if !s.IsValid(w, r) { + s.writeErrorResponse(w, errors.New("invalid request")) + return + } + + enc := gob.NewEncoder(w) + + for m := range ReportMetrics(r.Context(), resourceMetricsGroups) { + if err := enc.Encode(m); err != nil { + s.writeErrorResponse(w, errors.New("Encoding metric failed: "+err.Error())) + return + } + } +} + // GetSysConfigHandler - returns system config information. 
 // (only the config that are of concern to minio)
 func (s *peerRESTServer) GetSysConfigHandler(w http.ResponseWriter, r *http.Request) {
@@ -1438,6 +1454,7 @@ func registerPeerRESTHandlers(router *mux.Router) {
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodProcInfo).HandlerFunc(h(server.GetProcInfoHandler))
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodMemInfo).HandlerFunc(h(server.GetMemInfoHandler))
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodMetrics).HandlerFunc(h(server.GetMetricsHandler)).Queries(restQueries(peerRESTMetricsTypes)...)
+	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodResourceMetrics).HandlerFunc(h(server.GetResourceMetrics))
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodSysErrors).HandlerFunc(h(server.GetSysErrorsHandler))
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodSysServices).HandlerFunc(h(server.GetSysServicesHandler))
 	subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodSysConfig).HandlerFunc(h(server.GetSysConfigHandler))
diff --git a/cmd/server-main.go b/cmd/server-main.go
index 12a635935..93e566a76 100644
--- a/cmd/server-main.go
+++ b/cmd/server-main.go
@@ -928,6 +928,10 @@ func serverMain(ctx *cli.Context) {
 		logger.FatalIf(err, "Unable to initialize MinIO client")
 	})
 
+	go bootstrapTrace("startResourceMetricsCollection", func() {
+		startResourceMetricsCollection()
+	})
+
 	// Add User-Agent to differentiate the requests.
 	globalMinioClient.SetAppInfo("minio-perf-test", ReleaseTag)
diff --git a/docs/metrics/prometheus/list.md b/docs/metrics/prometheus/list.md
index 8001eb166..c1267cb04 100644
--- a/docs/metrics/prometheus/list.md
+++ b/docs/metrics/prometheus/list.md
@@ -303,3 +303,109 @@ For deployments with [Site Replication](https://min.io/docs/minio/linux/operatio
 | `minio_bucket_requests_total`                     | Total number of S3 requests on a bucket.                        |
 | `minio_bucket_requests_canceled_total`            | Total number S3 requests canceled by the client.                |
 | `minio_bucket_requests_ttfb_seconds_distribution` | Distribution of time to first byte across API calls per bucket. |
+
+# Resource Metrics
+
+MinIO collects the following resource metrics at the node level.
+Each metric includes the `server` label to identify the corresponding node.
+Metrics may include one or more additional labels, such as the drive path, interface name, etc.
+
+These metrics are refreshed once per collection interval (one minute) and can be scraped from any MinIO server in the deployment at the following URL:
+
+```shell
+https://HOSTNAME:PORT/minio/v2/metrics/resource
+```
+
+Replace `HOSTNAME:PORT` with the hostname of your MinIO deployment.
+For deployments behind a load balancer, use the load balancer hostname instead of a single node hostname.
+
+## Drive Resource Metrics
+
+| Name                                  | Description                                               |
+| :------------------------------------ | :-------------------------------------------------------- |
+| `minio_node_drive_total_bytes`        | Total bytes on a drive.                                   |
+| `minio_node_drive_used_bytes`         | Used bytes on a drive.                                    |
+| `minio_node_drive_total_inodes`       | Total inodes on a drive.                                  |
+| `minio_node_drive_used_inodes`        | Total inodes used on a drive.                             |
+| `minio_node_drive_reads_per_sec`      | Reads per second on a drive.                              |
+| `minio_node_drive_reads_kb_per_sec`   | Kilobytes read per second on a drive.                     |
+| `minio_node_drive_reads_await`        | Average time for read requests to be served on a drive.  |
+| `minio_node_drive_writes_per_sec`     | Writes per second on a drive.                             |
+| `minio_node_drive_writes_kb_per_sec`  | Kilobytes written per second on a drive.                  |
+| `minio_node_drive_writes_await`       | Average time for write requests to be served on a drive. |
+| `minio_node_drive_perc_util`          | Percentage of time the disk was busy since uptime.       |
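The per-second, kilobytes-per-second, and await values above are averages since boot rather than instantaneous rates: `collectDriveMetrics` divides the cumulative kernel I/O counters by the host uptime. A minimal standalone sketch of that arithmetic (the `driveIOStats` struct is a simplified stand-in for the `madmin` fields used in the patch, and the numbers in `main` are illustrative only):

```go
package main

import "fmt"

// driveIOStats mirrors the subset of madmin.DiskIOStats fields used by
// collectDriveMetrics in this patch (a simplified stand-in, not the real type).
type driveIOStats struct {
	ReadIOs, ReadSectors, ReadTicks    uint64
	WriteIOs, WriteSectors, WriteTicks uint64
	TotalTicks                         uint64
}

// driveRates reproduces the arithmetic used in collectDriveMetrics:
// cumulative kernel counters divided by host uptime, i.e. averages since boot.
func driveRates(s driveIOStats, uptimeSecs uint64) (readsPerSec, readsKBPerSec, readsAwait, percUtil float64) {
	const sectorSize = 512 // bytes per sector, as assumed by the patch
	const kib = 1 << 10

	readsPerSec = float64(s.ReadIOs) / float64(uptimeSecs)
	readsKBPerSec = float64(s.ReadSectors*sectorSize) / float64(kib) / float64(uptimeSecs)
	if s.ReadIOs > 0 {
		readsAwait = float64(s.ReadTicks) / float64(s.ReadIOs) // milliseconds per read
	}
	// TotalTicks is busy time in milliseconds; uptime*1000 ms total, hence /(uptime*10) for a percentage.
	percUtil = float64(s.TotalTicks) / float64(uptimeSecs*10)
	return
}

func main() {
	// Illustrative numbers only.
	rps, rkbps, await, util := driveRates(driveIOStats{
		ReadIOs:     120000,
		ReadSectors: 9600000,
		ReadTicks:   480000,
		TotalTicks:  900000,
	}, 86400)
	fmt.Printf("reads/s=%.2f readKB/s=%.2f await=%.2fms util=%.2f%%\n", rps, rkbps, await, util)
}
```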
+
+## Network Interface Metrics
+
+| Name                          | Description                                    |
+| :---------------------------- | :--------------------------------------------- |
+| `minio_node_if_rx_bytes`      | Bytes received on the interface in 60s.        |
+| `minio_node_if_rx_bytes_avg`  | Bytes received on the interface in 60s (avg).  |
+| `minio_node_if_rx_bytes_max`  | Bytes received on the interface in 60s (max).  |
+| `minio_node_if_rx_errors`     | Receive errors in 60s.                         |
+| `minio_node_if_rx_errors_avg` | Receive errors in 60s (avg).                   |
+| `minio_node_if_rx_errors_max` | Receive errors in 60s (max).                   |
+| `minio_node_if_tx_bytes`      | Bytes transmitted in 60s.                      |
+| `minio_node_if_tx_bytes_avg`  | Bytes transmitted in 60s (avg).                |
+| `minio_node_if_tx_bytes_max`  | Bytes transmitted in 60s (max).                |
+| `minio_node_if_tx_errors`     | Transmit errors in 60s.                        |
+| `minio_node_if_tx_errors_avg` | Transmit errors in 60s (avg).                  |
+| `minio_node_if_tx_errors_max` | Transmit errors in 60s (max).                  |
+
+## CPU Metrics
+
+| Name                            | Description                   |
+| :------------------------------ | :---------------------------- |
+| `minio_node_cpu_avg_user`       | CPU user time.                |
+| `minio_node_cpu_avg_user_avg`   | CPU user time (avg).          |
+| `minio_node_cpu_avg_user_max`   | CPU user time (max).          |
+| `minio_node_cpu_avg_system`     | CPU system time.              |
+| `minio_node_cpu_avg_system_avg` | CPU system time (avg).        |
+| `minio_node_cpu_avg_system_max` | CPU system time (max).        |
+| `minio_node_cpu_avg_idle`       | CPU idle time.                |
+| `minio_node_cpu_avg_idle_avg`   | CPU idle time (avg).          |
+| `minio_node_cpu_avg_idle_max`   | CPU idle time (max).          |
+| `minio_node_cpu_avg_iowait`     | CPU ioWait time.              |
+| `minio_node_cpu_avg_iowait_avg` | CPU ioWait time (avg).        |
+| `minio_node_cpu_avg_iowait_max` | CPU ioWait time (max).        |
+| `minio_node_cpu_avg_nice`       | CPU nice time.                |
+| `minio_node_cpu_avg_nice_avg`   | CPU nice time (avg).          |
+| `minio_node_cpu_avg_nice_max`   | CPU nice time (max).          |
+| `minio_node_cpu_avg_steal`      | CPU steal time.               |
+| `minio_node_cpu_avg_steal_avg`  | CPU steal time (avg).         |
+| `minio_node_cpu_avg_steal_max`  | CPU steal time (max).         |
+| `minio_node_cpu_avg_load1`      | CPU load average 1min.        |
+| `minio_node_cpu_avg_load1_avg`  | CPU load average 1min (avg).  |
+| `minio_node_cpu_avg_load1_max`  | CPU load average 1min (max).  |
+| `minio_node_cpu_avg_load5`      | CPU load average 5min.        |
+| `minio_node_cpu_avg_load5_avg`  | CPU load average 5min (avg).  |
+| `minio_node_cpu_avg_load5_max`  | CPU load average 5min (max).  |
+| `minio_node_cpu_avg_load15`     | CPU load average 15min.       |
+| `minio_node_cpu_avg_load15_avg` | CPU load average 15min (avg). |
+| `minio_node_cpu_avg_load15_max` | CPU load average 15min (max). |
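The `minio_node_cpu_avg_*` time metrics are percentage shares of total CPU time rounded to two decimal places, and the load metrics are the host load averages; both are refreshed once per one-minute collection cycle, with the `_avg` and `_max` variants aggregated across cycles by `updateResourceMetrics`. A standalone sketch of the same gopsutil calls and rounding used in `collectLocalResourceMetrics`:

```go
package main

import (
	"fmt"
	"math"

	"github.com/shirou/gopsutil/v3/cpu"
	"github.com/shirou/gopsutil/v3/load"
)

// pct rounds a share of total CPU time to a percentage with two decimals,
// matching the rounding done in collectLocalResourceMetrics.
func pct(part, total float64) float64 {
	return math.Round(part/total*100*100) / 100
}

func main() {
	// Aggregate (not per-CPU) times, as used by the patch via c.Times(false).
	times, err := cpu.Times(false)
	if err != nil || len(times) != 1 {
		fmt.Println("unexpected CPU stats:", err, len(times))
		return
	}
	ts := times[0]
	// The patch sums only these six states before computing shares.
	tot := ts.User + ts.System + ts.Idle + ts.Iowait + ts.Nice + ts.Steal
	fmt.Printf("user=%.2f%% system=%.2f%% iowait=%.2f%% idle=%.2f%%\n",
		pct(ts.User, tot), pct(ts.System, tot), pct(ts.Iowait, tot), pct(ts.Idle, tot))

	// Load averages feed minio_node_cpu_avg_load1/5/15 directly.
	if avg, err := load.Avg(); err == nil {
		fmt.Printf("load1=%.2f load5=%.2f load15=%.2f\n", avg.Load1, avg.Load5, avg.Load15)
	}
}
```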
+
+## Memory Metrics
+
+| Name                           | Description                          |
+| :----------------------------- | :----------------------------------- |
+| `minio_node_mem_available`     | Available memory on the node.        |
+| `minio_node_mem_available_avg` | Available memory on the node (avg).  |
+| `minio_node_mem_available_max` | Available memory on the node (max).  |
+| `minio_node_mem_buffers`       | Buffers memory on the node.          |
+| `minio_node_mem_buffers_avg`   | Buffers memory on the node (avg).    |
+| `minio_node_mem_buffers_max`   | Buffers memory on the node (max).    |
+| `minio_node_mem_cache`         | Cache memory on the node.            |
+| `minio_node_mem_cache_avg`     | Cache memory on the node (avg).      |
+| `minio_node_mem_cache_max`     | Cache memory on the node (max).      |
+| `minio_node_mem_free`          | Free memory on the node.             |
+| `minio_node_mem_free_avg`      | Free memory on the node (avg).       |
+| `minio_node_mem_free_max`      | Free memory on the node (max).       |
+| `minio_node_mem_shared`        | Shared memory on the node.           |
+| `minio_node_mem_shared_avg`    | Shared memory on the node (avg).     |
+| `minio_node_mem_shared_max`    | Shared memory on the node (max).     |
+| `minio_node_mem_total`         | Total memory on the node.            |
+| `minio_node_mem_total_avg`     | Total memory on the node (avg).      |
+| `minio_node_mem_total_max`     | Total memory on the node (max).      |
+| `minio_node_mem_used`          | Used memory on the node.             |
+| `minio_node_mem_used_avg`      | Used memory on the node (avg).       |
+| `minio_node_mem_used_max`      | Used memory on the node (max).       |
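For a quick end-to-end check, the new endpoint can be scraped and decoded with the Prometheus text parser. A minimal sketch, assuming the same bearer-token auth as the other `/minio/v2/metrics/*` endpoints (`MINIO_URL` and `MINIO_METRICS_TOKEN` are placeholder environment variables for this example; a token can be generated with `mc admin prometheus generate`, or the header can be dropped when `MINIO_PROMETHEUS_AUTH_TYPE=public`):

```go
package main

import (
	"fmt"
	"net/http"
	"os"

	"github.com/prometheus/common/expfmt"
)

func main() {
	base := os.Getenv("MINIO_URL")             // e.g. https://minio.example.net:9000 (placeholder)
	token := os.Getenv("MINIO_METRICS_TOKEN")  // bearer token for the v2 metrics endpoints (placeholder)

	req, err := http.NewRequest(http.MethodGet, base+"/minio/v2/metrics/resource", nil)
	if err != nil {
		panic(err)
	}
	if token != "" {
		req.Header.Set("Authorization", "Bearer "+token)
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Parse the Prometheus text exposition format into metric families.
	var parser expfmt.TextParser
	families, err := parser.TextToMetricFamilies(resp.Body)
	if err != nil {
		panic(err)
	}
	for name, mf := range families {
		fmt.Printf("%-40s %d series\n", name, len(mf.Metric))
	}
}
```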