From bec1f7c26ac0c338da81445eb79bee1bee3a28e5 Mon Sep 17 00:00:00 2001
From: Krishnan Parthasarathi
Date: Thu, 14 Dec 2023 14:02:52 -0800
Subject: [PATCH] metrics: Refactor handling of histogram vectors (#18632)

---
 cmd/metrics-v2.go      | 118 +++++++++++++++--------------------------
 cmd/metrics-v2_test.go |  87 ++++++++++++++++++++++++++++++
 2 files changed, 131 insertions(+), 74 deletions(-)
 create mode 100644 cmd/metrics-v2_test.go

diff --git a/cmd/metrics-v2.go b/cmd/metrics-v2.go
index 9b9996ce9..0c8fcd146 100644
--- a/cmd/metrics-v2.go
+++ b/cmd/metrics-v2.go
@@ -1598,47 +1598,52 @@ func getGoMetrics() *MetricsGroup {
 	return mg
 }
 
+// getHistogramMetrics fetches histogram metrics and returns them as []Metric.
+// Note: Typically used in MetricsGroup.RegisterRead
+func getHistogramMetrics(hist *prometheus.HistogramVec, desc MetricDescription) []Metric {
+	ch := make(chan prometheus.Metric)
+	go func() {
+		defer close(ch)
+		// Collects prometheus metrics from hist and sends them over ch
+		hist.Collect(ch)
+	}()
+
+	// Converts metrics received into internal []Metric type
+	var metrics []Metric
+	for promMetric := range ch {
+		dtoMetric := &dto.Metric{}
+		err := promMetric.Write(dtoMetric)
+		if err != nil {
+			// Log error and continue to receive other metric
+			// values
+			logger.LogIf(GlobalContext, err)
+			continue
+		}
+
+		h := dtoMetric.GetHistogram()
+		for _, b := range h.Bucket {
+			labels := make(map[string]string)
+			for _, lp := range dtoMetric.GetLabel() {
+				labels[*lp.Name] = *lp.Value
+			}
+			labels["le"] = fmt.Sprintf("%.3f", *b.UpperBound)
+			metric := Metric{
+				Description:    desc,
+				VariableLabels: labels,
+				Value:          float64(b.GetCumulativeCount()),
+			}
+			metrics = append(metrics, metric)
+		}
+	}
+	return metrics
+}
+
 func getBucketTTFBMetric() *MetricsGroup {
 	mg := &MetricsGroup{
 		cacheInterval: 10 * time.Second,
 	}
-	mg.RegisterRead(func(ctx context.Context) (metrics []Metric) {
-		// Read prometheus metric on this channel
-		ch := make(chan prometheus.Metric)
-		var wg sync.WaitGroup
-		wg.Add(1)
-
-		// Read prometheus histogram data and convert it to internal metric data
-		go func() {
-			defer wg.Done()
-			for promMetric := range ch {
-				dtoMetric := &dto.Metric{}
-				err := promMetric.Write(dtoMetric)
-				if err != nil {
-					logger.LogIf(GlobalContext, err)
-					return
-				}
-				h := dtoMetric.GetHistogram()
-				for _, b := range h.Bucket {
-					labels := make(map[string]string)
-					for _, lp := range dtoMetric.GetLabel() {
-						labels[*lp.Name] = *lp.Value
-					}
-					labels["le"] = fmt.Sprintf("%.3f", *b.UpperBound)
-					metric := Metric{
-						Description:    getBucketTTFBDistributionMD(),
-						VariableLabels: labels,
-						Value:          float64(b.GetCumulativeCount()),
-					}
-					metrics = append(metrics, metric)
-				}
-			}
-		}()
-
-		bucketHTTPRequestsDuration.Collect(ch)
-		close(ch)
-		wg.Wait()
-		return
+	mg.RegisterRead(func(ctx context.Context) []Metric {
+		return getHistogramMetrics(bucketHTTPRequestsDuration, getBucketTTFBDistributionMD())
 	})
 	return mg
 }
@@ -1647,43 +1652,8 @@ func getS3TTFBMetric() *MetricsGroup {
 	mg := &MetricsGroup{
 		cacheInterval: 10 * time.Second,
 	}
-	mg.RegisterRead(func(ctx context.Context) (metrics []Metric) {
-		// Read prometheus metric on this channel
-		ch := make(chan prometheus.Metric)
-		var wg sync.WaitGroup
-		wg.Add(1)
-
-		// Read prometheus histogram data and convert it to internal metric data
-		go func() {
-			defer wg.Done()
-			for promMetric := range ch {
-				dtoMetric := &dto.Metric{}
-				err := promMetric.Write(dtoMetric)
-				if err != nil {
-					logger.LogIf(GlobalContext, err)
-					return
-				}
-				h := dtoMetric.GetHistogram()
-				for _, b := range h.Bucket {
-					labels := make(map[string]string)
-					for _, lp := range dtoMetric.GetLabel() {
-						labels[*lp.Name] = *lp.Value
-					}
-					labels["le"] = fmt.Sprintf("%.3f", *b.UpperBound)
-					metric := Metric{
-						Description:    getS3TTFBDistributionMD(),
-						VariableLabels: labels,
-						Value:          float64(b.GetCumulativeCount()),
-					}
-					metrics = append(metrics, metric)
-				}
-			}
-		}()
-
-		httpRequestsDuration.Collect(ch)
-		close(ch)
-		wg.Wait()
-		return
+	mg.RegisterRead(func(ctx context.Context) []Metric {
+		return getHistogramMetrics(httpRequestsDuration, getS3TTFBDistributionMD())
 	})
 	return mg
 }
diff --git a/cmd/metrics-v2_test.go b/cmd/metrics-v2_test.go
new file mode 100644
index 000000000..088fb4041
--- /dev/null
+++ b/cmd/metrics-v2_test.go
@@ -0,0 +1,87 @@
+// Copyright (c) 2015-2023 MinIO, Inc.
+//
+// This file is part of MinIO Object Storage stack
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package cmd
+
+import (
+	"testing"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+func TestGetHistogramMetrics(t *testing.T) {
+	histBuckets := []float64{0.05, 0.1, 0.25, 0.5, 0.75}
+	labels := []string{"GetObject", "PutObject", "CopyObject", "CompleteMultipartUpload"}
+	ttfbHist := prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name:    "s3_ttfb_seconds",
+			Help:    "Time taken by requests served by current MinIO server instance",
+			Buckets: histBuckets,
+		},
+		[]string{"api"},
+	)
+	observations := []struct {
+		val   float64
+		label string
+	}{
+		{
+			val:   0.02,
+			label: labels[0],
+		},
+		{
+			val:   0.07,
+			label: labels[1],
+		},
+		{
+			val:   0.11,
+			label: labels[1],
+		},
+		{
+			val:   0.19,
+			label: labels[1],
+		},
+		{
+			val:   0.31,
+			label: labels[1],
+		},
+		{
+			val:   0.61,
+			label: labels[3],
+		},
+		{
+			val:   0.79,
+			label: labels[2],
+		},
+	}
+	ticker := time.NewTicker(1 * time.Millisecond)
+	defer ticker.Stop()
+	for _, obs := range observations {
+		// Send observations once every 1ms, to simulate delay between
+		// observations. This is to test the channel based
+		// synchronization used internally.
+		select {
+		case <-ticker.C:
+			ttfbHist.With(prometheus.Labels{"api": obs.label}).Observe(obs.val)
+		}
+	}
+
+	metrics := getHistogramMetrics(ttfbHist, getBucketTTFBDistributionMD())
+	if expPoints := len(labels) * len(histBuckets); expPoints != len(metrics) {
+		t.Fatalf("Expected %v data points but got %v", expPoints, len(metrics))
+	}
+}
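
Reviewer note: the key simplification in getHistogramMetrics is that the collecting goroutine owns the channel and closes it (defer close(ch)), so the reader can simply range over the channel until Collect finishes; the per-group copies this patch removes needed a sync.WaitGroup plus an explicit close on the caller side to get the same effect. The sketch below shows the same collect-and-flatten pattern in isolation, for anyone who wants to run it outside the MinIO tree. It is a minimal illustration, not code from this repository: histogramToPoints and bucketPoint are hypothetical stand-ins for getHistogramMetrics and the internal Metric type, while the prometheus/client_golang and client_model calls are the same library APIs the patch uses.

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	dto "github.com/prometheus/client_model/go"
)

// bucketPoint is an illustrative stand-in for MinIO's internal Metric type:
// one cumulative-count data point per histogram bucket, keyed by its labels.
type bucketPoint struct {
	labels map[string]string
	value  float64
}

// histogramToPoints mirrors the pattern used by getHistogramMetrics in this
// patch: the sender goroutine collects from the HistogramVec and closes the
// channel, while the receiver flattens every bucket of every child histogram.
func histogramToPoints(hist *prometheus.HistogramVec) []bucketPoint {
	ch := make(chan prometheus.Metric)
	go func() {
		defer close(ch)
		hist.Collect(ch)
	}()

	var points []bucketPoint
	for m := range ch {
		dtoMetric := &dto.Metric{}
		if err := m.Write(dtoMetric); err != nil {
			// Skip unreadable metrics but keep draining the channel,
			// matching the `continue` in the patched helper.
			continue
		}
		for _, b := range dtoMetric.GetHistogram().GetBucket() {
			labels := make(map[string]string)
			for _, lp := range dtoMetric.GetLabel() {
				labels[lp.GetName()] = lp.GetValue()
			}
			labels["le"] = fmt.Sprintf("%.3f", b.GetUpperBound())
			points = append(points, bucketPoint{
				labels: labels,
				value:  float64(b.GetCumulativeCount()),
			})
		}
	}
	return points
}

func main() {
	ttfb := prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "example_ttfb_seconds",
		Help:    "Example time-to-first-byte histogram",
		Buckets: []float64{0.05, 0.1, 0.25},
	}, []string{"api"})

	ttfb.With(prometheus.Labels{"api": "GetObject"}).Observe(0.02)
	ttfb.With(prometheus.Labels{"api": "PutObject"}).Observe(0.2)

	// Two observed label values x three configured buckets = six points,
	// each a cumulative count tagged with an "le" upper-bound label.
	for _, p := range histogramToPoints(ttfb) {
		fmt.Printf("%v -> %v\n", p.labels, p.value)
	}
}

This is the same shape TestGetHistogramMetrics asserts on: len(labels) * len(histBuckets) data points, one cumulative count per (label set, le) pair.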