Enforce a bucket limit of 100 to v2 metrics calls (#20761)

Enforce a bucket count limit on metrics for v2 calls.

If people hit this limit, they should move to v3, as certain calls explode with high bucket count.

Reviewers: This *should* only affect v2 calls, but the affected code paths are complex enough that careful review is warranted.
This commit is contained in:
Klaus Post 2025-02-28 11:33:08 -08:00 committed by GitHub
parent f9c62dea55
commit 11507d46da
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 68 additions and 16 deletions

View File

@ -53,6 +53,10 @@ var (
bucketPeerMetricsGroups []*MetricsGroupV2 bucketPeerMetricsGroups []*MetricsGroupV2
) )
// v2MetricsMaxBuckets enforces a bucket count limit on metrics for v2 calls.
// Per-bucket metric producers sort bucket names and truncate to this count,
// so buckets beyond the limit are silently dropped from v2 output.
// If people hit this limit, they should move to v3, as certain calls explode with high bucket count.
const v2MetricsMaxBuckets = 100
func init() { func init() {
clusterMetricsGroups := []*MetricsGroupV2{ clusterMetricsGroups := []*MetricsGroupV2{
getNodeHealthMetrics(MetricsGroupOpts{dependGlobalNotificationSys: true}), getNodeHealthMetrics(MetricsGroupOpts{dependGlobalNotificationSys: true}),
@ -1842,9 +1846,9 @@ func getGoMetrics() *MetricsGroupV2 {
// getHistogramMetrics fetches histogram metrics and returns it in a []Metric // getHistogramMetrics fetches histogram metrics and returns it in a []Metric
// Note: Typically used in MetricGroup.RegisterRead // Note: Typically used in MetricGroup.RegisterRead
// //
// The last parameter is added for compatibility - if true it lowercases the // The toLowerAPILabels parameter is added for compatibility,
// `api` label values. // if set, it lowercases the `api` label values.
func getHistogramMetrics(hist *prometheus.HistogramVec, desc MetricDescription, toLowerAPILabels bool) []MetricV2 { func getHistogramMetrics(hist *prometheus.HistogramVec, desc MetricDescription, toLowerAPILabels, limitBuckets bool) []MetricV2 {
ch := make(chan prometheus.Metric) ch := make(chan prometheus.Metric)
go func() { go func() {
defer xioutil.SafeClose(ch) defer xioutil.SafeClose(ch)
@ -1854,6 +1858,7 @@ func getHistogramMetrics(hist *prometheus.HistogramVec, desc MetricDescription,
// Converts metrics received into internal []Metric type // Converts metrics received into internal []Metric type
var metrics []MetricV2 var metrics []MetricV2
buckets := make(map[string][]MetricV2, v2MetricsMaxBuckets)
for promMetric := range ch { for promMetric := range ch {
dtoMetric := &dto.Metric{} dtoMetric := &dto.Metric{}
err := promMetric.Write(dtoMetric) err := promMetric.Write(dtoMetric)
@ -1880,7 +1885,11 @@ func getHistogramMetrics(hist *prometheus.HistogramVec, desc MetricDescription,
VariableLabels: labels, VariableLabels: labels,
Value: float64(b.GetCumulativeCount()), Value: float64(b.GetCumulativeCount()),
} }
metrics = append(metrics, metric) if limitBuckets && labels["bucket"] != "" {
buckets[labels["bucket"]] = append(buckets[labels["bucket"]], metric)
} else {
metrics = append(metrics, metric)
}
} }
// add metrics with +Inf label // add metrics with +Inf label
labels1 := make(map[string]string) labels1 := make(map[string]string)
@ -1892,11 +1901,26 @@ func getHistogramMetrics(hist *prometheus.HistogramVec, desc MetricDescription,
} }
} }
labels1["le"] = fmt.Sprintf("%.3f", math.Inf(+1)) labels1["le"] = fmt.Sprintf("%.3f", math.Inf(+1))
metrics = append(metrics, MetricV2{
metric := MetricV2{
Description: desc, Description: desc,
VariableLabels: labels1, VariableLabels: labels1,
Value: float64(dtoMetric.Histogram.GetSampleCount()), Value: float64(dtoMetric.Histogram.GetSampleCount()),
}) }
if limitBuckets && labels1["bucket"] != "" {
buckets[labels1["bucket"]] = append(buckets[labels1["bucket"]], metric)
} else {
metrics = append(metrics, metric)
}
}
// Limit bucket metrics...
if limitBuckets {
bucketNames := mapKeysSorted(buckets)
bucketNames = bucketNames[:min(len(buckets), v2MetricsMaxBuckets)]
for _, b := range bucketNames {
metrics = append(metrics, buckets[b]...)
}
} }
return metrics return metrics
} }
@ -1907,7 +1931,7 @@ func getBucketTTFBMetric() *MetricsGroupV2 {
} }
mg.RegisterRead(func(ctx context.Context) []MetricV2 { mg.RegisterRead(func(ctx context.Context) []MetricV2 {
return getHistogramMetrics(bucketHTTPRequestsDuration, return getHistogramMetrics(bucketHTTPRequestsDuration,
getBucketTTFBDistributionMD(), true) getBucketTTFBDistributionMD(), true, true)
}) })
return mg return mg
} }
@ -1918,7 +1942,7 @@ func getS3TTFBMetric() *MetricsGroupV2 {
} }
mg.RegisterRead(func(ctx context.Context) []MetricV2 { mg.RegisterRead(func(ctx context.Context) []MetricV2 {
return getHistogramMetrics(httpRequestsDuration, return getHistogramMetrics(httpRequestsDuration,
getS3TTFBDistributionMD(), true) getS3TTFBDistributionMD(), true, true)
}) })
return mg return mg
} }
@ -3017,7 +3041,13 @@ func getHTTPMetrics(opts MetricsGroupOpts) *MetricsGroupV2 {
return return
} }
for bucket, inOut := range globalBucketConnStats.getS3InOutBytes() { // If we have too many, limit them
bConnStats := globalBucketConnStats.getS3InOutBytes()
buckets := mapKeysSorted(bConnStats)
buckets = buckets[:min(v2MetricsMaxBuckets, len(buckets))]
for _, bucket := range buckets {
inOut := bConnStats[bucket]
recvBytes := inOut.In recvBytes := inOut.In
if recvBytes > 0 { if recvBytes > 0 {
metrics = append(metrics, MetricV2{ metrics = append(metrics, MetricV2{
@ -3260,7 +3290,12 @@ func getBucketUsageMetrics(opts MetricsGroupOpts) *MetricsGroupV2 {
if !globalSiteReplicationSys.isEnabled() { if !globalSiteReplicationSys.isEnabled() {
bucketReplStats = globalReplicationStats.Load().getAllLatest(dataUsageInfo.BucketsUsage) bucketReplStats = globalReplicationStats.Load().getAllLatest(dataUsageInfo.BucketsUsage)
} }
for bucket, usage := range dataUsageInfo.BucketsUsage { buckets := mapKeysSorted(dataUsageInfo.BucketsUsage)
if len(buckets) > v2MetricsMaxBuckets {
buckets = buckets[:v2MetricsMaxBuckets]
}
for _, bucket := range buckets {
usage := dataUsageInfo.BucketsUsage[bucket]
quota, _ := globalBucketQuotaSys.Get(ctx, bucket) quota, _ := globalBucketQuotaSys.Get(ctx, bucket)
metrics = append(metrics, MetricV2{ metrics = append(metrics, MetricV2{

View File

@ -82,13 +82,13 @@ func TestGetHistogramMetrics_BucketCount(t *testing.T) {
} }
} }
metrics := getHistogramMetrics(ttfbHist, getBucketTTFBDistributionMD(), false) metrics := getHistogramMetrics(ttfbHist, getBucketTTFBDistributionMD(), false, false)
// additional labels for +Inf for all histogram metrics // additional labels for +Inf for all histogram metrics
if expPoints := len(labels) * (len(histBuckets) + 1); expPoints != len(metrics) { if expPoints := len(labels) * (len(histBuckets) + 1); expPoints != len(metrics) {
t.Fatalf("Expected %v data points when toLowerAPILabels=false but got %v", expPoints, len(metrics)) t.Fatalf("Expected %v data points when toLowerAPILabels=false but got %v", expPoints, len(metrics))
} }
metrics = getHistogramMetrics(ttfbHist, getBucketTTFBDistributionMD(), true) metrics = getHistogramMetrics(ttfbHist, getBucketTTFBDistributionMD(), true, false)
// additional labels for +Inf for all histogram metrics // additional labels for +Inf for all histogram metrics
if expPoints := len(labels) * (len(histBuckets) + 1); expPoints != len(metrics) { if expPoints := len(labels) * (len(histBuckets) + 1); expPoints != len(metrics) {
t.Fatalf("Expected %v data points when toLowerAPILabels=true but got %v", expPoints, len(metrics)) t.Fatalf("Expected %v data points when toLowerAPILabels=true but got %v", expPoints, len(metrics))
@ -144,7 +144,7 @@ func TestGetHistogramMetrics_Values(t *testing.T) {
} }
// Accumulate regular-cased API label metrics for 'PutObject' for deeper verification // Accumulate regular-cased API label metrics for 'PutObject' for deeper verification
metrics := getHistogramMetrics(ttfbHist, getBucketTTFBDistributionMD(), false) metrics := getHistogramMetrics(ttfbHist, getBucketTTFBDistributionMD(), false, false)
capitalPutObjects := make([]MetricV2, 0, len(histBuckets)+1) capitalPutObjects := make([]MetricV2, 0, len(histBuckets)+1)
for _, metric := range metrics { for _, metric := range metrics {
if value := metric.VariableLabels["api"]; value == "PutObject" { if value := metric.VariableLabels["api"]; value == "PutObject" {
@ -181,7 +181,7 @@ func TestGetHistogramMetrics_Values(t *testing.T) {
} }
// Accumulate lower-cased API label metrics for 'copyobject' for deeper verification // Accumulate lower-cased API label metrics for 'copyobject' for deeper verification
metrics = getHistogramMetrics(ttfbHist, getBucketTTFBDistributionMD(), true) metrics = getHistogramMetrics(ttfbHist, getBucketTTFBDistributionMD(), true, false)
lowerCopyObjects := make([]MetricV2, 0, len(histBuckets)+1) lowerCopyObjects := make([]MetricV2, 0, len(histBuckets)+1)
for _, metric := range metrics { for _, metric := range metrics {
if value := metric.VariableLabels["api"]; value == "copyobject" { if value := metric.VariableLabels["api"]; value == "copyobject" {

View File

@ -267,7 +267,7 @@ func (m *MetricValues) SetHistogram(name MetricName, hist *prometheus.HistogramV
panic(fmt.Sprintf("metric has no description: %s", name)) panic(fmt.Sprintf("metric has no description: %s", name))
} }
dummyDesc := MetricDescription{} dummyDesc := MetricDescription{}
metricsV2 := getHistogramMetrics(hist, dummyDesc, false) metricsV2 := getHistogramMetrics(hist, dummyDesc, false, false)
mainLoop: mainLoop:
for _, metric := range metricsV2 { for _, metric := range metricsV2 {
for label, allowedValues := range filterByLabels { for label, allowedValues := range filterByLabels {

View File

@ -165,7 +165,7 @@ var (
) )
func (t *tierMetrics) Report() []MetricV2 { func (t *tierMetrics) Report() []MetricV2 {
metrics := getHistogramMetrics(t.histogram, tierTTLBMD, true) metrics := getHistogramMetrics(t.histogram, tierTTLBMD, true, true)
t.RLock() t.RLock()
defer t.RUnlock() defer t.RUnlock()
for tier, stat := range t.requestsCount { for tier, stat := range t.requestsCount {

View File

@ -36,6 +36,7 @@ import (
"runtime" "runtime"
"runtime/pprof" "runtime/pprof"
"runtime/trace" "runtime/trace"
"sort"
"strings" "strings"
"sync" "sync"
"time" "time"
@ -1176,3 +1177,19 @@ func filterStorageClass(ctx context.Context, s string) string {
} }
return s return s
} }
// ordered matches any type with a defined total order via the < operator.
// It mirrors the standard library's cmp.Ordered constraint.
type ordered interface {
	~int | ~int8 | ~int16 | ~int32 | ~int64 |
		~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr |
		~float32 | ~float64 | string
}

// mapKeysSorted returns all keys of m as a slice sorted in ascending order.
func mapKeysSorted[Map ~map[K]V, K ordered, V any](m Map) []K {
	keys := make([]K, 0, len(m))
	for key := range m {
		keys = append(keys, key)
	}
	less := func(i, j int) bool { return keys[i] < keys[j] }
	sort.Slice(keys, less)
	return keys
}