fix: resource metrics "concurrent map iteration and map write" (#18273)

`resourceMetricsMap` has no protection against concurrent reads and writes.

Add a mutex and don't use maps from the last iteration.

Bug introduced in #18057

Fixes #18271
This commit is contained in:
Klaus Post 2023-10-18 13:28:50 -07:00 committed by GitHub
parent 8e32de3ba9
commit ba6218b354
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -74,7 +74,8 @@ const (
var ( var (
resourceCollector *minioResourceCollector resourceCollector *minioResourceCollector
// resourceMetricsMap is a map of subsystem to its metrics // resourceMetricsMap is a map of subsystem to its metrics
resourceMetricsMap map[MetricSubsystem]ResourceMetrics resourceMetricsMap map[MetricSubsystem]ResourceMetrics
resourceMetricsMapMu sync.RWMutex
// resourceMetricsHelpMap maps metric name to its help string // resourceMetricsHelpMap maps metric name to its help string
resourceMetricsHelpMap map[MetricName]string resourceMetricsHelpMap map[MetricName]string
resourceMetricsGroups []*MetricsGroup resourceMetricsGroups []*MetricsGroup
@ -159,6 +160,8 @@ func init() {
} }
func updateResourceMetrics(subSys MetricSubsystem, name MetricName, val float64, labels map[string]string, isCumulative bool) { func updateResourceMetrics(subSys MetricSubsystem, name MetricName, val float64, labels map[string]string, isCumulative bool) {
resourceMetricsMapMu.Lock()
defer resourceMetricsMapMu.Unlock()
subsysMetrics, found := resourceMetricsMap[subSys] subsysMetrics, found := resourceMetricsMap[subSys]
if !found { if !found {
subsysMetrics = ResourceMetrics{} subsysMetrics = ResourceMetrics{}
@ -320,7 +323,9 @@ func collectLocalResourceMetrics() {
// startResourceMetricsCollection - starts the job for collecting resource metrics // startResourceMetricsCollection - starts the job for collecting resource metrics
func startResourceMetricsCollection() { func startResourceMetricsCollection() {
resourceMetricsMapMu.Lock()
resourceMetricsMap = map[MetricSubsystem]ResourceMetrics{} resourceMetricsMap = map[MetricSubsystem]ResourceMetrics{}
resourceMetricsMapMu.Unlock()
metricsTimer := time.NewTimer(resourceMetricsCollectionInterval) metricsTimer := time.NewTimer(resourceMetricsCollectionInterval)
defer metricsTimer.Stop() defer metricsTimer.Stop()
@ -382,12 +387,11 @@ func newMinioResourceCollector(metricsGroups []*MetricsGroup) *minioResourceColl
func prepareResourceMetrics(rm ResourceMetric, subSys MetricSubsystem, requireAvgMax bool) []Metric { func prepareResourceMetrics(rm ResourceMetric, subSys MetricSubsystem, requireAvgMax bool) []Metric {
help := resourceMetricsHelpMap[rm.Name] help := resourceMetricsHelpMap[rm.Name]
name := rm.Name name := rm.Name
metrics := make([]Metric, 0, 3)
metrics := []Metric{}
metrics = append(metrics, Metric{ metrics = append(metrics, Metric{
Description: getResourceMetricDescription(subSys, name, help), Description: getResourceMetricDescription(subSys, name, help),
Value: rm.Current, Value: rm.Current,
VariableLabels: rm.Labels, VariableLabels: cloneMSS(rm.Labels),
}) })
if requireAvgMax { if requireAvgMax {
@ -396,7 +400,7 @@ func prepareResourceMetrics(rm ResourceMetric, subSys MetricSubsystem, requireAv
metrics = append(metrics, Metric{ metrics = append(metrics, Metric{
Description: getResourceMetricDescription(subSys, avgName, avgHelp), Description: getResourceMetricDescription(subSys, avgName, avgHelp),
Value: math.Round(rm.Avg*100) / 100, Value: math.Round(rm.Avg*100) / 100,
VariableLabels: rm.Labels, VariableLabels: cloneMSS(rm.Labels),
}) })
maxName := MetricName(fmt.Sprintf("%s_max", name)) maxName := MetricName(fmt.Sprintf("%s_max", name))
@ -404,7 +408,7 @@ func prepareResourceMetrics(rm ResourceMetric, subSys MetricSubsystem, requireAv
metrics = append(metrics, Metric{ metrics = append(metrics, Metric{
Description: getResourceMetricDescription(subSys, maxName, maxHelp), Description: getResourceMetricDescription(subSys, maxName, maxHelp),
Value: rm.Max, Value: rm.Max,
VariableLabels: rm.Labels, VariableLabels: cloneMSS(rm.Labels),
}) })
} }
@ -429,6 +433,8 @@ func getResourceMetrics() *MetricsGroup {
metrics := []Metric{} metrics := []Metric{}
subSystems := []MetricSubsystem{interfaceSubsystem, memSubsystem, driveSubsystem, cpuSubsystem} subSystems := []MetricSubsystem{interfaceSubsystem, memSubsystem, driveSubsystem, cpuSubsystem}
resourceMetricsMapMu.RLock()
defer resourceMetricsMapMu.RUnlock()
for _, subSys := range subSystems { for _, subSys := range subSystems {
stats, found := resourceMetricsMap[subSys] stats, found := resourceMetricsMap[subSys]
if found { if found {