mirror of
https://github.com/minio/minio.git
synced 2025-01-12 15:33:22 -05:00
fix: resource metrics "concurrent map iteration and map write" (#18273)
`resourceMetricsMap` has no protection against concurrent reads and writes. Add a mutex and don't use maps from the last iteration. Bug introduced in #18057 Fixes #18271
This commit is contained in:
parent
8e32de3ba9
commit
ba6218b354
@ -75,6 +75,7 @@ var (
|
|||||||
resourceCollector *minioResourceCollector
|
resourceCollector *minioResourceCollector
|
||||||
// resourceMetricsMap is a map of subsystem to its metrics
|
// resourceMetricsMap is a map of subsystem to its metrics
|
||||||
resourceMetricsMap map[MetricSubsystem]ResourceMetrics
|
resourceMetricsMap map[MetricSubsystem]ResourceMetrics
|
||||||
|
resourceMetricsMapMu sync.RWMutex
|
||||||
// resourceMetricsHelpMap maps metric name to its help string
|
// resourceMetricsHelpMap maps metric name to its help string
|
||||||
resourceMetricsHelpMap map[MetricName]string
|
resourceMetricsHelpMap map[MetricName]string
|
||||||
resourceMetricsGroups []*MetricsGroup
|
resourceMetricsGroups []*MetricsGroup
|
||||||
@ -159,6 +160,8 @@ func init() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func updateResourceMetrics(subSys MetricSubsystem, name MetricName, val float64, labels map[string]string, isCumulative bool) {
|
func updateResourceMetrics(subSys MetricSubsystem, name MetricName, val float64, labels map[string]string, isCumulative bool) {
|
||||||
|
resourceMetricsMapMu.Lock()
|
||||||
|
defer resourceMetricsMapMu.Unlock()
|
||||||
subsysMetrics, found := resourceMetricsMap[subSys]
|
subsysMetrics, found := resourceMetricsMap[subSys]
|
||||||
if !found {
|
if !found {
|
||||||
subsysMetrics = ResourceMetrics{}
|
subsysMetrics = ResourceMetrics{}
|
||||||
@ -320,7 +323,9 @@ func collectLocalResourceMetrics() {
|
|||||||
|
|
||||||
// startResourceMetricsCollection - starts the job for collecting resource metrics
|
// startResourceMetricsCollection - starts the job for collecting resource metrics
|
||||||
func startResourceMetricsCollection() {
|
func startResourceMetricsCollection() {
|
||||||
|
resourceMetricsMapMu.Lock()
|
||||||
resourceMetricsMap = map[MetricSubsystem]ResourceMetrics{}
|
resourceMetricsMap = map[MetricSubsystem]ResourceMetrics{}
|
||||||
|
resourceMetricsMapMu.Unlock()
|
||||||
metricsTimer := time.NewTimer(resourceMetricsCollectionInterval)
|
metricsTimer := time.NewTimer(resourceMetricsCollectionInterval)
|
||||||
defer metricsTimer.Stop()
|
defer metricsTimer.Stop()
|
||||||
|
|
||||||
@ -382,12 +387,11 @@ func newMinioResourceCollector(metricsGroups []*MetricsGroup) *minioResourceColl
|
|||||||
func prepareResourceMetrics(rm ResourceMetric, subSys MetricSubsystem, requireAvgMax bool) []Metric {
|
func prepareResourceMetrics(rm ResourceMetric, subSys MetricSubsystem, requireAvgMax bool) []Metric {
|
||||||
help := resourceMetricsHelpMap[rm.Name]
|
help := resourceMetricsHelpMap[rm.Name]
|
||||||
name := rm.Name
|
name := rm.Name
|
||||||
|
metrics := make([]Metric, 0, 3)
|
||||||
metrics := []Metric{}
|
|
||||||
metrics = append(metrics, Metric{
|
metrics = append(metrics, Metric{
|
||||||
Description: getResourceMetricDescription(subSys, name, help),
|
Description: getResourceMetricDescription(subSys, name, help),
|
||||||
Value: rm.Current,
|
Value: rm.Current,
|
||||||
VariableLabels: rm.Labels,
|
VariableLabels: cloneMSS(rm.Labels),
|
||||||
})
|
})
|
||||||
|
|
||||||
if requireAvgMax {
|
if requireAvgMax {
|
||||||
@ -396,7 +400,7 @@ func prepareResourceMetrics(rm ResourceMetric, subSys MetricSubsystem, requireAv
|
|||||||
metrics = append(metrics, Metric{
|
metrics = append(metrics, Metric{
|
||||||
Description: getResourceMetricDescription(subSys, avgName, avgHelp),
|
Description: getResourceMetricDescription(subSys, avgName, avgHelp),
|
||||||
Value: math.Round(rm.Avg*100) / 100,
|
Value: math.Round(rm.Avg*100) / 100,
|
||||||
VariableLabels: rm.Labels,
|
VariableLabels: cloneMSS(rm.Labels),
|
||||||
})
|
})
|
||||||
|
|
||||||
maxName := MetricName(fmt.Sprintf("%s_max", name))
|
maxName := MetricName(fmt.Sprintf("%s_max", name))
|
||||||
@ -404,7 +408,7 @@ func prepareResourceMetrics(rm ResourceMetric, subSys MetricSubsystem, requireAv
|
|||||||
metrics = append(metrics, Metric{
|
metrics = append(metrics, Metric{
|
||||||
Description: getResourceMetricDescription(subSys, maxName, maxHelp),
|
Description: getResourceMetricDescription(subSys, maxName, maxHelp),
|
||||||
Value: rm.Max,
|
Value: rm.Max,
|
||||||
VariableLabels: rm.Labels,
|
VariableLabels: cloneMSS(rm.Labels),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -429,6 +433,8 @@ func getResourceMetrics() *MetricsGroup {
|
|||||||
metrics := []Metric{}
|
metrics := []Metric{}
|
||||||
|
|
||||||
subSystems := []MetricSubsystem{interfaceSubsystem, memSubsystem, driveSubsystem, cpuSubsystem}
|
subSystems := []MetricSubsystem{interfaceSubsystem, memSubsystem, driveSubsystem, cpuSubsystem}
|
||||||
|
resourceMetricsMapMu.RLock()
|
||||||
|
defer resourceMetricsMapMu.RUnlock()
|
||||||
for _, subSys := range subSystems {
|
for _, subSys := range subSystems {
|
||||||
stats, found := resourceMetricsMap[subSys]
|
stats, found := resourceMetricsMap[subSys]
|
||||||
if found {
|
if found {
|
||||||
|
Loading…
Reference in New Issue
Block a user