minio/cmd/metrics-v3.go
Shireesh Anjal 08d3d06a06
Add drive metrics in metrics-v3 (#19452)
Add following metrics:

- used_inodes
- total_inodes
- healing
- online
- reads_per_sec
- reads_kb_per_sec
- reads_await
- writes_per_sec
- writes_kb_per_sec
- writes_await
- perc_util

To be able to calculate the `per_sec` values, we capture the IOStats-related
data in the beginning (along with the time at which it was captured),
and subsequently compare it against the current values. This is because
dividing by the time since server uptime doesn't work in k8s environments.
2024-04-11 10:46:34 -07:00

286 lines
8.2 KiB
Go

// Copyright (c) 2015-2024 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package cmd
import (
"slices"
"strings"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
)
// Collector paths.
//
// These are paths under the top-level /minio/metrics/v3 metrics endpoint. Each
// of these paths returns a set of V3 metrics.
const (
	// Paths under /api.
	apiRequestsCollectorPath collectorPath = "/api/requests"
	apiBucketCollectorPath   collectorPath = "/api/bucket"

	// Paths under /system.
	systemNetworkInternodeCollectorPath collectorPath = "/system/network/internode"
	systemDriveCollectorPath            collectorPath = "/system/drive"
	systemProcessCollectorPath          collectorPath = "/system/process"
	systemGoCollectorPath               collectorPath = "/system/go"

	// Paths under /cluster.
	clusterHealthCollectorPath       collectorPath = "/cluster/health"
	clusterUsageObjectsCollectorPath collectorPath = "/cluster/usage/objects"
	clusterUsageBucketsCollectorPath collectorPath = "/cluster/usage/buckets"
	clusterErasureSetCollectorPath   collectorPath = "/cluster/erasure-set"
)
// clusterBasePath is the prefix shared by the cluster collector paths.
// newMetricGroups uses it to decide which metric groups do not receive the
// serverName extra label.
const clusterBasePath = "/cluster"
// metricsV3Collection bundles the V3 metric groups and their gatherers, as
// assembled by newMetricGroups.
type metricsV3Collection struct {
	// mgMap holds the non-bucket metric groups, keyed by collector path.
	mgMap map[collectorPath]*MetricsGroup

	// bucketMGMap holds the bucket metric groups, keyed by collector path.
	bucketMGMap map[collectorPath]*MetricsGroup

	// mgGatherers maps a collector path to the gatherer serving it.
	// Intended for non-bucket MetricsGroup's; newMetricGroups adds one entry
	// per registered collector, including the process and Go collectors.
	mgGatherers map[collectorPath]prometheus.Gatherer

	// collectorPaths lists every registered collector path in sorted order.
	collectorPaths []collectorPath
}
// newMetricGroups builds every V3 metrics group, wires each one to a shared
// metrics cache, and registers all collectors (the metric groups plus the
// standard process and Go collectors) with the registry r.
//
// Each collector is placed in its own sub-registry, which is itself registered
// with r; the sub-registry doubles as the gatherer for that collector path.
// The returned collection holds the group maps, the per-path gatherers and the
// sorted list of collector paths.
func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
	// API request metrics.
	apiRequestsGroup := NewMetricsGroup(
		apiRequestsCollectorPath,
		[]MetricDescriptor{
			apiRejectedAuthTotalMD,
			apiRejectedHeaderTotalMD,
			apiRejectedTimestampTotalMD,
			apiRejectedInvalidTotalMD,

			apiRequestsWaitingTotalMD,
			apiRequestsIncomingTotalMD,
			apiRequestsInFlightTotalMD,
			apiRequestsTotalMD,
			apiRequestsErrorsTotalMD,
			apiRequests5xxErrorsTotalMD,
			apiRequests4xxErrorsTotalMD,
			apiRequestsCanceledTotalMD,

			apiRequestsTTFBSecondsDistributionMD,

			apiTrafficSentBytesMD,
			apiTrafficRecvBytesMD,
		},
		JoinLoaders(
			loadAPIRequestsHTTPMetrics,
			loadAPIRequestsTTFBMetrics,
			loadAPIRequestsNetworkMetrics,
		),
	)

	// Per-bucket API metrics.
	apiBucketGroup := NewBucketMetricsGroup(
		apiBucketCollectorPath,
		[]MetricDescriptor{
			apiBucketTrafficRecvBytesMD,
			apiBucketTrafficSentBytesMD,

			apiBucketRequestsInFlightMD,
			apiBucketRequestsTotalMD,
			apiBucketRequestsCanceledMD,
			apiBucketRequests4xxErrorsMD,
			apiBucketRequests5xxErrorsMD,

			apiBucketRequestsTTFBSecondsDistributionMD,
		},
		JoinBucketLoaders(
			loadAPIBucketHTTPMetrics,
			loadAPIBucketTTFBMetrics,
		),
	)

	// Internode network metrics.
	internodeGroup := NewMetricsGroup(
		systemNetworkInternodeCollectorPath,
		[]MetricDescriptor{
			internodeErrorsTotalMD,
			internodeDialedErrorsTotalMD,
			internodeDialAvgTimeNanosMD,
			internodeSentBytesTotalMD,
			internodeRecvBytesTotalMD,
		},
		loadNetworkInternodeMetrics,
	)

	// Drive metrics, including the iostat-derived ones.
	driveGroup := NewMetricsGroup(
		systemDriveCollectorPath,
		[]MetricDescriptor{
			driveUsedBytesMD,
			driveFreeBytesMD,
			driveTotalBytesMD,
			driveUsedInodesMD,
			driveFreeInodesMD,
			driveTotalInodesMD,
			driveTimeoutErrorsMD,
			driveAvailabilityErrorsMD,
			driveWaitingIOMD,
			driveAPILatencyMD,
			driveHealingMD,
			driveOnlineMD,

			driveOfflineCountMD,
			driveOnlineCountMD,
			driveCountMD,

			// iostat related
			driveReadsPerSecMD,
			driveReadsKBPerSecMD,
			driveReadsAwaitMD,
			driveWritesPerSecMD,
			driveWritesKBPerSecMD,
			driveWritesAwaitMD,
			drivePercUtilMD,
		},
		loadDriveMetrics,
	)

	// Cluster health metrics.
	healthGroup := NewMetricsGroup(
		clusterHealthCollectorPath,
		[]MetricDescriptor{
			healthDrivesOfflineCountMD,
			healthDrivesOnlineCountMD,
			healthDrivesCountMD,

			healthNodesOfflineCountMD,
			healthNodesOnlineCountMD,

			healthCapacityRawTotalBytesMD,
			healthCapacityRawFreeBytesMD,
			healthCapacityUsableTotalBytesMD,
			healthCapacityUsableFreeBytesMD,
		},
		JoinLoaders(
			loadClusterHealthDriveMetrics,
			loadClusterHealthNodeMetrics,
			loadClusterHealthCapacityMetrics,
		),
	)

	// Cluster usage metrics (objects).
	usageObjectsGroup := NewMetricsGroup(
		clusterUsageObjectsCollectorPath,
		[]MetricDescriptor{
			usageSinceLastUpdateSecondsMD,
			usageTotalBytesMD,
			usageObjectsCountMD,
			usageVersionsCountMD,
			usageDeleteMarkersCountMD,
			usageBucketsCountMD,
			usageObjectsDistributionMD,
			usageVersionsDistributionMD,
		},
		loadClusterUsageObjectMetrics,
	)

	// Cluster usage metrics (per bucket).
	usageBucketsGroup := NewBucketMetricsGroup(
		clusterUsageBucketsCollectorPath,
		[]MetricDescriptor{
			usageSinceLastUpdateSecondsMD,
			usageBucketTotalBytesMD,
			usageBucketObjectsTotalMD,
			usageBucketVersionsCountMD,
			usageBucketDeleteMarkersCountMD,
			usageBucketQuotaTotalBytesMD,
			usageBucketObjectSizeDistributionMD,
			usageBucketObjectVersionCountDistributionMD,
		},
		loadClusterUsageBucketMetrics,
	)

	// Erasure set metrics.
	erasureSetGroup := NewMetricsGroup(
		clusterErasureSetCollectorPath,
		[]MetricDescriptor{
			erasureSetOverallWriteQuorumMD,
			erasureSetOverallHealthMD,
			erasureSetReadQuorumMD,
			erasureSetWriteQuorumMD,
			erasureSetOnlineDrivesCountMD,
			erasureSetHealingDrivesCountMD,
			erasureSetHealthMD,
		},
		loadClusterErasureSetMetrics,
	)

	groups := []*MetricsGroup{
		apiRequestsGroup,
		apiBucketGroup,
		internodeGroup,
		driveGroup,

		healthGroup,
		usageObjectsGroup,
		usageBucketsGroup,
		erasureSetGroup,
	}

	// Bucket metrics always carry the bucket label, and their loaders require
	// a list of buckets that is not known until the request is made. They are
	// therefore tracked in a map of their own and handled separately.
	//
	// All groups share one metrics cache. Groups outside the cluster base path
	// additionally get the serverName label.
	sharedCache := newMetricsCache()
	mgMap := make(map[collectorPath]*MetricsGroup)
	bucketMGMap := make(map[collectorPath]*MetricsGroup)
	for _, group := range groups {
		isClusterGroup := strings.HasPrefix(string(group.CollectorPath), clusterBasePath)
		if !isClusterGroup {
			group.AddExtraLabels(
				serverName, globalLocalNodeName,
				// poolIndex, strconv.Itoa(globalLocalPoolIdx),
			)
		}
		group.SetCache(sharedCache)

		target := mgMap
		if group.IsBucketMetricsGroup() {
			target = bucketMGMap
		}
		target[group.CollectorPath] = group
	}

	// Besides the `MetricsGroup` collectors there are the standard
	// `ProcessCollector` and `GoCollector`; start the collector map with those.
	collectorsByPath := map[collectorPath]prometheus.Collector{
		systemProcessCollectorPath: collectors.NewProcessCollector(
			collectors.ProcessCollectorOpts{ReportErrors: true},
		),
		systemGoCollectorPath: collectors.NewGoCollector(),
	}

	// Then add every metric group, keyed by its collector path.
	for _, group := range groups {
		collectorsByPath[group.CollectorPath] = group
	}

	// Give each collector a dedicated sub-registry, register that sub-registry
	// with r, and keep it as the gatherer for the collector's path.
	gatherers := make(map[collectorPath]prometheus.Gatherer, len(collectorsByPath))
	paths := make([]collectorPath, 0, len(collectorsByPath))
	for path, c := range collectorsByPath {
		subRegistry := prometheus.NewRegistry()
		subRegistry.MustRegister(c)
		r.MustRegister(subRegistry)

		gatherers[path] = subRegistry
		paths = append(paths, path)
	}

	// Map iteration order is random; sort so the path list is deterministic.
	slices.Sort(paths)

	return &metricsV3Collection{
		mgMap:          mgMap,
		bucketMGMap:    bucketMGMap,
		mgGatherers:    gatherers,
		collectorPaths: paths,
	}
}