mirror of
https://github.com/minio/minio.git
synced 2024-12-26 23:25:54 -05:00
08d3d06a06
Add the following metrics: - used_inodes - total_inodes - healing - online - reads_per_sec - reads_kb_per_sec - reads_await - writes_per_sec - writes_kb_per_sec - writes_await - perc_util To be able to calculate the `per_sec` values, we capture the IOStats-related data at the beginning (along with the time at which it was captured), and subsequently compare it against the current values. This is because dividing by "time since server uptime" doesn't work in k8s environments.
286 lines
8.2 KiB
Go
286 lines
8.2 KiB
Go
// Copyright (c) 2015-2024 MinIO, Inc.
|
|
//
|
|
// This file is part of MinIO Object Storage stack
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package cmd
|
|
|
|
import (
|
|
"slices"
|
|
"strings"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/collectors"
|
|
)
|
|
|
|
// Collector paths.
|
|
//
|
|
// These are paths under the top-level /minio/metrics/v3 metrics endpoint. Each
|
|
// of these paths returns a set of V3 metrics.
|
|
const (
|
|
apiRequestsCollectorPath collectorPath = "/api/requests"
|
|
apiBucketCollectorPath collectorPath = "/api/bucket"
|
|
|
|
systemNetworkInternodeCollectorPath collectorPath = "/system/network/internode"
|
|
systemDriveCollectorPath collectorPath = "/system/drive"
|
|
systemProcessCollectorPath collectorPath = "/system/process"
|
|
systemGoCollectorPath collectorPath = "/system/go"
|
|
|
|
clusterHealthCollectorPath collectorPath = "/cluster/health"
|
|
clusterUsageObjectsCollectorPath collectorPath = "/cluster/usage/objects"
|
|
clusterUsageBucketsCollectorPath collectorPath = "/cluster/usage/buckets"
|
|
clusterErasureSetCollectorPath collectorPath = "/cluster/erasure-set"
|
|
)
|
|
|
|
// clusterBasePath is the prefix newMetricGroups checks a group's collector
// path against; groups whose path starts with it do not receive the
// serverName label.
const clusterBasePath = "/cluster"
|
|
|
|
type metricsV3Collection struct {
|
|
mgMap map[collectorPath]*MetricsGroup
|
|
bucketMGMap map[collectorPath]*MetricsGroup
|
|
|
|
// Gatherers for non-bucket MetricsGroup's
|
|
mgGatherers map[collectorPath]prometheus.Gatherer
|
|
|
|
collectorPaths []collectorPath
|
|
}
|
|
|
|
func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
|
|
// Create all metric groups.
|
|
apiRequestsMG := NewMetricsGroup(apiRequestsCollectorPath,
|
|
[]MetricDescriptor{
|
|
apiRejectedAuthTotalMD,
|
|
apiRejectedHeaderTotalMD,
|
|
apiRejectedTimestampTotalMD,
|
|
apiRejectedInvalidTotalMD,
|
|
|
|
apiRequestsWaitingTotalMD,
|
|
apiRequestsIncomingTotalMD,
|
|
apiRequestsInFlightTotalMD,
|
|
apiRequestsTotalMD,
|
|
apiRequestsErrorsTotalMD,
|
|
apiRequests5xxErrorsTotalMD,
|
|
apiRequests4xxErrorsTotalMD,
|
|
apiRequestsCanceledTotalMD,
|
|
|
|
apiRequestsTTFBSecondsDistributionMD,
|
|
|
|
apiTrafficSentBytesMD,
|
|
apiTrafficRecvBytesMD,
|
|
},
|
|
JoinLoaders(loadAPIRequestsHTTPMetrics, loadAPIRequestsTTFBMetrics,
|
|
loadAPIRequestsNetworkMetrics),
|
|
)
|
|
|
|
apiBucketMG := NewBucketMetricsGroup(apiBucketCollectorPath,
|
|
[]MetricDescriptor{
|
|
apiBucketTrafficRecvBytesMD,
|
|
apiBucketTrafficSentBytesMD,
|
|
|
|
apiBucketRequestsInFlightMD,
|
|
apiBucketRequestsTotalMD,
|
|
apiBucketRequestsCanceledMD,
|
|
apiBucketRequests4xxErrorsMD,
|
|
apiBucketRequests5xxErrorsMD,
|
|
|
|
apiBucketRequestsTTFBSecondsDistributionMD,
|
|
},
|
|
JoinBucketLoaders(loadAPIBucketHTTPMetrics, loadAPIBucketTTFBMetrics),
|
|
)
|
|
|
|
systemNetworkInternodeMG := NewMetricsGroup(systemNetworkInternodeCollectorPath,
|
|
[]MetricDescriptor{
|
|
internodeErrorsTotalMD,
|
|
internodeDialedErrorsTotalMD,
|
|
internodeDialAvgTimeNanosMD,
|
|
internodeSentBytesTotalMD,
|
|
internodeRecvBytesTotalMD,
|
|
},
|
|
loadNetworkInternodeMetrics,
|
|
)
|
|
|
|
systemDriveMG := NewMetricsGroup(systemDriveCollectorPath,
|
|
[]MetricDescriptor{
|
|
driveUsedBytesMD,
|
|
driveFreeBytesMD,
|
|
driveTotalBytesMD,
|
|
driveUsedInodesMD,
|
|
driveFreeInodesMD,
|
|
driveTotalInodesMD,
|
|
driveTimeoutErrorsMD,
|
|
driveAvailabilityErrorsMD,
|
|
driveWaitingIOMD,
|
|
driveAPILatencyMD,
|
|
driveHealingMD,
|
|
driveOnlineMD,
|
|
|
|
driveOfflineCountMD,
|
|
driveOnlineCountMD,
|
|
driveCountMD,
|
|
|
|
// iostat related
|
|
driveReadsPerSecMD,
|
|
driveReadsKBPerSecMD,
|
|
driveReadsAwaitMD,
|
|
driveWritesPerSecMD,
|
|
driveWritesKBPerSecMD,
|
|
driveWritesAwaitMD,
|
|
drivePercUtilMD,
|
|
},
|
|
loadDriveMetrics,
|
|
)
|
|
|
|
clusterHealthMG := NewMetricsGroup(clusterHealthCollectorPath,
|
|
[]MetricDescriptor{
|
|
healthDrivesOfflineCountMD,
|
|
healthDrivesOnlineCountMD,
|
|
healthDrivesCountMD,
|
|
|
|
healthNodesOfflineCountMD,
|
|
healthNodesOnlineCountMD,
|
|
|
|
healthCapacityRawTotalBytesMD,
|
|
healthCapacityRawFreeBytesMD,
|
|
healthCapacityUsableTotalBytesMD,
|
|
healthCapacityUsableFreeBytesMD,
|
|
},
|
|
JoinLoaders(loadClusterHealthDriveMetrics,
|
|
loadClusterHealthNodeMetrics,
|
|
loadClusterHealthCapacityMetrics),
|
|
)
|
|
|
|
clusterUsageObjectsMG := NewMetricsGroup(clusterUsageObjectsCollectorPath,
|
|
[]MetricDescriptor{
|
|
usageSinceLastUpdateSecondsMD,
|
|
usageTotalBytesMD,
|
|
usageObjectsCountMD,
|
|
usageVersionsCountMD,
|
|
usageDeleteMarkersCountMD,
|
|
usageBucketsCountMD,
|
|
usageObjectsDistributionMD,
|
|
usageVersionsDistributionMD,
|
|
},
|
|
loadClusterUsageObjectMetrics,
|
|
)
|
|
|
|
clusterUsageBucketsMG := NewBucketMetricsGroup(clusterUsageBucketsCollectorPath,
|
|
[]MetricDescriptor{
|
|
usageSinceLastUpdateSecondsMD,
|
|
usageBucketTotalBytesMD,
|
|
usageBucketObjectsTotalMD,
|
|
usageBucketVersionsCountMD,
|
|
usageBucketDeleteMarkersCountMD,
|
|
usageBucketQuotaTotalBytesMD,
|
|
usageBucketObjectSizeDistributionMD,
|
|
usageBucketObjectVersionCountDistributionMD,
|
|
},
|
|
loadClusterUsageBucketMetrics,
|
|
)
|
|
|
|
clusterErasureSetMG := NewMetricsGroup(clusterErasureSetCollectorPath,
|
|
[]MetricDescriptor{
|
|
erasureSetOverallWriteQuorumMD,
|
|
erasureSetOverallHealthMD,
|
|
erasureSetReadQuorumMD,
|
|
erasureSetWriteQuorumMD,
|
|
erasureSetOnlineDrivesCountMD,
|
|
erasureSetHealingDrivesCountMD,
|
|
erasureSetHealthMD,
|
|
},
|
|
loadClusterErasureSetMetrics,
|
|
)
|
|
|
|
allMetricGroups := []*MetricsGroup{
|
|
apiRequestsMG,
|
|
apiBucketMG,
|
|
|
|
systemNetworkInternodeMG,
|
|
systemDriveMG,
|
|
|
|
clusterHealthMG,
|
|
clusterUsageObjectsMG,
|
|
clusterUsageBucketsMG,
|
|
clusterErasureSetMG,
|
|
}
|
|
|
|
// Bucket metrics are special, they always include the bucket label. These
|
|
// metrics required a list of buckets to be passed to the loader, and the list
|
|
// of buckets is not known until the request is made. So we keep a separate
|
|
// map for bucket metrics and handle them specially.
|
|
|
|
// Add the serverName and poolIndex labels to all non-cluster metrics.
|
|
//
|
|
// Also create metric group maps and set the cache.
|
|
metricsCache := newMetricsCache()
|
|
mgMap := make(map[collectorPath]*MetricsGroup)
|
|
bucketMGMap := make(map[collectorPath]*MetricsGroup)
|
|
for _, mg := range allMetricGroups {
|
|
if !strings.HasPrefix(string(mg.CollectorPath), clusterBasePath) {
|
|
mg.AddExtraLabels(
|
|
serverName, globalLocalNodeName,
|
|
// poolIndex, strconv.Itoa(globalLocalPoolIdx),
|
|
)
|
|
}
|
|
mg.SetCache(metricsCache)
|
|
if mg.IsBucketMetricsGroup() {
|
|
bucketMGMap[mg.CollectorPath] = mg
|
|
} else {
|
|
mgMap[mg.CollectorPath] = mg
|
|
}
|
|
}
|
|
|
|
// Prepare to register the collectors. Other than `MetricGroup` collectors,
|
|
// we also have standard collectors like `ProcessCollector` and `GoCollector`.
|
|
|
|
// Create all Non-`MetricGroup` collectors here.
|
|
collectors := map[collectorPath]prometheus.Collector{
|
|
systemProcessCollectorPath: collectors.NewProcessCollector(collectors.ProcessCollectorOpts{
|
|
ReportErrors: true,
|
|
}),
|
|
systemGoCollectorPath: collectors.NewGoCollector(),
|
|
}
|
|
|
|
// Add all `MetricGroup` collectors to the map.
|
|
for _, mg := range allMetricGroups {
|
|
collectors[mg.CollectorPath] = mg
|
|
}
|
|
|
|
// Helper function to register a collector and return a gatherer for it.
|
|
mustRegister := func(c ...prometheus.Collector) prometheus.Gatherer {
|
|
subRegistry := prometheus.NewRegistry()
|
|
for _, col := range c {
|
|
subRegistry.MustRegister(col)
|
|
}
|
|
r.MustRegister(subRegistry)
|
|
return subRegistry
|
|
}
|
|
|
|
// Register all collectors and create gatherers for them.
|
|
gatherers := make(map[collectorPath]prometheus.Gatherer, len(collectors))
|
|
collectorPaths := make([]collectorPath, 0, len(collectors))
|
|
for path, collector := range collectors {
|
|
gatherers[path] = mustRegister(collector)
|
|
collectorPaths = append(collectorPaths, path)
|
|
}
|
|
slices.Sort(collectorPaths)
|
|
return &metricsV3Collection{
|
|
mgMap: mgMap,
|
|
bucketMGMap: bucketMGMap,
|
|
mgGatherers: gatherers,
|
|
collectorPaths: collectorPaths,
|
|
}
|
|
}
|