mirror of
https://github.com/minio/minio.git
synced 2025-11-09 13:39:46 -05:00
Add RPC tcp timeout/errs and AVG duration to prometheus (#15747)
This commit is contained in:
@@ -31,6 +31,7 @@ import (
|
||||
"github.com/minio/madmin-go"
|
||||
"github.com/minio/minio/internal/bucket/lifecycle"
|
||||
"github.com/minio/minio/internal/logger"
|
||||
"github.com/minio/minio/internal/rest"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
dto "github.com/prometheus/client_model/go"
|
||||
"github.com/prometheus/common/expfmt"
|
||||
@@ -167,7 +168,8 @@ const (
|
||||
writeBytes MetricName = "write_bytes"
|
||||
wcharBytes MetricName = "wchar_bytes"
|
||||
|
||||
apiLatencyMicroSec MetricName = "latency_us"
|
||||
latencyMicroSec MetricName = "latency_us"
|
||||
latencyNanoSec MetricName = "latency_ns"
|
||||
|
||||
usagePercent MetricName = "update_percent"
|
||||
|
||||
@@ -331,7 +333,7 @@ func getNodeDiskAPILatencyMD() MetricDescription {
|
||||
return MetricDescription{
|
||||
Namespace: nodeMetricNamespace,
|
||||
Subsystem: diskSubsystem,
|
||||
Name: apiLatencyMicroSec,
|
||||
Name: latencyMicroSec,
|
||||
Help: "Average last minute latency in µs for drive API storage operations",
|
||||
Type: gaugeMetric,
|
||||
}
|
||||
@@ -537,6 +539,26 @@ func getInternodeFailedRequests() MetricDescription {
|
||||
}
|
||||
}
|
||||
|
||||
// getInternodeTCPDialTimeout returns the MetricDescription for the
// "dial_errors" counter, placed under interNodeMetricNamespace and
// trafficSubsystem. Per its help text the counter covers both internode TCP
// dial timeouts and other dial errors.
// NOTE(review): the function name mentions only "Timeout" while the metric is
// named "dial_errors" and counts errors as well — consider aligning the two.
func getInternodeTCPDialTimeout() MetricDescription {
|
||||
return MetricDescription{
|
||||
Namespace: interNodeMetricNamespace,
|
||||
Subsystem: trafficSubsystem,
|
||||
Name: "dial_errors",
|
||||
Help: "Total number of internode TCP dial timeouts and errors",
|
||||
Type: counterMetric,
|
||||
}
|
||||
}
|
||||
|
||||
// getInternodeTCPAvgDuration returns the MetricDescription for the
// "dial_avg_time" gauge, placed under interNodeMetricNamespace and
// trafficSubsystem. It describes the average time taken by internode TCP dial
// calls.
// NOTE(review): neither the metric name nor the help text states a unit. If the
// value fed into this gauge is a time.Duration converted with float64(), it is
// in nanoseconds — confirm against the producer and state the unit in Help.
// NOTE(review): the help text says "internodes"; "internode" is probably meant.
func getInternodeTCPAvgDuration() MetricDescription {
|
||||
return MetricDescription{
|
||||
Namespace: interNodeMetricNamespace,
|
||||
Subsystem: trafficSubsystem,
|
||||
Name: "dial_avg_time",
|
||||
Help: "Average time of internodes TCP dial calls",
|
||||
Type: gaugeMetric,
|
||||
}
|
||||
}
|
||||
|
||||
func getInterNodeSentBytesMD() MetricDescription {
|
||||
return MetricDescription{
|
||||
Namespace: interNodeMetricNamespace,
|
||||
@@ -1607,10 +1629,19 @@ func getNetworkMetrics() *MetricsGroup {
|
||||
mg.RegisterRead(func(ctx context.Context) (metrics []Metric) {
|
||||
metrics = make([]Metric, 0, 10)
|
||||
connStats := globalConnStats.toServerConnStats()
|
||||
rpcStats := rest.GetRPCStats()
|
||||
if globalIsDistErasure {
|
||||
metrics = append(metrics, Metric{
|
||||
Description: getInternodeFailedRequests(),
|
||||
Value: float64(loadAndResetRPCNetworkErrsCounter()),
|
||||
Value: float64(rpcStats.Errs),
|
||||
})
|
||||
metrics = append(metrics, Metric{
|
||||
Description: getInternodeTCPDialTimeout(),
|
||||
Value: float64(rpcStats.DialErrs),
|
||||
})
|
||||
metrics = append(metrics, Metric{
|
||||
Description: getInternodeTCPAvgDuration(),
|
||||
Value: float64(rpcStats.DialAvgDuration),
|
||||
})
|
||||
metrics = append(metrics, Metric{
|
||||
Description: getInterNodeSentBytesMD(),
|
||||
|
||||
@@ -56,7 +56,6 @@ import (
|
||||
ioutilx "github.com/minio/minio/internal/ioutil"
|
||||
"github.com/minio/minio/internal/logger"
|
||||
"github.com/minio/minio/internal/logger/message/audit"
|
||||
"github.com/minio/minio/internal/rest"
|
||||
"github.com/minio/pkg/certs"
|
||||
"github.com/minio/pkg/env"
|
||||
"golang.org/x/oauth2"
|
||||
@@ -1016,13 +1015,6 @@ func decodeDirObject(object string) string {
|
||||
return object
|
||||
}
|
||||
|
||||
// loadAndResetRPCNetworkErrsCounter is used by metrics to show the number of
// failed RPC calls between internodes.
|
||||
// It returns rest.GetNetworkErrsCounter() and then resets that counter: the
// return expression is evaluated first, and the deferred
// rest.ResetNetworkErrsCounter() runs afterwards, just before the function
// returns. Each call therefore reports the errors seen since the previous call
// rather than a running total.
// NOTE(review): the read and the reset are two separate calls here; whether
// errors recorded between them can be lost depends on the rest package —
// confirm.
|
||||
func loadAndResetRPCNetworkErrsCounter() uint64 {
|
||||
defer rest.ResetNetworkErrsCounter()
|
||||
return rest.GetNetworkErrsCounter()
|
||||
}
|
||||
|
||||
// Helper method to return total number of nodes in cluster
|
||||
func totalNodeCount() uint64 {
|
||||
peers, _ := globalEndpoints.peers()
|
||||
|
||||
Reference in New Issue
Block a user