mirror of
https://github.com/minio/minio.git
synced 2025-01-11 15:03:22 -05:00
Add metric for internode RPC calls errors (#11669)
This commit is contained in:
parent
bbd1244a88
commit
e8d8dfa3ae
@ -346,6 +346,16 @@ func getBucketObjectDistributionMD() MetricDescription {
|
||||
Type: histogramMetric,
|
||||
}
|
||||
}
|
||||
func getInternodeFailedRequests() MetricDescription {
|
||||
return MetricDescription{
|
||||
Namespace: interNodeMetricNamespace,
|
||||
Subsystem: trafficSubsystem,
|
||||
Name: errorsTotal,
|
||||
Help: "Total number of failed internode calls.",
|
||||
Type: counterMetric,
|
||||
}
|
||||
}
|
||||
|
||||
func getInterNodeSentBytesMD() MetricDescription {
|
||||
return MetricDescription{
|
||||
Namespace: interNodeMetricNamespace,
|
||||
@ -982,6 +992,10 @@ func getNetworkMetrics() MetricsGroup {
|
||||
return MetricsGroup{
|
||||
Metrics: []Metric{},
|
||||
initialize: func(ctx context.Context, metrics *MetricsGroup) {
|
||||
metrics.Metrics = append(metrics.Metrics, Metric{
|
||||
Description: getInternodeFailedRequests(),
|
||||
Value: float64(loadAndResetRPCNetworkErrsCounter()),
|
||||
})
|
||||
connStats := globalConnStats.toServerConnStats()
|
||||
metrics.Metrics = append(metrics.Metrics, Metric{
|
||||
Description: getInterNodeSentBytesMD(),
|
||||
|
@ -42,6 +42,19 @@ const (
|
||||
closed
|
||||
)
|
||||
|
||||
// networkErrsCounter holds the number of RPC calls that failed due to networking errors.
|
||||
var networkErrsCounter uint64
|
||||
|
||||
// GetNetworkErrsCounter returns the number of failed RPC requests
|
||||
func GetNetworkErrsCounter() uint64 {
|
||||
return atomic.LoadUint64(&networkErrsCounter)
|
||||
}
|
||||
|
||||
// ResetNetworkErrsCounter resets the number of failed RPC requests
|
||||
func ResetNetworkErrsCounter() {
|
||||
atomic.StoreUint64(&networkErrsCounter, 0)
|
||||
}
|
||||
|
||||
// NetworkError - error type in case of errors related to http/transport
|
||||
// e.g. connection refused, connection reset, DNS resolution failure, etc.
|
||||
// All errors returned by storage-rest-server (e.g. errFileNotFound, errDiskNotFound) are not considered to be network errors.
|
||||
@ -120,6 +133,7 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
if c.HealthCheckFn != nil && xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
|
||||
atomic.AddUint64(&networkErrsCounter, 1)
|
||||
if c.MarkOffline() {
|
||||
logger.LogIf(ctx, fmt.Errorf("Marking %s temporary offline; caused by %w", c.url.String(), err))
|
||||
}
|
||||
|
@ -43,6 +43,7 @@ import (
|
||||
"github.com/gorilla/mux"
|
||||
xhttp "github.com/minio/minio/cmd/http"
|
||||
"github.com/minio/minio/cmd/logger"
|
||||
"github.com/minio/minio/cmd/rest"
|
||||
"github.com/minio/minio/pkg/certs"
|
||||
"github.com/minio/minio/pkg/handlers"
|
||||
"github.com/minio/minio/pkg/madmin"
|
||||
@ -883,3 +884,10 @@ func decodeDirObject(object string) string {
|
||||
}
|
||||
return object
|
||||
}
|
||||
|
||||
// This is used by metrics to show the number of failed RPC calls
|
||||
// between internodes
|
||||
func loadAndResetRPCNetworkErrsCounter() uint64 {
|
||||
defer rest.ResetNetworkErrsCounter()
|
||||
return rest.GetNetworkErrsCounter()
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user