Add metric for internode RPC calls errors (#11669)

This commit is contained in:
Anis Elleuch
2021-03-01 21:31:33 +01:00
committed by GitHub
parent bbd1244a88
commit e8d8dfa3ae
3 changed files with 36 additions and 0 deletions

View File

@@ -42,6 +42,19 @@ const (
closed
)
// Hold the number of failed RPC calls due to networking errors
var networkErrsCounter uint64
// GetNetworkErrsCounter returns the number of failed RPC requests
func GetNetworkErrsCounter() uint64 {
return atomic.LoadUint64(&networkErrsCounter)
}
// ResetNetworkErrsCounter resets the number of failed RPC requests
func ResetNetworkErrsCounter() {
atomic.StoreUint64(&networkErrsCounter, 0)
}
// NetworkError - error type in case of errors related to http/transport
// for ex. connection refused, connection reset, dns resolution failure etc.
// All errors returned by storage-rest-server (ex errFileNotFound, errDiskNotFound) are not considered to be network errors.
@@ -120,6 +133,7 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
resp, err := c.httpClient.Do(req)
if err != nil {
if c.HealthCheckFn != nil && xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
atomic.AddUint64(&networkErrsCounter, 1)
if c.MarkOffline() {
logger.LogIf(ctx, fmt.Errorf("Marking %s temporary offline; caused by %w", c.url.String(), err))
}