Add RPC tcp timeout/errs and AVG duration to prometheus (#15747)

This commit is contained in:
Anis Elleuch
2022-09-26 17:04:26 +01:00
committed by GitHub
parent 1480340830
commit 048a46ec2a
4 changed files with 122 additions and 28 deletions

View File

@@ -46,19 +46,6 @@ const (
closed
)
// Hold the number of failed RPC calls due to networking errors
var networkErrsCounter uint64
// GetNetworkErrsCounter returns the number of failed RPC requests
func GetNetworkErrsCounter() uint64 {
return atomic.LoadUint64(&networkErrsCounter)
}
// ResetNetworkErrsCounter resets the number of failed RPC requests
func ResetNetworkErrsCounter() {
atomic.StoreUint64(&networkErrsCounter, 0)
}
// NetworkError - error type in case of errors related to http/transport
// for ex. connection refused, connection reset, dns resolution failure etc.
// All errors returned by storage-rest-server (ex errFileNotFound, errDiskNotFound) are not considered to be network errors.
@@ -217,7 +204,7 @@ type respBodyMonitor struct {
func (r respBodyMonitor) Read(p []byte) (n int, err error) {
n, err = r.ReadCloser.Read(p)
if err != nil && err != io.EOF {
atomic.AddUint64(&networkErrsCounter, 1)
atomic.AddUint64(&globalStats.errs, 1)
}
return
}
@@ -225,7 +212,7 @@ func (r respBodyMonitor) Read(p []byte) (n int, err error) {
func (r respBodyMonitor) Close() (err error) {
err = r.ReadCloser.Close()
if err != nil {
atomic.AddUint64(&networkErrsCounter, 1)
atomic.AddUint64(&globalStats.errs, 1)
}
return
}
@@ -252,11 +239,15 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
if length > 0 {
req.ContentLength = length
}
req, update := setupReqStatsUpdate(req)
defer update()
resp, err := c.httpClient.Do(req)
if err != nil {
if xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
if !c.NoMetrics {
atomic.AddUint64(&networkErrsCounter, 1)
atomic.AddUint64(&globalStats.errs, 1)
}
if c.MarkOffline(err) {
logger.LogOnceIf(ctx, fmt.Errorf("Marking %s offline temporarily; caused by %w", c.url.Host, err), c.url.Host)
@@ -292,7 +283,7 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
if err != nil {
if xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
if !c.NoMetrics {
atomic.AddUint64(&networkErrsCounter, 1)
atomic.AddUint64(&globalStats.errs, 1)
}
if c.MarkOffline(err) {
logger.LogOnceIf(ctx, fmt.Errorf("Marking %s offline temporarily; caused by %w", c.url.Host, err), c.url.Host)