mirror of
https://github.com/minio/minio.git
synced 2025-11-09 21:49:46 -05:00
Add RPC tcp timeout/errs and AVG duration to prometheus (#15747)
This commit is contained in:
@@ -46,19 +46,6 @@ const (
|
||||
closed
|
||||
)
|
||||
|
||||
// networkErrsCounter counts the RPC calls that failed because of a
// networking error. The code shown here only touches it through sync/atomic.
var networkErrsCounter uint64

// GetNetworkErrsCounter reports the current number of failed RPC requests.
func GetNetworkErrsCounter() uint64 {
	failed := atomic.LoadUint64(&networkErrsCounter)
	return failed
}

// ResetNetworkErrsCounter sets the number of failed RPC requests back to zero.
func ResetNetworkErrsCounter() {
	var zero uint64
	atomic.StoreUint64(&networkErrsCounter, zero)
}
|
||||
|
||||
// NetworkError - error type in case of errors related to http/transport
|
||||
// for ex. connection refused, connection reset, dns resolution failure etc.
|
||||
// All errors returned by storage-rest-server (ex errFileNotFound, errDiskNotFound) are not considered to be network errors.
|
||||
@@ -217,7 +204,7 @@ type respBodyMonitor struct {
|
||||
func (r respBodyMonitor) Read(p []byte) (n int, err error) {
|
||||
n, err = r.ReadCloser.Read(p)
|
||||
if err != nil && err != io.EOF {
|
||||
atomic.AddUint64(&networkErrsCounter, 1)
|
||||
atomic.AddUint64(&globalStats.errs, 1)
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -225,7 +212,7 @@ func (r respBodyMonitor) Read(p []byte) (n int, err error) {
|
||||
func (r respBodyMonitor) Close() (err error) {
|
||||
err = r.ReadCloser.Close()
|
||||
if err != nil {
|
||||
atomic.AddUint64(&networkErrsCounter, 1)
|
||||
atomic.AddUint64(&globalStats.errs, 1)
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -252,11 +239,15 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
|
||||
if length > 0 {
|
||||
req.ContentLength = length
|
||||
}
|
||||
|
||||
req, update := setupReqStatsUpdate(req)
|
||||
defer update()
|
||||
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
if xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
|
||||
if !c.NoMetrics {
|
||||
atomic.AddUint64(&networkErrsCounter, 1)
|
||||
atomic.AddUint64(&globalStats.errs, 1)
|
||||
}
|
||||
if c.MarkOffline(err) {
|
||||
logger.LogOnceIf(ctx, fmt.Errorf("Marking %s offline temporarily; caused by %w", c.url.Host, err), c.url.Host)
|
||||
@@ -292,7 +283,7 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
|
||||
if err != nil {
|
||||
if xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
|
||||
if !c.NoMetrics {
|
||||
atomic.AddUint64(&networkErrsCounter, 1)
|
||||
atomic.AddUint64(&globalStats.errs, 1)
|
||||
}
|
||||
if c.MarkOffline(err) {
|
||||
logger.LogOnceIf(ctx, fmt.Errorf("Marking %s offline temporarily; caused by %w", c.url.Host, err), c.url.Host)
|
||||
|
||||
80
internal/rest/rpc-stats.go
Normal file
80
internal/rest/rpc-stats.go
Normal file
@@ -0,0 +1,80 @@
|
||||
// Copyright (c) 2015-2022 MinIO, Inc.
|
||||
//
|
||||
// This file is part of MinIO Object Storage stack
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package rest
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptrace"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// globalStats accumulates package-wide RPC counters. Every field is a uint64
// that the rest of this file reads and updates through sync/atomic.
var globalStats struct {
	// errs is the number of RPC calls that failed.
	errs uint64

	// tcpDialErrs is the number of dials that started but did not complete
	// successfully.
	tcpDialErrs uint64
	// tcpDialCount is the number of dials that completed successfully.
	tcpDialCount uint64
	// tcpDialTotalDur is the summed duration of the successful dials,
	// stored as a time.Duration value converted to uint64.
	tcpDialTotalDur uint64
}
|
||||
|
||||
// RPCStats is a snapshot of the RPC error counter and of the TCP dial
// metrics collected by this package.
type RPCStats struct {
	// Errs is the number of failed RPC calls.
	Errs uint64

	// DialAvgDuration is the average duration of a successful dial, as a
	// time.Duration value (nanoseconds) converted to uint64.
	DialAvgDuration uint64
	// DialErrs is the number of dials that did not complete successfully.
	DialErrs uint64
}
|
||||
|
||||
// GetRPCStats returns RPC stats, include calls errors and dhcp/tcp metrics
|
||||
func GetRPCStats() RPCStats {
|
||||
s := RPCStats{
|
||||
Errs: atomic.LoadUint64(&globalStats.errs),
|
||||
DialErrs: atomic.LoadUint64(&globalStats.tcpDialErrs),
|
||||
}
|
||||
if v := atomic.LoadUint64(&globalStats.tcpDialCount); v > 0 {
|
||||
s.DialAvgDuration = atomic.LoadUint64(&globalStats.tcpDialTotalDur) / v
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// Return a function which update the global stats related to tcp connections
|
||||
func setupReqStatsUpdate(req *http.Request) (*http.Request, func()) {
|
||||
var dialStart, dialEnd time.Time
|
||||
|
||||
trace := &httptrace.ClientTrace{
|
||||
ConnectStart: func(network, addr string) {
|
||||
dialStart = time.Now()
|
||||
},
|
||||
ConnectDone: func(network, addr string, err error) {
|
||||
if err == nil {
|
||||
dialEnd = time.Now()
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
return req.WithContext(httptrace.WithClientTrace(req.Context(), trace)), func() {
|
||||
if !dialStart.IsZero() {
|
||||
if dialEnd.IsZero() {
|
||||
atomic.AddUint64(&globalStats.tcpDialErrs, 1)
|
||||
} else {
|
||||
atomic.AddUint64(&globalStats.tcpDialCount, 1)
|
||||
atomic.AddUint64(&globalStats.tcpDialTotalDur, uint64(dialEnd.Sub(dialStart)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user