mirror of
https://github.com/minio/minio.git
synced 2025-01-26 14:13:16 -05:00
rest: healthcheck should not update failure metrics (#12458)
Otherwise, we can see high numbers of networking issues when a node is down.
This commit is contained in:
parent
9a2102f5ed
commit
6c8be64cdb
@@ -161,6 +161,7 @@ func newlockRESTClient(endpoint Endpoint) *lockRESTClient {
|
||||
// Use a separate client to avoid recursive calls.
|
||||
healthClient := rest.NewClient(serverURL, globalInternodeTransport, newAuthToken)
|
||||
healthClient.ExpectTimeouts = true
|
||||
healthClient.NoMetrics = true
|
||||
restClient.HealthCheckFn = func() bool {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), restClient.HealthCheckTimeout)
|
||||
defer cancel()
|
||||
|
@@ -919,6 +919,7 @@ func newPeerRESTClient(peer *xnet.Host) *peerRESTClient {
|
||||
// Use a separate client to avoid recursive calls.
|
||||
healthClient := rest.NewClient(serverURL, globalInternodeTransport, newAuthToken)
|
||||
healthClient.ExpectTimeouts = true
|
||||
healthClient.NoMetrics = true
|
||||
|
||||
// Construct a new health function.
|
||||
restClient.HealthCheckFn = func() bool {
|
||||
|
@@ -704,6 +704,7 @@ func newStorageRESTClient(endpoint Endpoint, healthcheck bool) *storageRESTClien
|
||||
// Use a separate client to avoid recursive calls.
|
||||
healthClient := rest.NewClient(serverURL, globalInternodeTransport, newAuthToken)
|
||||
healthClient.ExpectTimeouts = true
|
||||
healthClient.NoMetrics = true
|
||||
restClient.HealthCheckFn = func() bool {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), restClient.HealthCheckTimeout)
|
||||
defer cancel()
|
||||
|
@@ -99,6 +99,9 @@ type Client struct {
|
||||
// This will not mark the client offline in these cases.
|
||||
ExpectTimeouts bool
|
||||
|
||||
// Avoid metrics update if set to true
|
||||
NoMetrics bool
|
||||
|
||||
httpClient *http.Client
|
||||
url *url.URL
|
||||
newAuthToken func(audience string) string
|
||||
@@ -136,8 +139,10 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
|
||||
}
|
||||
resp, err := c.httpClient.Do(req)
|
||||
if err != nil {
|
||||
- if c.HealthCheckFn != nil && xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
|
||||
- atomic.AddUint64(&networkErrsCounter, 1)
|
||||
+ if xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
|
||||
+ if !c.NoMetrics {
|
||||
+ atomic.AddUint64(&networkErrsCounter, 1)
|
||||
+ }
|
||||
if c.MarkOffline() {
|
||||
logger.LogIf(ctx, fmt.Errorf("Marking %s temporary offline; caused by %w", c.url.String(), err))
|
||||
}
|
||||
@@ -169,7 +174,10 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
|
||||
// Limit the ReadAll(), just in case, because of a bug, the server responds with large data.
|
||||
b, err := ioutil.ReadAll(io.LimitReader(resp.Body, c.MaxErrResponseSize))
|
||||
if err != nil {
|
||||
- if c.HealthCheckFn != nil && xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
|
||||
+ if xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
|
||||
+ if !c.NoMetrics {
|
||||
+ atomic.AddUint64(&networkErrsCounter, 1)
|
||||
+ }
|
||||
if c.MarkOffline() {
|
||||
logger.LogIf(ctx, fmt.Errorf("Marking %s temporary offline; caused by %w", c.url.String(), err))
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user