mirror of
https://github.com/minio/minio.git
synced 2025-04-05 20:30:32 -04:00
rest: healthcheck should not update failure metrics (#12458)
Otherwise, health-check probes inflate the network error counter, and we see high numbers of networking issues reported while a node is down.
This commit is contained in:
parent
9a2102f5ed
commit
6c8be64cdb
@ -161,6 +161,7 @@ func newlockRESTClient(endpoint Endpoint) *lockRESTClient {
|
|||||||
// Use a separate client to avoid recursive calls.
|
// Use a separate client to avoid recursive calls.
|
||||||
healthClient := rest.NewClient(serverURL, globalInternodeTransport, newAuthToken)
|
healthClient := rest.NewClient(serverURL, globalInternodeTransport, newAuthToken)
|
||||||
healthClient.ExpectTimeouts = true
|
healthClient.ExpectTimeouts = true
|
||||||
|
healthClient.NoMetrics = true
|
||||||
restClient.HealthCheckFn = func() bool {
|
restClient.HealthCheckFn = func() bool {
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), restClient.HealthCheckTimeout)
|
ctx, cancel := context.WithTimeout(context.Background(), restClient.HealthCheckTimeout)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
@ -919,6 +919,7 @@ func newPeerRESTClient(peer *xnet.Host) *peerRESTClient {
|
|||||||
// Use a separate client to avoid recursive calls.
|
// Use a separate client to avoid recursive calls.
|
||||||
healthClient := rest.NewClient(serverURL, globalInternodeTransport, newAuthToken)
|
healthClient := rest.NewClient(serverURL, globalInternodeTransport, newAuthToken)
|
||||||
healthClient.ExpectTimeouts = true
|
healthClient.ExpectTimeouts = true
|
||||||
|
healthClient.NoMetrics = true
|
||||||
|
|
||||||
// Construct a new health function.
|
// Construct a new health function.
|
||||||
restClient.HealthCheckFn = func() bool {
|
restClient.HealthCheckFn = func() bool {
|
||||||
|
@ -704,6 +704,7 @@ func newStorageRESTClient(endpoint Endpoint, healthcheck bool) *storageRESTClien
|
|||||||
// Use a separate client to avoid recursive calls.
|
// Use a separate client to avoid recursive calls.
|
||||||
healthClient := rest.NewClient(serverURL, globalInternodeTransport, newAuthToken)
|
healthClient := rest.NewClient(serverURL, globalInternodeTransport, newAuthToken)
|
||||||
healthClient.ExpectTimeouts = true
|
healthClient.ExpectTimeouts = true
|
||||||
|
healthClient.NoMetrics = true
|
||||||
restClient.HealthCheckFn = func() bool {
|
restClient.HealthCheckFn = func() bool {
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), restClient.HealthCheckTimeout)
|
ctx, cancel := context.WithTimeout(context.Background(), restClient.HealthCheckTimeout)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
@ -99,6 +99,9 @@ type Client struct {
|
|||||||
// This will not mark the client offline in these cases.
|
// This will not mark the client offline in these cases.
|
||||||
ExpectTimeouts bool
|
ExpectTimeouts bool
|
||||||
|
|
||||||
|
// Avoid metrics update if set to true
|
||||||
|
NoMetrics bool
|
||||||
|
|
||||||
httpClient *http.Client
|
httpClient *http.Client
|
||||||
url *url.URL
|
url *url.URL
|
||||||
newAuthToken func(audience string) string
|
newAuthToken func(audience string) string
|
||||||
@ -136,8 +139,10 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
|
|||||||
}
|
}
|
||||||
resp, err := c.httpClient.Do(req)
|
resp, err := c.httpClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if c.HealthCheckFn != nil && xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
|
if xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
|
||||||
|
if !c.NoMetrics {
|
||||||
atomic.AddUint64(&networkErrsCounter, 1)
|
atomic.AddUint64(&networkErrsCounter, 1)
|
||||||
|
}
|
||||||
if c.MarkOffline() {
|
if c.MarkOffline() {
|
||||||
logger.LogIf(ctx, fmt.Errorf("Marking %s temporary offline; caused by %w", c.url.String(), err))
|
logger.LogIf(ctx, fmt.Errorf("Marking %s temporary offline; caused by %w", c.url.String(), err))
|
||||||
}
|
}
|
||||||
@ -169,7 +174,10 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
|
|||||||
// Limit the ReadAll(), just in case, because of a bug, the server responds with large data.
|
// Limit the ReadAll(), just in case, because of a bug, the server responds with large data.
|
||||||
b, err := ioutil.ReadAll(io.LimitReader(resp.Body, c.MaxErrResponseSize))
|
b, err := ioutil.ReadAll(io.LimitReader(resp.Body, c.MaxErrResponseSize))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if c.HealthCheckFn != nil && xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
|
if xnet.IsNetworkOrHostDown(err, c.ExpectTimeouts) {
|
||||||
|
if !c.NoMetrics {
|
||||||
|
atomic.AddUint64(&networkErrsCounter, 1)
|
||||||
|
}
|
||||||
if c.MarkOffline() {
|
if c.MarkOffline() {
|
||||||
logger.LogIf(ctx, fmt.Errorf("Marking %s temporary offline; caused by %w", c.url.String(), err))
|
logger.LogIf(ctx, fmt.Errorf("Marking %s temporary offline; caused by %w", c.url.String(), err))
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user