MRF: Better detection of non stable disks (#12252)

MRF does not detect when a node is disconnected and then quickly
reconnected. This change ensures that MRF is alerted by comparing the last
disk reconnection timestamp with the last MRF check time.

Signed-off-by: Anis Elleuch <anis@min.io>

Co-authored-by: Klaus Post <klauspost@gmail.com>
This commit is contained in:
Anis Elleuch
2021-05-11 17:19:15 +01:00
committed by GitHub
parent e84f533c6c
commit 56d4d7b8b1
7 changed files with 48 additions and 6 deletions

View File

@@ -75,6 +75,8 @@ func (n *NetworkError) Unwrap() error {
// Client - http based RPC client.
type Client struct {
connected int32 // ref: https://golang.org/pkg/sync/atomic/#pkg-note-BUG
_ int32 // For 64 bits alignment
lastConn int64
// HealthCheckFn is the function set to test for health.
// If not set the client will not keep track of health.
@@ -196,6 +198,7 @@ func NewClient(url *url.URL, tr http.RoundTripper, newAuthToken func(aud string)
url: url,
newAuthToken: newAuthToken,
connected: online,
lastConn: time.Now().UnixNano(),
MaxErrResponseSize: 4096,
HealthCheckInterval: 200 * time.Millisecond,
HealthCheckTimeout: time.Second,
@@ -207,6 +210,11 @@ func (c *Client) IsOnline() bool {
return atomic.LoadInt32(&c.connected) == online
}
// LastConn reports the time of the most recent (re-)connection.
// The value is read atomically from the lastConn field, which holds a
// Unix timestamp in nanoseconds.
func (c *Client) LastConn() time.Time {
	nsec := atomic.LoadInt64(&c.lastConn)
	return time.Unix(0, nsec)
}
// MarkOffline - will mark a client as being offline and spawns
// a goroutine that will attempt to reconnect if HealthCheckFn is set.
// returns true if the node changed state from online to offline
@@ -223,6 +231,7 @@ func (c *Client) MarkOffline() bool {
if c.HealthCheckFn() {
if atomic.CompareAndSwapInt32(&c.connected, offline, online) {
logger.Info("Client %s online", c.url.String())
atomic.StoreInt64(&c.lastConn, time.Now().UnixNano())
}
return
}