fix: background disk heal, to reload format consistently (#10502)

It was observed in VMware vsphere environment during a
pod replacement, `mc admin info` might report incorrect
offline nodes for the replaced drive. This issue eventually
goes away but requires quite a lot of time for all servers
to be in sync.

This PR fixes this behavior properly.
This commit is contained in:
Harshavardhana
2020-09-16 21:14:35 -07:00
committed by GitHub
parent d616d8a857
commit e60834838f
8 changed files with 59 additions and 93 deletions

View File

@@ -103,9 +103,6 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.url.String()+method+querySep+values.Encode(), body)
if err != nil {
if xnet.IsNetworkOrHostDown(err) {
c.MarkOffline()
}
return nil, &NetworkError{err}
}
req.Header.Set("Authorization", "Bearer "+c.newAuthToken(req.URL.Query().Encode()))
@@ -173,7 +170,6 @@ func NewClient(url *url.URL, newCustomTransport func() *http.Transport, newAuthT
url: url,
newAuthToken: newAuthToken,
connected: online,
MaxErrResponseSize: 4096,
HealthCheckInterval: 200 * time.Millisecond,
HealthCheckTimeout: time.Second,
@@ -191,21 +187,18 @@ func (c *Client) MarkOffline() {
// Start goroutine that will attempt to reconnect.
// If server is already trying to reconnect this will have no effect.
if c.HealthCheckFn != nil && atomic.CompareAndSwapInt32(&c.connected, online, offline) {
if c.httpIdleConnsCloser != nil {
c.httpIdleConnsCloser()
}
go func() {
go func(healthFunc func() bool) {
ticker := time.NewTicker(c.HealthCheckInterval)
defer ticker.Stop()
for range ticker.C {
if status := atomic.LoadInt32(&c.connected); status == closed {
if atomic.LoadInt32(&c.connected) == closed {
return
}
if c.HealthCheckFn() {
if healthFunc() {
atomic.CompareAndSwapInt32(&c.connected, offline, online)
return
}
}
}()
}(c.HealthCheckFn)
}
}