fix: make sure to correctly initialize health checks (#17765)

health checks were missing for replaced drives, because:

- HealFormat() would replace the drives without a health check
- disconnected drives, when they reconnect via connectEndpoint(),
  would come back without one; the loop also loses health checks for
  local disks. This change merges these into a single code path.
- other than this, separate the cleanUp and health check variables to
  avoid overloading them with similar requirements.
- also ensure that disk monitoring selects on context cancellation
  alongside the ticker, so that canceled disks don't linger around
  waiting for the ticker to trigger.
- allow disabling active monitoring.
This commit is contained in:
Harshavardhana
2023-08-01 10:54:26 -07:00
committed by GitHub
parent 004f1e2f66
commit b0f0e53bba
11 changed files with 83 additions and 45 deletions

View File

@@ -58,7 +58,7 @@ type epHealth struct {
}
// isOffline returns current liveness result of remote target. Add endpoint to
// healthcheck map if missing and default to online status
// healthCheck map if missing and default to online status
func (sys *BucketTargetSys) isOffline(ep *url.URL) bool {
sys.hMutex.RLock()
defer sys.hMutex.RUnlock()
@@ -126,7 +126,7 @@ func (sys *BucketTargetSys) heartBeat(ctx context.Context) {
}
}
// periodically rebuild the healthcheck map from list of targets to clear
// periodically rebuild the healthCheck map from list of targets to clear
// out stale endpoints
func (sys *BucketTargetSys) reloadHealthCheckers(ctx context.Context) {
m := make(map[string]epHealth)
@@ -362,7 +362,7 @@ func NewBucketTargetSys(ctx context.Context) *BucketTargetSys {
hc: make(map[string]epHealth),
hcClient: newHCClient(),
}
// reload healthcheck endpoints map periodically to remove stale endpoints from the map.
// reload healthCheck endpoints map periodically to remove stale endpoints from the map.
go func() {
rTimer := time.NewTimer(defaultHealthCheckReloadDuration)
defer rTimer.Stop()