fix: make sure to correctly initialize health checks (#17765)

health checks were missing for drives replaced since

- HealFormat() would replace the drives without a health check
- disconnected drives when they reconnect via connectEndpoint()
  the loop also loses health checks for local disks and merges
  these into a single code.
- other than this separate cleanUp, health check variables to avoid
  overloading them with similar requirements.
- also ensure that we compete via context selector for disk monitoring
  such that the canceled disks don't linger around longer waiting for
  the ticker to trigger.
- allow disabling active monitoring.
This commit is contained in:
Harshavardhana
2023-08-01 10:54:26 -07:00
committed by GitHub
parent 004f1e2f66
commit b0f0e53bba
11 changed files with 83 additions and 45 deletions

View File

@@ -116,7 +116,10 @@ func (s *erasureSets) getDiskMap() map[Endpoint]StorageAPI {
// Initializes a new StorageAPI from the endpoint argument, returns
// StorageAPI and also `format` which exists on the disk.
func connectEndpoint(endpoint Endpoint) (StorageAPI, *formatErasureV3, error) {
disk, err := newStorageAPI(endpoint, false)
disk, err := newStorageAPI(endpoint, storageOpts{
cleanUp: false,
healthCheck: false,
})
if err != nil {
return nil, nil, err
}
@@ -132,6 +135,15 @@ func connectEndpoint(endpoint Endpoint) (StorageAPI, *formatErasureV3, error) {
return nil, nil, fmt.Errorf("Drive: %s returned %w", disk, err) // make sure to '%w' to wrap the error
}
disk.Close()
disk, err = newStorageAPI(endpoint, storageOpts{
cleanUp: true,
healthCheck: true,
})
if err != nil {
return nil, nil, err
}
return disk, format, nil
}
@@ -241,21 +253,10 @@ func (s *erasureSets) connectDisks() {
}
s.erasureDisks[setIndex][diskIndex].Close()
}
if disk.IsLocal() {
disk.SetDiskID(format.Erasure.This)
s.erasureDisks[setIndex][diskIndex] = disk
} else {
// Enable healthcheck disk for remote endpoint.
disk, err = newStorageAPI(endpoint, true)
if err != nil {
printEndpointError(endpoint, err, false)
s.erasureDisksMu.Unlock()
return
}
disk.SetDiskID(format.Erasure.This)
s.erasureDisks[setIndex][diskIndex] = disk
}
disk.SetDiskID(format.Erasure.This)
disk.SetDiskLoc(s.poolIndex, setIndex, diskIndex)
s.erasureDisks[setIndex][diskIndex] = disk
s.erasureDisksMu.Unlock()
}(endpoint)
}
@@ -1055,7 +1056,10 @@ func markRootDisksAsDown(storageDisks []StorageAPI, errs []error) {
// HealFormat - heals missing `format.json` on fresh unformatted disks.
func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.HealResultItem, err error) {
storageDisks, _ := initStorageDisksWithErrors(s.endpoints.Endpoints, false)
storageDisks, _ := initStorageDisksWithErrors(s.endpoints.Endpoints, storageOpts{
cleanUp: true,
healthCheck: true,
})
defer func(storageDisks []StorageAPI) {
if err != nil {