fix: background disk heal, to reload format consistently (#10502)

It was observed in a VMware vSphere environment that, during a pod replacement, `mc admin info` might report incorrect offline nodes for the replaced drive. This issue eventually goes away, but it takes quite a long time for all servers to be in sync. This PR fixes this behavior properly.
2025-11-07 04:42:56 -05:00 · 2020-09-16 21:14:35 -07:00
parent d616d8a857
commit e60834838f
8 changed files with 59 additions and 93 deletions
--- a/cmd/erasure.go
+++ b/cmd/erasure.go
@@ -140,6 +140,7 @@ func getDisksInfo(disks []StorageAPI, endpoints []string) (disksInfo []madmin.Di
 		index := index
 		g.Go(func() error {
 			if disks[index] == OfflineDisk {
+				logger.LogIf(GlobalContext, fmt.Errorf("%s: %s", errDiskNotFound, endpoints[index]))
 				disksInfo[index] = madmin.Disk{
 					State:    diskErrToDriveState(errDiskNotFound),
 					Endpoint: endpoints[index],
@@ -149,11 +150,9 @@ func getDisksInfo(disks []StorageAPI, endpoints []string) (disksInfo []madmin.Di
 			}
 			info, err := disks[index].DiskInfo(context.TODO())
 			if err != nil {
-				if !IsErr(err, baseErrs...) {
-					reqInfo := (&logger.ReqInfo{}).AppendTags("disk", disks[index].String())
-					ctx := logger.SetReqInfo(GlobalContext, reqInfo)
-					logger.LogIf(ctx, err)
-				}
+				reqInfo := (&logger.ReqInfo{}).AppendTags("disk", disks[index].String())
+				ctx := logger.SetReqInfo(GlobalContext, reqInfo)
+				logger.LogIf(ctx, err)
 				disksInfo[index] = madmin.Disk{
 					State:    diskErrToDriveState(err),
 					Endpoint: endpoints[index],