fix: background disk heal, to reload format consistently (#10502)

It was observed in VMware vsphere environment during a
pod replacement, `mc admin info` might report incorrect
offline nodes for the replaced drive. This issue eventually
goes away but requires quite a lot of time for all servers
to be in sync.

This PR fixes this behavior properly.
This commit is contained in:
Harshavardhana
2020-09-16 21:14:35 -07:00
committed by GitHub
parent d616d8a857
commit e60834838f
8 changed files with 59 additions and 93 deletions

View File

@@ -21,7 +21,6 @@ import (
"path"
"time"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/madmin"
)
@@ -129,24 +128,5 @@ func healDiskFormat(ctx context.Context, objAPI ObjectLayer, opts madmin.HealOpt
return madmin.HealResultItem{}, err
}
// Healing succeeded notify the peers to reload format and re-initialize disks.
// We will not notify peers if healing is not required.
if err == nil {
// Notify servers in background and retry if needed.
go func() {
retry:
for _, nerr := range globalNotificationSys.ReloadFormat(opts.DryRun) {
if nerr.Err != nil {
if nerr.Err.Error() == errServerNotInitialized.Error() {
time.Sleep(time.Second)
goto retry
}
logger.GetReqInfo(ctx).SetTags("peerAddress", nerr.Host.String())
logger.LogIf(ctx, nerr.Err)
}
}
}()
}
return res, nil
}