fix: rootdisk detection by not using cached value when GetDiskInfo() errors out (#15249)

GetDiskInfo() uses timedValue to cache the disk info for one second.

timedValue behavior was recently changed to return an old cached value
when calculating a new value returns an error.

When a mount point is empty, GetDiskInfo() will return errUnformattedDisk,
timedValue will return cached disk info with unexpected IsRootDisk value,
e.g. false if the mount point belongs to a root disk. Therefore, the mount
point will be considered a valid disk and will be formatted as well.

This commit will also add more defensive code when marking root disks:
always mark a disk offline for any GetDiskInfo() error except
errUnformattedDisk. The server will try anyway to reconnect to those
disks every 10 seconds.
This commit is contained in:
Anis Elleuch 2022-07-08 01:05:23 +01:00 committed by GitHub
parent 32b2f6117e
commit ed0cbfb31e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 20 additions and 8 deletions

View File

@ -1165,8 +1165,12 @@ func markRootDisksAsDown(storageDisks []StorageAPI, errs []error) {
// Do nothing
return
}
infos, _ := getHealDiskInfos(storageDisks, errs)
infos, ierrs := getHealDiskInfos(storageDisks, errs)
for i := range storageDisks {
if ierrs[i] != nil && ierrs[i] != errUnformattedDisk {
storageDisks[i] = nil
continue
}
if storageDisks[i] != nil && infos[i].RootDisk {
// We should not heal on root disk. i.e in a situation where the minio-administrator has unmounted a
// defective drive we should not heal a path on the root disk.

View File

@ -232,6 +232,7 @@ type MetricsGroup struct {
// to populate new values upon cache invalidation.
func (g *MetricsGroup) RegisterRead(read func(ctx context.Context) []Metric) {
g.metricsCache.Once.Do(func() {
g.metricsCache.Relax = true
g.metricsCache.TTL = g.cacheInterval
g.metricsCache.Update = func() (interface{}, error) {
return read(GlobalContext), nil

View File

@ -931,6 +931,10 @@ type timedValue struct {
// Should be set before calling Get().
TTL time.Duration
// When set to true, return the last cached value
// even if updating the value errors out
Relax bool
// Once can be used to initialize values for lazy initialization.
// Should be set before calling Get().
Once sync.Once
@ -951,13 +955,16 @@ func (t *timedValue) Get() (interface{}, error) {
v, err := t.Update()
if err != nil {
// if update fails, return current
// cached value along with error.
//
// Let the caller decide if they want
// to use the returned value based
// on error.
v = t.get(0)
if t.Relax {
// if update fails, return current
// cached value along with error.
//
// Let the caller decide if they want
// to use the returned value based
// on error.
v = t.get(0)
return v, err
}
return v, err
}