Heal only when at least one of the disks is unformatted (#5866)

Current healing has an issue: disks are healed even when
they are offline, without knowing whether the disk is
unformatted. This can lead to prematurely removing the
disk from the set just because it was temporarily
offline.

There is an increasing amount of `mc admin heal` usage
from cron or on a regular basis. If the healing code sees
that a disk is offline, it might prematurely take it down,
which causes availability issues.

Fixes #5826
This commit is contained in:
Harshavardhana 2018-04-30 20:37:39 -07:00 committed by Nitish Tiwari
parent e43cd0d5c8
commit 5f9041571f
2 changed files with 12 additions and 10 deletions

View File

@ -288,10 +288,10 @@ func formatXLMigrateV2ToV3(export string) error {
return ioutil.WriteFile(formatPath, b, 0644) return ioutil.WriteFile(formatPath, b, 0644)
} }
// Returns true, if one of the errors is non-nil. // Returns true, if one of the errors is non-nil and is Unformatted disk.
func hasAnyErrors(errs []error) bool { func hasAnyErrorsUnformatted(errs []error) bool {
for _, err := range errs { for _, err := range errs {
if err != nil { if err != nil && err == errUnformattedDisk {
return true return true
} }
} }

View File

@ -998,8 +998,9 @@ func (s *xlSets) ReloadFormat(ctx context.Context, dryRun bool) (err error) {
return nil return nil
} }
// HealFormat - heals missing `format.json` on freshly or corrupted // HealFormat - heals missing `format.json` on fresh unformatted disks.
// disks (missing format.json but does have erasure coded data in it). // TODO: In future support corrupted disks missing format.json but has erasure
// coded data in it.
func (s *xlSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.HealResultItem, err error) { func (s *xlSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.HealResultItem, err error) {
// Acquire lock on format.json // Acquire lock on format.json
formatLock := s.getHashedSet(formatConfigFile).nsMutex.NewNSLock(minioMetaBucket, formatConfigFile) formatLock := s.getHashedSet(formatConfigFile).nsMutex.NewNSLock(minioMetaBucket, formatConfigFile)
@ -1051,11 +1052,6 @@ func (s *xlSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.HealRe
} }
} }
// no errors found, no healing is required.
if !hasAnyErrors(sErrs) {
return res, errNoHealRequired
}
for index, sErr := range sErrs { for index, sErr := range sErrs {
if sErr != nil { if sErr != nil {
// Look for acceptable heal errors, for any other // Look for acceptable heal errors, for any other
@ -1066,6 +1062,12 @@ func (s *xlSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.HealRe
} }
} }
if !hasAnyErrorsUnformatted(sErrs) {
// No unformatted disks found disks are either offline
// or online, no healing is required.
return res, errNoHealRequired
}
// All disks are unformatted, return quorum error. // All disks are unformatted, return quorum error.
if shouldInitXLDisks(sErrs) { if shouldInitXLDisks(sErrs) {
return res, errXLReadQuorum return res, errXLReadQuorum