mirror of
https://github.com/minio/minio.git
synced 2025-11-22 10:37:42 -05:00
XL: Bring in some modularity into format verification and healing. (#1832)
This commit is contained in:
@@ -21,25 +21,239 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/skyrings/skyring-common/tools/uuid"
|
||||
)
|
||||
|
||||
// fsFormat - structure holding 'fs' format.
|
||||
type fsFormat struct {
|
||||
Version string `json:"version"`
|
||||
}
|
||||
|
||||
// xlFormat - structure holding 'xl' format.
|
||||
type xlFormat struct {
|
||||
Version string `json:"version"`
|
||||
Disk string `json:"disk"`
|
||||
JBOD []string `json:"jbod"`
|
||||
Version string `json:"version"` // Version of 'xl' format.
|
||||
Disk string `json:"disk"` // Disk field carries assigned disk uuid.
|
||||
// JBOD field carries the input disk order generated the first
|
||||
// time when fresh disks were supplied.
|
||||
JBOD []string `json:"jbod"`
|
||||
}
|
||||
|
||||
// formatConfigV1 - structure holds format config version '1'.
|
||||
type formatConfigV1 struct {
|
||||
Version string `json:"version"`
|
||||
Format string `json:"format"`
|
||||
FS *fsFormat `json:"fs,omitempty"`
|
||||
XL *xlFormat `json:"xl,omitempty"`
|
||||
Version string `json:"version"` // Version of the format config.
|
||||
// Format indicates the backend format type, supports two values 'xl' and 'fs'.
|
||||
Format string `json:"format"`
|
||||
FS *fsFormat `json:"fs,omitempty"` // FS field holds fs format.
|
||||
XL *xlFormat `json:"xl,omitempty"` // XL field holds xl format.
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
All disks online
|
||||
-----------------
|
||||
- All Unformatted - format all and return success.
|
||||
- Some Unformatted - format all and return success.
|
||||
- Any JBOD inconsistent - return failure // Requires deep inspection, phase2.
|
||||
- Some are corrupt (missing format.json) - return failure // Requires deep inspection, phase2.
|
||||
- Any unrecognized disks - return failure
|
||||
|
||||
Some disks are offline and we have quorum.
|
||||
-----------------
|
||||
- Some unformatted - no heal, return success.
|
||||
- Any JBOD inconsistent - return failure // Requires deep inspection, phase2.
|
||||
- Some are corrupt (missing format.json) - return failure // Requires deep inspection, phase2.
|
||||
- Any unrecognized disks - return failure
|
||||
|
||||
No read quorum
|
||||
-----------------
|
||||
failure for all cases.
|
||||
|
||||
// Pseudo code for managing `format.json`.
|
||||
|
||||
// Generic checks.
|
||||
if (no quorum) return error
|
||||
if (any disk is corrupt) return error // phase2
|
||||
if (jbod inconsistent) return error // phase2
|
||||
if (disks not recognized) // Always error.
|
||||
|
||||
// Specific checks.
|
||||
if (all disks online)
|
||||
if (all disks return format.json)
|
||||
if (jbod consistent)
|
||||
if (all disks recognized)
|
||||
return
|
||||
else
|
||||
if (all disks return format.json not found)
|
||||
(initialize format)
|
||||
return
|
||||
else (some disks return format.json not found)
|
||||
(heal format)
|
||||
return
|
||||
fi
|
||||
fi
|
||||
else // No healing at this point forward, some disks are offline or dead.
|
||||
if (some disks return format.json not found)
|
||||
if (with force)
|
||||
// Offline disks are marked as dead.
|
||||
(heal format) // Offline disks should be marked as dead.
|
||||
return success
|
||||
else (without force)
|
||||
// --force is necessary to heal few drives, because some drives
|
||||
// are offline. Offline disks will be marked as dead.
|
||||
return error
|
||||
fi
|
||||
fi
|
||||
*/
|
||||
|
||||
var errSomeDiskUnformatted = errors.New("some disks are found to be unformatted")
|
||||
var errSomeDiskOffline = errors.New("some disks are offline")
|
||||
|
||||
// Returns error slice into understandable errors.
|
||||
func reduceFormatErrs(errs []error, diskCount int) error {
|
||||
var errUnformattedDiskCount = 0
|
||||
var errDiskNotFoundCount = 0
|
||||
for _, err := range errs {
|
||||
if err == errUnformattedDisk {
|
||||
errUnformattedDiskCount++
|
||||
} else if err == errDiskNotFound {
|
||||
errDiskNotFoundCount++
|
||||
}
|
||||
}
|
||||
// Returns errUnformattedDisk if all disks report unFormattedDisk.
|
||||
if errUnformattedDiskCount == diskCount {
|
||||
return errUnformattedDisk
|
||||
} else if errUnformattedDiskCount < diskCount && errDiskNotFoundCount == 0 {
|
||||
// Only some disks return unFormattedDisk and all disks are online.
|
||||
return errSomeDiskUnformatted
|
||||
} else if errUnformattedDiskCount < diskCount && errDiskNotFoundCount > 0 {
|
||||
// Only some disks return unFormattedDisk and some disks are
|
||||
// offline as well.
|
||||
return errSomeDiskOffline
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// loadAllFormats - load all format config from all input disks in parallel.
|
||||
func loadAllFormats(bootstrapDisks []StorageAPI) ([]*formatConfigV1, []error) {
|
||||
// Initialize sync waitgroup.
|
||||
var wg = &sync.WaitGroup{}
|
||||
|
||||
// Initialize list of errors.
|
||||
var sErrs = make([]error, len(bootstrapDisks))
|
||||
|
||||
// Initialize format configs.
|
||||
var formatConfigs = make([]*formatConfigV1, len(bootstrapDisks))
|
||||
|
||||
// Make a volume entry on all underlying storage disks.
|
||||
for index, disk := range bootstrapDisks {
|
||||
wg.Add(1)
|
||||
// Make a volume inside a go-routine.
|
||||
go func(index int, disk StorageAPI) {
|
||||
defer wg.Done()
|
||||
formatConfig, lErr := loadFormat(disk)
|
||||
if lErr != nil {
|
||||
sErrs[index] = lErr
|
||||
return
|
||||
}
|
||||
formatConfigs[index] = formatConfig
|
||||
}(index, disk)
|
||||
}
|
||||
|
||||
// Wait for all make vol to finish.
|
||||
wg.Wait()
|
||||
|
||||
for _, err := range sErrs {
|
||||
if err != nil {
|
||||
// Return all formats and errors.
|
||||
return formatConfigs, sErrs
|
||||
}
|
||||
}
|
||||
// Return all formats and nil
|
||||
return formatConfigs, nil
|
||||
}
|
||||
|
||||
// genericFormatCheck - validates and returns error.
|
||||
// if (no quorum) return error
|
||||
// if (any disk is corrupt) return error // phase2
|
||||
// if (jbod inconsistent) return error // phase2
|
||||
// if (disks not recognized) // Always error.
|
||||
func genericFormatCheck(formatConfigs []*formatConfigV1, sErrs []error) (err error) {
|
||||
// Calculate the errors.
|
||||
var (
|
||||
errCorruptFormatCount = 0
|
||||
errCount = 0
|
||||
)
|
||||
|
||||
// Through all errors calculate the actual errors.
|
||||
for _, lErr := range sErrs {
|
||||
if lErr == nil {
|
||||
continue
|
||||
}
|
||||
// These errors are good conditions, means disk is online.
|
||||
if lErr == errUnformattedDisk || lErr == errVolumeNotFound {
|
||||
continue
|
||||
}
|
||||
if lErr == errCorruptedFormat {
|
||||
errCorruptFormatCount++
|
||||
} else {
|
||||
errCount++
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate read quorum.
|
||||
readQuorum := len(formatConfigs)/2 + 1
|
||||
|
||||
// Validate the err count under tolerant limit.
|
||||
if errCount > len(formatConfigs)-readQuorum {
|
||||
return errXLReadQuorum
|
||||
}
|
||||
|
||||
// One of the disk has corrupt format, return error.
|
||||
if errCorruptFormatCount > 0 {
|
||||
return errCorruptedFormat
|
||||
}
|
||||
|
||||
// Validates if format and JBOD are consistent across all disks.
|
||||
if err = checkFormatXL(formatConfigs); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Success..
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkDisksConsistency - checks if all disks are consistent with all JBOD entries on all disks.
|
||||
func checkDisksConsistency(formatConfigs []*formatConfigV1) error {
|
||||
var disks = make([]string, len(formatConfigs))
|
||||
var disksFound = make(map[string]bool)
|
||||
// Collect currently available disk uuids.
|
||||
for index, formatConfig := range formatConfigs {
|
||||
if formatConfig == nil {
|
||||
continue
|
||||
}
|
||||
disks[index] = formatConfig.XL.Disk
|
||||
}
|
||||
// Validate collected uuids and verify JBOD.
|
||||
for index, uuid := range disks {
|
||||
if uuid == "" {
|
||||
continue
|
||||
}
|
||||
var formatConfig = formatConfigs[index]
|
||||
for _, savedUUID := range formatConfig.XL.JBOD {
|
||||
if savedUUID == uuid {
|
||||
disksFound[uuid] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
// Check if all disks are found.
|
||||
for _, value := range disksFound {
|
||||
if !value {
|
||||
return errors.New("Some disks not found in JBOD.")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkJBODConsistency - validate xl jbod order if they are consistent.
|
||||
@@ -60,7 +274,7 @@ func checkJBODConsistency(formatConfigs []*formatConfigV1) error {
|
||||
}
|
||||
savedJBODStr := strings.Join(format.XL.JBOD, ".")
|
||||
if jbodStr != savedJBODStr {
|
||||
return errors.New("Inconsistent disks.")
|
||||
return errors.New("Inconsistent JBOD found.")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -87,10 +301,8 @@ func reorderDisks(bootstrapDisks []StorageAPI, formatConfigs []*formatConfigV1)
|
||||
}
|
||||
// Pick the first JBOD list to verify the order and construct new set of disk slice.
|
||||
var newDisks = make([]StorageAPI, len(bootstrapDisks))
|
||||
var unclaimedJBODIndex = make(map[int]struct{})
|
||||
for fIndex, format := range formatConfigs {
|
||||
if format == nil {
|
||||
unclaimedJBODIndex[fIndex] = struct{}{}
|
||||
continue
|
||||
}
|
||||
jIndex := findIndex(format.XL.Disk, savedJBOD)
|
||||
@@ -99,17 +311,6 @@ func reorderDisks(bootstrapDisks []StorageAPI, formatConfigs []*formatConfigV1)
|
||||
}
|
||||
newDisks[jIndex] = bootstrapDisks[fIndex]
|
||||
}
|
||||
// Save the unclaimed jbods as well.
|
||||
for index, disk := range newDisks {
|
||||
if disk == nil {
|
||||
for fIndex := range unclaimedJBODIndex {
|
||||
newDisks[index] = bootstrapDisks[fIndex]
|
||||
delete(unclaimedJBODIndex, fIndex)
|
||||
break
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
return newDisks, nil
|
||||
}
|
||||
|
||||
@@ -146,83 +347,104 @@ func loadFormat(disk StorageAPI) (format *formatConfigV1, err error) {
|
||||
// Heals any missing format.json on the drives. Returns error only for unexpected errors
|
||||
// as regular errors can be ignored since there might be enough quorum to be operational.
|
||||
func healFormatXL(bootstrapDisks []StorageAPI) error {
|
||||
needHeal := make([]bool, len(bootstrapDisks)) // Slice indicating which drives needs healing.
|
||||
|
||||
formatConfigs := make([]*formatConfigV1, len(bootstrapDisks))
|
||||
var referenceConfig *formatConfigV1
|
||||
successCount := 0 // Tracks if we have successfully loaded all `format.json` from all disks.
|
||||
formatNotFoundCount := 0 // Tracks if we `format.json` is not found on all disks.
|
||||
// Loads `format.json` from all disks.
|
||||
for index, disk := range bootstrapDisks {
|
||||
formatXL, err := loadFormat(disk)
|
||||
if err != nil {
|
||||
if err == errUnformattedDisk {
|
||||
// format.json is missing, should be healed.
|
||||
needHeal[index] = true
|
||||
formatNotFoundCount++
|
||||
continue
|
||||
} else if err == errDiskNotFound { // Is a valid case we
|
||||
// can proceed without healing.
|
||||
return nil
|
||||
}
|
||||
// Return error for unsupported errors.
|
||||
return err
|
||||
} // Success.
|
||||
formatConfigs[index] = formatXL
|
||||
successCount++
|
||||
}
|
||||
// All `format.json` has been read successfully, previously completed.
|
||||
if successCount == len(bootstrapDisks) {
|
||||
// Return success.
|
||||
return nil
|
||||
}
|
||||
// All disks are fresh, format.json will be written by initFormatXL()
|
||||
if formatNotFoundCount == len(bootstrapDisks) {
|
||||
return initFormatXL(bootstrapDisks)
|
||||
}
|
||||
// Validate format configs for consistency in JBOD and disks.
|
||||
if err := checkFormatXL(formatConfigs); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if referenceConfig == nil {
|
||||
// This config will be used to update the drives missing format.json.
|
||||
for _, formatConfig := range formatConfigs {
|
||||
if formatConfig == nil {
|
||||
continue
|
||||
}
|
||||
referenceConfig = formatConfig
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
uuidUsage := make([]struct {
|
||||
uuid string // Disk uuid
|
||||
inuse bool // indicates if the uuid is used by any disk
|
||||
inUse bool // indicates if the uuid is used by
|
||||
// any disk
|
||||
}, len(bootstrapDisks))
|
||||
|
||||
needHeal := make([]bool, len(bootstrapDisks)) // Slice indicating which drives needs healing.
|
||||
|
||||
// Returns any unused drive UUID.
|
||||
getUnusedUUID := func() string {
|
||||
for index := range uuidUsage {
|
||||
if !uuidUsage[index].inuse {
|
||||
uuidUsage[index].inuse = true
|
||||
if !uuidUsage[index].inUse {
|
||||
uuidUsage[index].inUse = true
|
||||
return uuidUsage[index].uuid
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
formatConfigs := make([]*formatConfigV1, len(bootstrapDisks))
|
||||
var referenceConfig *formatConfigV1
|
||||
for index, disk := range bootstrapDisks {
|
||||
formatXL, err := loadFormat(disk)
|
||||
if err == errUnformattedDisk {
|
||||
// format.json is missing, should be healed.
|
||||
needHeal[index] = true
|
||||
continue
|
||||
}
|
||||
if err == nil {
|
||||
if referenceConfig == nil {
|
||||
// this config will be used to update the drives missing format.json
|
||||
referenceConfig = formatXL
|
||||
}
|
||||
formatConfigs[index] = formatXL
|
||||
} else {
|
||||
// Abort format.json healing if any one of the drives is not available because we don't
|
||||
// know if that drive is down permanently or temporarily. So we don't want to reuse
|
||||
// its uuid for any other disks.
|
||||
// Return nil so that operations can continue if quorum is available.
|
||||
return nil
|
||||
}
|
||||
}
|
||||
if referenceConfig == nil {
|
||||
// All disks are fresh, format.json will be written by initFormatXL()
|
||||
return nil
|
||||
}
|
||||
|
||||
// From reference config update UUID's not be in use.
|
||||
for index, diskUUID := range referenceConfig.XL.JBOD {
|
||||
uuidUsage[index].uuid = diskUUID
|
||||
uuidUsage[index].inuse = false
|
||||
uuidUsage[index].inUse = false
|
||||
}
|
||||
|
||||
// For all config formats validate if they are in use and update
|
||||
// the uuidUsage values.
|
||||
// For all config formats validate if they are in use and
|
||||
// update the uuidUsage values.
|
||||
for _, config := range formatConfigs {
|
||||
if config == nil {
|
||||
continue
|
||||
}
|
||||
for index := range uuidUsage {
|
||||
if config.XL.Disk == uuidUsage[index].uuid {
|
||||
uuidUsage[index].inuse = true
|
||||
uuidUsage[index].inUse = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This section heals the format.json and updates the fresh disks
|
||||
// by reapply the unused UUID's .
|
||||
// by apply a new UUID for all the fresh disks.
|
||||
for index, heal := range needHeal {
|
||||
if !heal {
|
||||
// Previously we detected that heal is not needed on the disk.
|
||||
continue
|
||||
}
|
||||
config := &formatConfigV1{}
|
||||
*config = *referenceConfig
|
||||
config.XL.Disk = getUnusedUUID()
|
||||
if config.XL.Disk == "" {
|
||||
// getUnusedUUID() should have returned an unused uuid, it
|
||||
// getUnusedUUID() should have
|
||||
// returned an unused uuid, it
|
||||
// is an unexpected error.
|
||||
return errUnexpected
|
||||
}
|
||||
@@ -231,10 +453,10 @@ func healFormatXL(bootstrapDisks []StorageAPI) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Fresh disk without format.json
|
||||
_, _ = bootstrapDisks[index].AppendFile(minioMetaBucket, formatConfigFile, formatBytes)
|
||||
// Ignore any error from AppendFile() as quorum might still be there to be operational.
|
||||
// Ignore any error from AppendFile() as
|
||||
// quorum might still be there to be operational.
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -246,12 +468,6 @@ func loadFormatXL(bootstrapDisks []StorageAPI) (disks []StorageAPI, err error) {
|
||||
var diskNotFoundCount = 0
|
||||
formatConfigs := make([]*formatConfigV1, len(bootstrapDisks))
|
||||
|
||||
// Heal missing format.json on the drives.
|
||||
if err = healFormatXL(bootstrapDisks); err != nil {
|
||||
// There was an unexpected unrecoverable error during healing.
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Try to load `format.json` bootstrap disks.
|
||||
for index, disk := range bootstrapDisks {
|
||||
var formatXL *formatConfigV1
|
||||
@@ -277,9 +493,9 @@ func loadFormatXL(bootstrapDisks []StorageAPI) (disks []StorageAPI, err error) {
|
||||
} else if diskNotFoundCount == len(bootstrapDisks) {
|
||||
return nil, errDiskNotFound
|
||||
} else if diskNotFoundCount > len(bootstrapDisks)-(len(bootstrapDisks)/2+1) {
|
||||
return nil, errReadQuorum
|
||||
return nil, errXLReadQuorum
|
||||
} else if unformattedDisksFoundCnt > len(bootstrapDisks)-(len(bootstrapDisks)/2+1) {
|
||||
return nil, errReadQuorum
|
||||
return nil, errXLReadQuorum
|
||||
}
|
||||
|
||||
// Validate the format configs read are correct.
|
||||
@@ -310,7 +526,10 @@ func checkFormatXL(formatConfigs []*formatConfigV1) error {
|
||||
return fmt.Errorf("Number of disks %d did not match the backend format %d", len(formatConfigs), len(formatXL.XL.JBOD))
|
||||
}
|
||||
}
|
||||
return checkJBODConsistency(formatConfigs)
|
||||
if err := checkJBODConsistency(formatConfigs); err != nil {
|
||||
return err
|
||||
}
|
||||
return checkDisksConsistency(formatConfigs)
|
||||
}
|
||||
|
||||
// initFormatXL - save XL format configuration on all disks.
|
||||
@@ -328,7 +547,7 @@ func initFormatXL(storageDisks []StorageAPI) (err error) {
|
||||
if saveFormatErrCnt <= len(storageDisks)-(len(storageDisks)/2+3) {
|
||||
continue
|
||||
}
|
||||
return errWriteQuorum
|
||||
return errXLWriteQuorum
|
||||
}
|
||||
}
|
||||
var u *uuid.UUID
|
||||
|
||||
Reference in New Issue
Block a user