XL: Bring in some modularity into format verification and healing. (#1832)

This commit is contained in:
Harshavardhana
2016-06-02 16:34:15 -07:00
parent aa1d769b1e
commit fb95c1fad3
18 changed files with 684 additions and 145 deletions

View File

@@ -21,25 +21,239 @@ import (
"errors"
"fmt"
"strings"
"sync"
"github.com/skyrings/skyring-common/tools/uuid"
)
// fsFormat - structure holding 'fs' format.
type fsFormat struct {
Version string `json:"version"`
}
// xlFormat - structure holding 'xl' format.
type xlFormat struct {
Version string `json:"version"`
Disk string `json:"disk"`
JBOD []string `json:"jbod"`
Version string `json:"version"` // Version of 'xl' format.
Disk string `json:"disk"` // Disk field carries assigned disk uuid.
// JBOD field carries the input disk order generated the first
// time when fresh disks were supplied.
JBOD []string `json:"jbod"`
}
// formatConfigV1 - structure holds format config version '1'.
type formatConfigV1 struct {
Version string `json:"version"`
Format string `json:"format"`
FS *fsFormat `json:"fs,omitempty"`
XL *xlFormat `json:"xl,omitempty"`
Version string `json:"version"` // Version of the format config.
// Format indicates the backend format type, supports two values 'xl' and 'fs'.
Format string `json:"format"`
FS *fsFormat `json:"fs,omitempty"` // FS field holds fs format.
XL *xlFormat `json:"xl,omitempty"` // XL field holds xl format.
}
/*
All disks online
-----------------
- All Unformatted - format all and return success.
- Some Unformatted - format all and return success.
- Any JBOD inconsistent - return failure // Requires deep inspection, phase2.
- Some are corrupt (missing format.json) - return failure // Requires deep inspection, phase2.
- Any unrecognized disks - return failure
Some disks are offline and we have quorum.
-----------------
- Some unformatted - no heal, return success.
- Any JBOD inconsistent - return failure // Requires deep inspection, phase2.
- Some are corrupt (missing format.json) - return failure // Requires deep inspection, phase2.
- Any unrecognized disks - return failure
No read quorum
-----------------
failure for all cases.
// Pseudo code for managing `format.json`.
// Generic checks.
if (no quorum) return error
if (any disk is corrupt) return error // phase2
if (jbod inconsistent) return error // phase2
if (disks not recognized) // Always error.
// Specific checks.
if (all disks online)
if (all disks return format.json)
if (jbod consistent)
if (all disks recognized)
return
else
if (all disks return format.json not found)
(initialize format)
return
else (some disks return format.json not found)
(heal format)
return
fi
fi
else // No healing at this point forward, some disks are offline or dead.
if (some disks return format.json not found)
if (with force)
// Offline disks are marked as dead.
(heal format) // Offline disks should be marked as dead.
return success
else (without force)
// --force is necessary to heal few drives, because some drives
// are offline. Offline disks will be marked as dead.
return error
fi
fi
*/
var errSomeDiskUnformatted = errors.New("some disks are found to be unformatted")
var errSomeDiskOffline = errors.New("some disks are offline")
// Returns error slice into understandable errors.
func reduceFormatErrs(errs []error, diskCount int) error {
var errUnformattedDiskCount = 0
var errDiskNotFoundCount = 0
for _, err := range errs {
if err == errUnformattedDisk {
errUnformattedDiskCount++
} else if err == errDiskNotFound {
errDiskNotFoundCount++
}
}
// Returns errUnformattedDisk if all disks report unFormattedDisk.
if errUnformattedDiskCount == diskCount {
return errUnformattedDisk
} else if errUnformattedDiskCount < diskCount && errDiskNotFoundCount == 0 {
// Only some disks return unFormattedDisk and all disks are online.
return errSomeDiskUnformatted
} else if errUnformattedDiskCount < diskCount && errDiskNotFoundCount > 0 {
// Only some disks return unFormattedDisk and some disks are
// offline as well.
return errSomeDiskOffline
}
return nil
}
// loadAllFormats - load all format config from all input disks in parallel.
func loadAllFormats(bootstrapDisks []StorageAPI) ([]*formatConfigV1, []error) {
// Initialize sync waitgroup.
var wg = &sync.WaitGroup{}
// Initialize list of errors.
var sErrs = make([]error, len(bootstrapDisks))
// Initialize format configs.
var formatConfigs = make([]*formatConfigV1, len(bootstrapDisks))
// Make a volume entry on all underlying storage disks.
for index, disk := range bootstrapDisks {
wg.Add(1)
// Make a volume inside a go-routine.
go func(index int, disk StorageAPI) {
defer wg.Done()
formatConfig, lErr := loadFormat(disk)
if lErr != nil {
sErrs[index] = lErr
return
}
formatConfigs[index] = formatConfig
}(index, disk)
}
// Wait for all make vol to finish.
wg.Wait()
for _, err := range sErrs {
if err != nil {
// Return all formats and errors.
return formatConfigs, sErrs
}
}
// Return all formats and nil
return formatConfigs, nil
}
// genericFormatCheck - validates and returns error.
// if (no quorum) return error
// if (any disk is corrupt) return error // phase2
// if (jbod inconsistent) return error // phase2
// if (disks not recognized) // Always error.
func genericFormatCheck(formatConfigs []*formatConfigV1, sErrs []error) (err error) {
// Calculate the errors.
var (
errCorruptFormatCount = 0
errCount = 0
)
// Through all errors calculate the actual errors.
for _, lErr := range sErrs {
if lErr == nil {
continue
}
// These errors are good conditions, means disk is online.
if lErr == errUnformattedDisk || lErr == errVolumeNotFound {
continue
}
if lErr == errCorruptedFormat {
errCorruptFormatCount++
} else {
errCount++
}
}
// Calculate read quorum.
readQuorum := len(formatConfigs)/2 + 1
// Validate the err count under tolerant limit.
if errCount > len(formatConfigs)-readQuorum {
return errXLReadQuorum
}
// One of the disk has corrupt format, return error.
if errCorruptFormatCount > 0 {
return errCorruptedFormat
}
// Validates if format and JBOD are consistent across all disks.
if err = checkFormatXL(formatConfigs); err != nil {
return err
}
// Success..
return nil
}
// checkDisksConsistency - checks if all disks are consistent with all JBOD entries on all disks.
func checkDisksConsistency(formatConfigs []*formatConfigV1) error {
var disks = make([]string, len(formatConfigs))
var disksFound = make(map[string]bool)
// Collect currently available disk uuids.
for index, formatConfig := range formatConfigs {
if formatConfig == nil {
continue
}
disks[index] = formatConfig.XL.Disk
}
// Validate collected uuids and verify JBOD.
for index, uuid := range disks {
if uuid == "" {
continue
}
var formatConfig = formatConfigs[index]
for _, savedUUID := range formatConfig.XL.JBOD {
if savedUUID == uuid {
disksFound[uuid] = true
}
}
}
// Check if all disks are found.
for _, value := range disksFound {
if !value {
return errors.New("Some disks not found in JBOD.")
}
}
return nil
}
// checkJBODConsistency - validate xl jbod order if they are consistent.
@@ -60,7 +274,7 @@ func checkJBODConsistency(formatConfigs []*formatConfigV1) error {
}
savedJBODStr := strings.Join(format.XL.JBOD, ".")
if jbodStr != savedJBODStr {
return errors.New("Inconsistent disks.")
return errors.New("Inconsistent JBOD found.")
}
}
return nil
@@ -87,10 +301,8 @@ func reorderDisks(bootstrapDisks []StorageAPI, formatConfigs []*formatConfigV1)
}
// Pick the first JBOD list to verify the order and construct new set of disk slice.
var newDisks = make([]StorageAPI, len(bootstrapDisks))
var unclaimedJBODIndex = make(map[int]struct{})
for fIndex, format := range formatConfigs {
if format == nil {
unclaimedJBODIndex[fIndex] = struct{}{}
continue
}
jIndex := findIndex(format.XL.Disk, savedJBOD)
@@ -99,17 +311,6 @@ func reorderDisks(bootstrapDisks []StorageAPI, formatConfigs []*formatConfigV1)
}
newDisks[jIndex] = bootstrapDisks[fIndex]
}
// Save the unclaimed jbods as well.
for index, disk := range newDisks {
if disk == nil {
for fIndex := range unclaimedJBODIndex {
newDisks[index] = bootstrapDisks[fIndex]
delete(unclaimedJBODIndex, fIndex)
break
}
continue
}
}
return newDisks, nil
}
@@ -146,83 +347,104 @@ func loadFormat(disk StorageAPI) (format *formatConfigV1, err error) {
// Heals any missing format.json on the drives. Returns error only for unexpected errors
// as regular errors can be ignored since there might be enough quorum to be operational.
func healFormatXL(bootstrapDisks []StorageAPI) error {
needHeal := make([]bool, len(bootstrapDisks)) // Slice indicating which drives needs healing.
formatConfigs := make([]*formatConfigV1, len(bootstrapDisks))
var referenceConfig *formatConfigV1
successCount := 0 // Tracks if we have successfully loaded all `format.json` from all disks.
formatNotFoundCount := 0 // Tracks if we `format.json` is not found on all disks.
// Loads `format.json` from all disks.
for index, disk := range bootstrapDisks {
formatXL, err := loadFormat(disk)
if err != nil {
if err == errUnformattedDisk {
// format.json is missing, should be healed.
needHeal[index] = true
formatNotFoundCount++
continue
} else if err == errDiskNotFound { // Is a valid case we
// can proceed without healing.
return nil
}
// Return error for unsupported errors.
return err
} // Success.
formatConfigs[index] = formatXL
successCount++
}
// All `format.json` has been read successfully, previously completed.
if successCount == len(bootstrapDisks) {
// Return success.
return nil
}
// All disks are fresh, format.json will be written by initFormatXL()
if formatNotFoundCount == len(bootstrapDisks) {
return initFormatXL(bootstrapDisks)
}
// Validate format configs for consistency in JBOD and disks.
if err := checkFormatXL(formatConfigs); err != nil {
return err
}
if referenceConfig == nil {
// This config will be used to update the drives missing format.json.
for _, formatConfig := range formatConfigs {
if formatConfig == nil {
continue
}
referenceConfig = formatConfig
break
}
}
uuidUsage := make([]struct {
uuid string // Disk uuid
inuse bool // indicates if the uuid is used by any disk
inUse bool // indicates if the uuid is used by
// any disk
}, len(bootstrapDisks))
needHeal := make([]bool, len(bootstrapDisks)) // Slice indicating which drives needs healing.
// Returns any unused drive UUID.
getUnusedUUID := func() string {
for index := range uuidUsage {
if !uuidUsage[index].inuse {
uuidUsage[index].inuse = true
if !uuidUsage[index].inUse {
uuidUsage[index].inUse = true
return uuidUsage[index].uuid
}
}
return ""
}
formatConfigs := make([]*formatConfigV1, len(bootstrapDisks))
var referenceConfig *formatConfigV1
for index, disk := range bootstrapDisks {
formatXL, err := loadFormat(disk)
if err == errUnformattedDisk {
// format.json is missing, should be healed.
needHeal[index] = true
continue
}
if err == nil {
if referenceConfig == nil {
// this config will be used to update the drives missing format.json
referenceConfig = formatXL
}
formatConfigs[index] = formatXL
} else {
// Abort format.json healing if any one of the drives is not available because we don't
// know if that drive is down permanently or temporarily. So we don't want to reuse
// its uuid for any other disks.
// Return nil so that operations can continue if quorum is available.
return nil
}
}
if referenceConfig == nil {
// All disks are fresh, format.json will be written by initFormatXL()
return nil
}
// From reference config update UUID's not be in use.
for index, diskUUID := range referenceConfig.XL.JBOD {
uuidUsage[index].uuid = diskUUID
uuidUsage[index].inuse = false
uuidUsage[index].inUse = false
}
// For all config formats validate if they are in use and update
// the uuidUsage values.
// For all config formats validate if they are in use and
// update the uuidUsage values.
for _, config := range formatConfigs {
if config == nil {
continue
}
for index := range uuidUsage {
if config.XL.Disk == uuidUsage[index].uuid {
uuidUsage[index].inuse = true
uuidUsage[index].inUse = true
break
}
}
}
// This section heals the format.json and updates the fresh disks
// by reapply the unused UUID's .
// by apply a new UUID for all the fresh disks.
for index, heal := range needHeal {
if !heal {
// Previously we detected that heal is not needed on the disk.
continue
}
config := &formatConfigV1{}
*config = *referenceConfig
config.XL.Disk = getUnusedUUID()
if config.XL.Disk == "" {
// getUnusedUUID() should have returned an unused uuid, it
// getUnusedUUID() should have
// returned an unused uuid, it
// is an unexpected error.
return errUnexpected
}
@@ -231,10 +453,10 @@ func healFormatXL(bootstrapDisks []StorageAPI) error {
if err != nil {
return err
}
// Fresh disk without format.json
_, _ = bootstrapDisks[index].AppendFile(minioMetaBucket, formatConfigFile, formatBytes)
// Ignore any error from AppendFile() as quorum might still be there to be operational.
// Ignore any error from AppendFile() as
// quorum might still be there to be operational.
}
return nil
}
@@ -246,12 +468,6 @@ func loadFormatXL(bootstrapDisks []StorageAPI) (disks []StorageAPI, err error) {
var diskNotFoundCount = 0
formatConfigs := make([]*formatConfigV1, len(bootstrapDisks))
// Heal missing format.json on the drives.
if err = healFormatXL(bootstrapDisks); err != nil {
// There was an unexpected unrecoverable error during healing.
return nil, err
}
// Try to load `format.json` bootstrap disks.
for index, disk := range bootstrapDisks {
var formatXL *formatConfigV1
@@ -277,9 +493,9 @@ func loadFormatXL(bootstrapDisks []StorageAPI) (disks []StorageAPI, err error) {
} else if diskNotFoundCount == len(bootstrapDisks) {
return nil, errDiskNotFound
} else if diskNotFoundCount > len(bootstrapDisks)-(len(bootstrapDisks)/2+1) {
return nil, errReadQuorum
return nil, errXLReadQuorum
} else if unformattedDisksFoundCnt > len(bootstrapDisks)-(len(bootstrapDisks)/2+1) {
return nil, errReadQuorum
return nil, errXLReadQuorum
}
// Validate the format configs read are correct.
@@ -310,7 +526,10 @@ func checkFormatXL(formatConfigs []*formatConfigV1) error {
return fmt.Errorf("Number of disks %d did not match the backend format %d", len(formatConfigs), len(formatXL.XL.JBOD))
}
}
return checkJBODConsistency(formatConfigs)
if err := checkJBODConsistency(formatConfigs); err != nil {
return err
}
return checkDisksConsistency(formatConfigs)
}
// initFormatXL - save XL format configuration on all disks.
@@ -328,7 +547,7 @@ func initFormatXL(storageDisks []StorageAPI) (err error) {
if saveFormatErrCnt <= len(storageDisks)-(len(storageDisks)/2+3) {
continue
}
return errWriteQuorum
return errXLWriteQuorum
}
}
var u *uuid.UUID