allow server to start even with corrupted/faulty disks (#10175)

This commit is contained in:
Harshavardhana 2020-08-03 18:17:48 -07:00 committed by GitHub
parent 5ce82b45da
commit b16781846e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 58 additions and 73 deletions

View File

@ -187,12 +187,6 @@ Example 1:
"",
)
ErrCorruptedBackend = newErrFn(
"Unable to use the specified backend, pre-existing content detected",
"Please ensure your disk mount does not have any pre-existing content",
"",
)
ErrUnableToWriteInBackend = newErrFn(
"Unable to write to the backend",
"Please ensure MinIO binary has write permissions for the backend",

View File

@ -18,6 +18,7 @@ package cmd
import (
"context"
"errors"
"fmt"
"io"
"sync"
@ -197,10 +198,10 @@ func listAllBuckets(storageDisks []StorageAPI, healBuckets map[string]VolInfo) (
// Only heal on disks where we are sure that healing is needed. We can expand
// this list as and when we figure out more errors can be added to this list safely.
func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, quorumModTime time.Time) bool {
switch erErr {
case errFileNotFound, errFileVersionNotFound:
switch {
case errors.Is(erErr, errFileNotFound) || errors.Is(erErr, errFileVersionNotFound):
return true
case errCorruptedFormat:
case errors.Is(erErr, errCorruptedFormat):
return true
}
if erErr == nil {
@ -686,9 +687,9 @@ func isObjectDangling(metaArr []FileInfo, errs []error, dataErrs []error) (valid
// or when er.meta is not readable in read quorum disks.
var notFoundErasureMeta, corruptedErasureMeta int
for _, readErr := range errs {
if readErr == errFileNotFound || readErr == errFileVersionNotFound {
if errors.Is(readErr, errFileNotFound) || errors.Is(readErr, errFileVersionNotFound) {
notFoundErasureMeta++
} else if readErr == errCorruptedFormat {
} else if errors.Is(readErr, errCorruptedFormat) {
corruptedErasureMeta++
}
}
@ -699,7 +700,10 @@ func isObjectDangling(metaArr []FileInfo, errs []error, dataErrs []error) (valid
// double counting when both parts and er.meta
// are not available.
if errs[i] != dataErrs[i] {
if dataErrs[i] == errFileNotFound || dataErrs[i] == errFileVersionNotFound {
if IsErr(dataErrs[i], []error{
errFileNotFound,
errFileVersionNotFound,
}...) {
notFoundParts++
}
}

View File

@ -1199,21 +1199,11 @@ func (s *erasureSets) ReloadFormat(ctx context.Context, dryRun bool) (err error)
}
}(storageDisks)
formats, sErrs := loadFormatErasureAll(storageDisks, false)
formats, _ := loadFormatErasureAll(storageDisks, false)
if err = checkFormatErasureValues(formats, s.drivesPerSet); err != nil {
return err
}
for index, sErr := range sErrs {
if sErr != nil {
// Look for acceptable heal errors, for any other
// errors we should simply quit and return.
if _, ok := formatHealErrors[sErr]; !ok {
return fmt.Errorf("Disk %s: %w", s.endpoints[index], sErr)
}
}
}
refFormat, err := getFormatErasureInQuorum(formats)
if err != nil {
return err
@ -1357,16 +1347,6 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
res.After.Drives[k] = madmin.HealDriveInfo(v)
}
for index, sErr := range sErrs {
if sErr != nil {
// Look for acceptable heal errors, for any other
// errors we should simply quit and return.
if _, ok := formatHealErrors[sErr]; !ok {
return res, fmt.Errorf("Disk %s: %w", s.endpoints[index], sErr)
}
}
}
if countErrs(sErrs, errUnformattedDisk) == 0 {
// No unformatted disks found; disks are either offline
// or online, so no healing is required.

View File

@ -18,6 +18,7 @@ package cmd
import (
"context"
"errors"
"fmt"
"sort"
"sync"
@ -89,18 +90,18 @@ func (d byDiskTotal) Less(i, j int) bool {
func diskErrToDriveState(err error) (state string) {
state = madmin.DriveStateUnknown
switch err {
case errDiskNotFound:
switch {
case errors.Is(err, errDiskNotFound):
state = madmin.DriveStateOffline
case errCorruptedFormat:
case errors.Is(err, errCorruptedFormat):
state = madmin.DriveStateCorrupt
case errUnformattedDisk:
case errors.Is(err, errUnformattedDisk):
state = madmin.DriveStateUnformatted
case errDiskAccessDenied:
case errors.Is(err, errDiskAccessDenied):
state = madmin.DriveStatePermission
case errFaultyDisk:
case errors.Is(err, errFaultyDisk):
state = madmin.DriveStateFaulty
case nil:
case err == nil:
state = madmin.DriveStateOk
}
return

View File

@ -27,7 +27,6 @@ import (
"sync"
humanize "github.com/dustin/go-humanize"
"github.com/minio/minio/cmd/config"
"github.com/minio/minio/cmd/config/storageclass"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/color"
@ -58,18 +57,6 @@ const (
// Offline disk UUID represents an offline disk.
const offlineDiskUUID = "ffffffff-ffff-ffff-ffff-ffffffffffff"
// Healing is only supported for the list of errors mentioned here.
var formatHealErrors = map[error]struct{}{
errUnformattedDisk: {},
errDiskNotFound: {},
}
// List of errors considered critical for disk formatting.
var formatCriticalErrors = map[error]struct{}{
errCorruptedFormat: {},
errFaultyDisk: {},
}
// Used to detect the version of "xl" format.
type formatErasureVersionDetect struct {
Erasure struct {
@ -415,7 +402,8 @@ func loadFormatErasure(disk StorageAPI) (format *formatErasureV3, err error) {
}
if !isHiddenDirectories(vols...) {
// 'format.json' not found, but we found user data, reject such disks.
return nil, errCorruptedFormat
return nil, fmt.Errorf("some unexpected files '%v' found on %s: %w",
vols, disk, errCorruptedFormat)
}
// No other data found, it's a fresh disk.
return nil, errUnformattedDisk
@ -490,7 +478,8 @@ func formatErasureGetDeploymentID(refFormat *formatErasureV3, formats []*formatE
} else if deploymentID != format.ID {
// DeploymentID found earlier doesn't match with the
// current format.json's ID.
return "", errCorruptedFormat
return "", fmt.Errorf("Deployment IDs do not match expected %s, got %s: %w",
deploymentID, format.ID, errCorruptedFormat)
}
}
}
@ -500,14 +489,7 @@ func formatErasureGetDeploymentID(refFormat *formatErasureV3, formats []*formatE
// formatErasureFixDeploymentID - Add deployment id if it is not present.
func formatErasureFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, refFormat *formatErasureV3) (err error) {
// Attempt to load all `format.json` from all disks.
var sErrs []error
formats, sErrs := loadFormatErasureAll(storageDisks, false)
for i, sErr := range sErrs {
if _, ok := formatCriticalErrors[sErr]; ok {
return config.ErrCorruptedBackend(err).Hint(fmt.Sprintf("Clear any pre-existing content on %s", endpoints[i]))
}
}
formats, _ := loadFormatErasureAll(storageDisks, false)
for index := range formats {
// If the Erasure sets do not match, set those formats to nil;
// we do not have to update the ID on those format.json files.
@ -515,6 +497,7 @@ func formatErasureFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI
formats[index] = nil
}
}
refFormat.ID, err = formatErasureGetDeploymentID(refFormat, formats)
if err != nil {
return err

View File

@ -18,6 +18,7 @@ package cmd
import (
"encoding/json"
"errors"
"io/ioutil"
"os"
"reflect"
@ -436,8 +437,8 @@ func TestGetErasureID(t *testing.T) {
}
formats[2].ID = "bad-id"
if _, err = formatErasureGetDeploymentID(quorumFormat, formats); err != errCorruptedFormat {
t.Fatal("Unexpected Success")
if _, err = formatErasureGetDeploymentID(quorumFormat, formats); !errors.Is(err, errCorruptedFormat) {
t.Fatalf("Unexpect error %s", err)
}
}

View File

@ -27,7 +27,6 @@ import (
"time"
"github.com/dustin/go-humanize"
"github.com/minio/minio/cmd/config"
xhttp "github.com/minio/minio/cmd/http"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/sync/errgroup"
@ -253,10 +252,7 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
formatConfigs, sErrs := loadFormatErasureAll(storageDisks, false)
// Check if we have any errors from loading 'format.json'.
for i, sErr := range sErrs {
if _, ok := formatCriticalErrors[sErr]; ok {
return nil, nil, config.ErrCorruptedBackend(err).Hint(fmt.Sprintf("Clear any pre-existing content on %s", endpoints[i]))
}
// not critical error but still print the error, nonetheless, which is perhaps unhandled
// print the error, nonetheless, which is perhaps unhandled
if sErr != errUnformattedDisk && sErr != errDiskNotFound && retryCount >= 5 {
if sErr != nil {
logger.Info("Unable to read 'format.json' from %s: %v\n", endpoints[i], sErr)

View File

@ -19,10 +19,10 @@ package cmd
import "os"
// errUnexpected - unexpected error, requires manual intervention.
var errUnexpected = StorageErr("Unexpected error, please report this issue at https://github.com/minio/minio/issues")
var errUnexpected = StorageErr("unexpected error, please report this issue at https://github.com/minio/minio/issues")
// errCorruptedFormat - corrupted backend format.
var errCorruptedFormat = StorageErr("corrupted backend format, please join https://slack.min.io for assistance")
var errCorruptedFormat = StorageErr("corrupted backend format, specified disk mount has unexpected previous content")
// errUnformattedDisk - unformatted disk found.
var errUnformattedDisk = StorageErr("unformatted disk found")

View File

@ -505,6 +505,7 @@ func (s *xlStorage) GetDiskID() (string, error) {
// Somebody else got the lock first.
return diskID, nil
}
formatFile := pathJoin(s.diskPath, minioMetaBucket, formatConfigFile)
fi, err := os.Stat(formatFile)
if err != nil {
@ -520,8 +521,12 @@ func (s *xlStorage) GetDiskID() (string, error) {
} else if os.IsPermission(err) {
return "", errDiskAccessDenied
}
return "", err
logger.LogIf(GlobalContext, err) // log unexpected errors
return "", errCorruptedFormat
} else if os.IsPermission(err) {
return "", errDiskAccessDenied
}
logger.LogIf(GlobalContext, err) // log unexpected errors
return "", errCorruptedFormat
}
@ -533,13 +538,34 @@ func (s *xlStorage) GetDiskID() (string, error) {
b, err := ioutil.ReadFile(formatFile)
if err != nil {
// If the disk is still not initialized.
if os.IsNotExist(err) {
_, err = os.Stat(s.diskPath)
if err == nil {
// Disk is present but missing `format.json`
return "", errUnformattedDisk
}
if os.IsNotExist(err) {
return "", errDiskNotFound
} else if os.IsPermission(err) {
return "", errDiskAccessDenied
}
logger.LogIf(GlobalContext, err) // log unexpected errors
return "", errCorruptedFormat
} else if os.IsPermission(err) {
return "", errDiskAccessDenied
}
logger.LogIf(GlobalContext, err) // log unexpected errors
return "", errCorruptedFormat
}
format := &formatErasureV3{}
var json = jsoniter.ConfigCompatibleWithStandardLibrary
if err = json.Unmarshal(b, &format); err != nil {
logger.LogIf(GlobalContext, err) // log unexpected errors
return "", errCorruptedFormat
}
s.diskID = format.Erasure.This
s.formatFileInfo = fi
s.formatLastCheck = time.Now()