mirror of
https://github.com/minio/minio.git
synced 2025-04-22 11:26:36 -04:00
allow server to start even with corrupted/faulty disks (#10175)
This commit is contained in:
parent
5ce82b45da
commit
b16781846e
@ -187,12 +187,6 @@ Example 1:
|
|||||||
"",
|
"",
|
||||||
)
|
)
|
||||||
|
|
||||||
ErrCorruptedBackend = newErrFn(
|
|
||||||
"Unable to use the specified backend, pre-existing content detected",
|
|
||||||
"Please ensure your disk mount does not have any pre-existing content",
|
|
||||||
"",
|
|
||||||
)
|
|
||||||
|
|
||||||
ErrUnableToWriteInBackend = newErrFn(
|
ErrUnableToWriteInBackend = newErrFn(
|
||||||
"Unable to write to the backend",
|
"Unable to write to the backend",
|
||||||
"Please ensure MinIO binary has write permissions for the backend",
|
"Please ensure MinIO binary has write permissions for the backend",
|
||||||
|
@ -18,6 +18,7 @@ package cmd
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"sync"
|
"sync"
|
||||||
@ -197,10 +198,10 @@ func listAllBuckets(storageDisks []StorageAPI, healBuckets map[string]VolInfo) (
|
|||||||
// Only heal on disks where we are sure that healing is needed. We can expand
|
// Only heal on disks where we are sure that healing is needed. We can expand
|
||||||
// this list as and when we figure out more errors can be added to this list safely.
|
// this list as and when we figure out more errors can be added to this list safely.
|
||||||
func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, quorumModTime time.Time) bool {
|
func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, quorumModTime time.Time) bool {
|
||||||
switch erErr {
|
switch {
|
||||||
case errFileNotFound, errFileVersionNotFound:
|
case errors.Is(erErr, errFileNotFound) || errors.Is(erErr, errFileVersionNotFound):
|
||||||
return true
|
return true
|
||||||
case errCorruptedFormat:
|
case errors.Is(erErr, errCorruptedFormat):
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if erErr == nil {
|
if erErr == nil {
|
||||||
@ -686,9 +687,9 @@ func isObjectDangling(metaArr []FileInfo, errs []error, dataErrs []error) (valid
|
|||||||
// or when er.meta is not readable in read quorum disks.
|
// or when er.meta is not readable in read quorum disks.
|
||||||
var notFoundErasureMeta, corruptedErasureMeta int
|
var notFoundErasureMeta, corruptedErasureMeta int
|
||||||
for _, readErr := range errs {
|
for _, readErr := range errs {
|
||||||
if readErr == errFileNotFound || readErr == errFileVersionNotFound {
|
if errors.Is(readErr, errFileNotFound) || errors.Is(readErr, errFileVersionNotFound) {
|
||||||
notFoundErasureMeta++
|
notFoundErasureMeta++
|
||||||
} else if readErr == errCorruptedFormat {
|
} else if errors.Is(readErr, errCorruptedFormat) {
|
||||||
corruptedErasureMeta++
|
corruptedErasureMeta++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -699,7 +700,10 @@ func isObjectDangling(metaArr []FileInfo, errs []error, dataErrs []error) (valid
|
|||||||
// double counting when both parts and er.meta
|
// double counting when both parts and er.meta
|
||||||
// are not available.
|
// are not available.
|
||||||
if errs[i] != dataErrs[i] {
|
if errs[i] != dataErrs[i] {
|
||||||
if dataErrs[i] == errFileNotFound || dataErrs[i] == errFileVersionNotFound {
|
if IsErr(dataErrs[i], []error{
|
||||||
|
errFileNotFound,
|
||||||
|
errFileVersionNotFound,
|
||||||
|
}...) {
|
||||||
notFoundParts++
|
notFoundParts++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1199,21 +1199,11 @@ func (s *erasureSets) ReloadFormat(ctx context.Context, dryRun bool) (err error)
|
|||||||
}
|
}
|
||||||
}(storageDisks)
|
}(storageDisks)
|
||||||
|
|
||||||
formats, sErrs := loadFormatErasureAll(storageDisks, false)
|
formats, _ := loadFormatErasureAll(storageDisks, false)
|
||||||
if err = checkFormatErasureValues(formats, s.drivesPerSet); err != nil {
|
if err = checkFormatErasureValues(formats, s.drivesPerSet); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
for index, sErr := range sErrs {
|
|
||||||
if sErr != nil {
|
|
||||||
// Look for acceptable heal errors, for any other
|
|
||||||
// errors we should simply quit and return.
|
|
||||||
if _, ok := formatHealErrors[sErr]; !ok {
|
|
||||||
return fmt.Errorf("Disk %s: %w", s.endpoints[index], sErr)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
refFormat, err := getFormatErasureInQuorum(formats)
|
refFormat, err := getFormatErasureInQuorum(formats)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@ -1357,16 +1347,6 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
|
|||||||
res.After.Drives[k] = madmin.HealDriveInfo(v)
|
res.After.Drives[k] = madmin.HealDriveInfo(v)
|
||||||
}
|
}
|
||||||
|
|
||||||
for index, sErr := range sErrs {
|
|
||||||
if sErr != nil {
|
|
||||||
// Look for acceptable heal errors, for any other
|
|
||||||
// errors we should simply quit and return.
|
|
||||||
if _, ok := formatHealErrors[sErr]; !ok {
|
|
||||||
return res, fmt.Errorf("Disk %s: %w", s.endpoints[index], sErr)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if countErrs(sErrs, errUnformattedDisk) == 0 {
|
if countErrs(sErrs, errUnformattedDisk) == 0 {
|
||||||
// No unformatted disks found disks are either offline
|
// No unformatted disks found disks are either offline
|
||||||
// or online, no healing is required.
|
// or online, no healing is required.
|
||||||
|
@ -18,6 +18,7 @@ package cmd
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"sort"
|
"sort"
|
||||||
"sync"
|
"sync"
|
||||||
@ -89,18 +90,18 @@ func (d byDiskTotal) Less(i, j int) bool {
|
|||||||
|
|
||||||
func diskErrToDriveState(err error) (state string) {
|
func diskErrToDriveState(err error) (state string) {
|
||||||
state = madmin.DriveStateUnknown
|
state = madmin.DriveStateUnknown
|
||||||
switch err {
|
switch {
|
||||||
case errDiskNotFound:
|
case errors.Is(err, errDiskNotFound):
|
||||||
state = madmin.DriveStateOffline
|
state = madmin.DriveStateOffline
|
||||||
case errCorruptedFormat:
|
case errors.Is(err, errCorruptedFormat):
|
||||||
state = madmin.DriveStateCorrupt
|
state = madmin.DriveStateCorrupt
|
||||||
case errUnformattedDisk:
|
case errors.Is(err, errUnformattedDisk):
|
||||||
state = madmin.DriveStateUnformatted
|
state = madmin.DriveStateUnformatted
|
||||||
case errDiskAccessDenied:
|
case errors.Is(err, errDiskAccessDenied):
|
||||||
state = madmin.DriveStatePermission
|
state = madmin.DriveStatePermission
|
||||||
case errFaultyDisk:
|
case errors.Is(err, errFaultyDisk):
|
||||||
state = madmin.DriveStateFaulty
|
state = madmin.DriveStateFaulty
|
||||||
case nil:
|
case err == nil:
|
||||||
state = madmin.DriveStateOk
|
state = madmin.DriveStateOk
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
|
@ -27,7 +27,6 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
humanize "github.com/dustin/go-humanize"
|
humanize "github.com/dustin/go-humanize"
|
||||||
"github.com/minio/minio/cmd/config"
|
|
||||||
"github.com/minio/minio/cmd/config/storageclass"
|
"github.com/minio/minio/cmd/config/storageclass"
|
||||||
"github.com/minio/minio/cmd/logger"
|
"github.com/minio/minio/cmd/logger"
|
||||||
"github.com/minio/minio/pkg/color"
|
"github.com/minio/minio/pkg/color"
|
||||||
@ -58,18 +57,6 @@ const (
|
|||||||
// Offline disk UUID represents an offline disk.
|
// Offline disk UUID represents an offline disk.
|
||||||
const offlineDiskUUID = "ffffffff-ffff-ffff-ffff-ffffffffffff"
|
const offlineDiskUUID = "ffffffff-ffff-ffff-ffff-ffffffffffff"
|
||||||
|
|
||||||
// Healing is only supported for the list of errors mentioned here.
|
|
||||||
var formatHealErrors = map[error]struct{}{
|
|
||||||
errUnformattedDisk: {},
|
|
||||||
errDiskNotFound: {},
|
|
||||||
}
|
|
||||||
|
|
||||||
// List of errors considered critical for disk formatting.
|
|
||||||
var formatCriticalErrors = map[error]struct{}{
|
|
||||||
errCorruptedFormat: {},
|
|
||||||
errFaultyDisk: {},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used to detect the version of "xl" format.
|
// Used to detect the version of "xl" format.
|
||||||
type formatErasureVersionDetect struct {
|
type formatErasureVersionDetect struct {
|
||||||
Erasure struct {
|
Erasure struct {
|
||||||
@ -415,7 +402,8 @@ func loadFormatErasure(disk StorageAPI) (format *formatErasureV3, err error) {
|
|||||||
}
|
}
|
||||||
if !isHiddenDirectories(vols...) {
|
if !isHiddenDirectories(vols...) {
|
||||||
// 'format.json' not found, but we found user data, reject such disks.
|
// 'format.json' not found, but we found user data, reject such disks.
|
||||||
return nil, errCorruptedFormat
|
return nil, fmt.Errorf("some unexpected files '%v' found on %s: %w",
|
||||||
|
vols, disk, errCorruptedFormat)
|
||||||
}
|
}
|
||||||
// No other data found, its a fresh disk.
|
// No other data found, its a fresh disk.
|
||||||
return nil, errUnformattedDisk
|
return nil, errUnformattedDisk
|
||||||
@ -490,7 +478,8 @@ func formatErasureGetDeploymentID(refFormat *formatErasureV3, formats []*formatE
|
|||||||
} else if deploymentID != format.ID {
|
} else if deploymentID != format.ID {
|
||||||
// DeploymentID found earlier doesn't match with the
|
// DeploymentID found earlier doesn't match with the
|
||||||
// current format.json's ID.
|
// current format.json's ID.
|
||||||
return "", errCorruptedFormat
|
return "", fmt.Errorf("Deployment IDs do not match expected %s, got %s: %w",
|
||||||
|
deploymentID, format.ID, errCorruptedFormat)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -500,14 +489,7 @@ func formatErasureGetDeploymentID(refFormat *formatErasureV3, formats []*formatE
|
|||||||
// formatErasureFixDeploymentID - Add deployment id if it is not present.
|
// formatErasureFixDeploymentID - Add deployment id if it is not present.
|
||||||
func formatErasureFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, refFormat *formatErasureV3) (err error) {
|
func formatErasureFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, refFormat *formatErasureV3) (err error) {
|
||||||
// Attempt to load all `format.json` from all disks.
|
// Attempt to load all `format.json` from all disks.
|
||||||
var sErrs []error
|
formats, _ := loadFormatErasureAll(storageDisks, false)
|
||||||
formats, sErrs := loadFormatErasureAll(storageDisks, false)
|
|
||||||
for i, sErr := range sErrs {
|
|
||||||
if _, ok := formatCriticalErrors[sErr]; ok {
|
|
||||||
return config.ErrCorruptedBackend(err).Hint(fmt.Sprintf("Clear any pre-existing content on %s", endpoints[i]))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for index := range formats {
|
for index := range formats {
|
||||||
// If the Erasure sets do not match, set those formats to nil,
|
// If the Erasure sets do not match, set those formats to nil,
|
||||||
// We do not have to update the ID on those format.json file.
|
// We do not have to update the ID on those format.json file.
|
||||||
@ -515,6 +497,7 @@ func formatErasureFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI
|
|||||||
formats[index] = nil
|
formats[index] = nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
refFormat.ID, err = formatErasureGetDeploymentID(refFormat, formats)
|
refFormat.ID, err = formatErasureGetDeploymentID(refFormat, formats)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -18,6 +18,7 @@ package cmd
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"reflect"
|
"reflect"
|
||||||
@ -436,8 +437,8 @@ func TestGetErasureID(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
formats[2].ID = "bad-id"
|
formats[2].ID = "bad-id"
|
||||||
if _, err = formatErasureGetDeploymentID(quorumFormat, formats); err != errCorruptedFormat {
|
if _, err = formatErasureGetDeploymentID(quorumFormat, formats); !errors.Is(err, errCorruptedFormat) {
|
||||||
t.Fatal("Unexpected Success")
|
t.Fatalf("Unexpect error %s", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,7 +27,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/dustin/go-humanize"
|
"github.com/dustin/go-humanize"
|
||||||
"github.com/minio/minio/cmd/config"
|
|
||||||
xhttp "github.com/minio/minio/cmd/http"
|
xhttp "github.com/minio/minio/cmd/http"
|
||||||
"github.com/minio/minio/cmd/logger"
|
"github.com/minio/minio/cmd/logger"
|
||||||
"github.com/minio/minio/pkg/sync/errgroup"
|
"github.com/minio/minio/pkg/sync/errgroup"
|
||||||
@ -253,10 +252,7 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
|
|||||||
formatConfigs, sErrs := loadFormatErasureAll(storageDisks, false)
|
formatConfigs, sErrs := loadFormatErasureAll(storageDisks, false)
|
||||||
// Check if we have
|
// Check if we have
|
||||||
for i, sErr := range sErrs {
|
for i, sErr := range sErrs {
|
||||||
if _, ok := formatCriticalErrors[sErr]; ok {
|
// print the error, nonetheless, which is perhaps unhandled
|
||||||
return nil, nil, config.ErrCorruptedBackend(err).Hint(fmt.Sprintf("Clear any pre-existing content on %s", endpoints[i]))
|
|
||||||
}
|
|
||||||
// not critical error but still print the error, nonetheless, which is perhaps unhandled
|
|
||||||
if sErr != errUnformattedDisk && sErr != errDiskNotFound && retryCount >= 5 {
|
if sErr != errUnformattedDisk && sErr != errDiskNotFound && retryCount >= 5 {
|
||||||
if sErr != nil {
|
if sErr != nil {
|
||||||
logger.Info("Unable to read 'format.json' from %s: %v\n", endpoints[i], sErr)
|
logger.Info("Unable to read 'format.json' from %s: %v\n", endpoints[i], sErr)
|
||||||
|
@ -19,10 +19,10 @@ package cmd
|
|||||||
import "os"
|
import "os"
|
||||||
|
|
||||||
// errUnexpected - unexpected error, requires manual intervention.
|
// errUnexpected - unexpected error, requires manual intervention.
|
||||||
var errUnexpected = StorageErr("Unexpected error, please report this issue at https://github.com/minio/minio/issues")
|
var errUnexpected = StorageErr("unexpected error, please report this issue at https://github.com/minio/minio/issues")
|
||||||
|
|
||||||
// errCorruptedFormat - corrupted backend format.
|
// errCorruptedFormat - corrupted backend format.
|
||||||
var errCorruptedFormat = StorageErr("corrupted backend format, please join https://slack.min.io for assistance")
|
var errCorruptedFormat = StorageErr("corrupted backend format, specified disk mount has unexpected previous content")
|
||||||
|
|
||||||
// errUnformattedDisk - unformatted disk found.
|
// errUnformattedDisk - unformatted disk found.
|
||||||
var errUnformattedDisk = StorageErr("unformatted disk found")
|
var errUnformattedDisk = StorageErr("unformatted disk found")
|
||||||
|
@ -505,6 +505,7 @@ func (s *xlStorage) GetDiskID() (string, error) {
|
|||||||
// Somebody else got the lock first.
|
// Somebody else got the lock first.
|
||||||
return diskID, nil
|
return diskID, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
formatFile := pathJoin(s.diskPath, minioMetaBucket, formatConfigFile)
|
formatFile := pathJoin(s.diskPath, minioMetaBucket, formatConfigFile)
|
||||||
fi, err := os.Stat(formatFile)
|
fi, err := os.Stat(formatFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -520,8 +521,12 @@ func (s *xlStorage) GetDiskID() (string, error) {
|
|||||||
} else if os.IsPermission(err) {
|
} else if os.IsPermission(err) {
|
||||||
return "", errDiskAccessDenied
|
return "", errDiskAccessDenied
|
||||||
}
|
}
|
||||||
return "", err
|
logger.LogIf(GlobalContext, err) // log unexpected errors
|
||||||
|
return "", errCorruptedFormat
|
||||||
|
} else if os.IsPermission(err) {
|
||||||
|
return "", errDiskAccessDenied
|
||||||
}
|
}
|
||||||
|
logger.LogIf(GlobalContext, err) // log unexpected errors
|
||||||
return "", errCorruptedFormat
|
return "", errCorruptedFormat
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -533,13 +538,34 @@ func (s *xlStorage) GetDiskID() (string, error) {
|
|||||||
|
|
||||||
b, err := ioutil.ReadFile(formatFile)
|
b, err := ioutil.ReadFile(formatFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// If the disk is still not initialized.
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
_, err = os.Stat(s.diskPath)
|
||||||
|
if err == nil {
|
||||||
|
// Disk is present but missing `format.json`
|
||||||
|
return "", errUnformattedDisk
|
||||||
|
}
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return "", errDiskNotFound
|
||||||
|
} else if os.IsPermission(err) {
|
||||||
|
return "", errDiskAccessDenied
|
||||||
|
}
|
||||||
|
logger.LogIf(GlobalContext, err) // log unexpected errors
|
||||||
|
return "", errCorruptedFormat
|
||||||
|
} else if os.IsPermission(err) {
|
||||||
|
return "", errDiskAccessDenied
|
||||||
|
}
|
||||||
|
logger.LogIf(GlobalContext, err) // log unexpected errors
|
||||||
return "", errCorruptedFormat
|
return "", errCorruptedFormat
|
||||||
}
|
}
|
||||||
|
|
||||||
format := &formatErasureV3{}
|
format := &formatErasureV3{}
|
||||||
var json = jsoniter.ConfigCompatibleWithStandardLibrary
|
var json = jsoniter.ConfigCompatibleWithStandardLibrary
|
||||||
if err = json.Unmarshal(b, &format); err != nil {
|
if err = json.Unmarshal(b, &format); err != nil {
|
||||||
|
logger.LogIf(GlobalContext, err) // log unexpected errors
|
||||||
return "", errCorruptedFormat
|
return "", errCorruptedFormat
|
||||||
}
|
}
|
||||||
|
|
||||||
s.diskID = format.Erasure.This
|
s.diskID = format.Erasure.This
|
||||||
s.formatFileInfo = fi
|
s.formatFileInfo = fi
|
||||||
s.formatLastCheck = time.Now()
|
s.formatLastCheck = time.Now()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user