mirror of
https://github.com/minio/minio.git
synced 2025-01-12 15:33:22 -05:00
Fix preInit logic when mixed disk situations exist. (#4904)
When servers are started simultaneously across multiple nodes, or when simulating a local setup, one of the servers in the setup can reach a situation where it observes the following:
- Some servers are formatted
- Some servers are unformatted
- Some servers are offline
The current state machine doesn't handle this correctly. When we have unformatted, formatted and offline disks at the same time, we cannot decisively know the course of action, so we wait for the offline disks to change their state. Once the offline disks change their state to one of the following, we can decisively move forward:
- nil (formatted disk)
- errUnformattedDisk
- Or any other error such as errCorruptedDisk.
Fixes #4903
This commit is contained in:
parent
f66239e82f
commit
b9fc4150f6
@ -13,10 +13,11 @@ clone_folder: c:\gopath\src\github.com\minio\minio
|
|||||||
environment:
|
environment:
|
||||||
GOVERSION: 1.8.3
|
GOVERSION: 1.8.3
|
||||||
GOPATH: c:\gopath
|
GOPATH: c:\gopath
|
||||||
|
GOROOT: c:\go18
|
||||||
|
|
||||||
# scripts that run after cloning repository
|
# scripts that run after cloning repository
|
||||||
install:
|
install:
|
||||||
- set PATH=%GOPATH%\bin;c:\go\bin;%PATH%
|
- set PATH=%GOPATH%\bin;c:\go18\bin;%PATH%
|
||||||
- go version
|
- go version
|
||||||
- go env
|
- go env
|
||||||
- python --version
|
- python --version
|
||||||
|
@ -70,12 +70,11 @@ import (
|
|||||||
type InitActions int
|
type InitActions int
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// FormatDisks - see above table for disk states where it
|
// FormatDisks - see above table for disk states where it is applicable.
|
||||||
// is applicable.
|
|
||||||
FormatDisks InitActions = iota
|
FormatDisks InitActions = iota
|
||||||
|
|
||||||
// WaitForHeal - Wait for disks to heal.
|
// SuggestToHeal - Prints heal message and initializes object layer.
|
||||||
WaitForHeal
|
SuggestToHeal
|
||||||
|
|
||||||
// WaitForQuorum - Wait for quorum number of disks to be online.
|
// WaitForQuorum - Wait for quorum number of disks to be online.
|
||||||
WaitForQuorum
|
WaitForQuorum
|
||||||
@ -134,14 +133,14 @@ func quickErrToActions(errMap map[error]int) InitActions {
|
|||||||
// - Unformatted setup
|
// - Unformatted setup
|
||||||
// - Format/Wait for format when `disksUnformatted == diskCount`
|
// - Format/Wait for format when `disksUnformatted == diskCount`
|
||||||
//
|
//
|
||||||
// - Wait for all when `disksUnformatted + disksOffline == disksCount`
|
// - Wait for all when `disksUnformatted + disksFormatted + disksOffline == diskCount`
|
||||||
//
|
//
|
||||||
// Under all other conditions should lead to server initialization aborted.
|
// Under all other conditions should lead to server initialization aborted.
|
||||||
func prepForInitXL(firstDisk bool, sErrs []error, diskCount int) InitActions {
|
func prepForInitXL(firstDisk bool, sErrs []error, diskCount int) InitActions {
|
||||||
// Count errors by error value.
|
// Count errors by error value.
|
||||||
errMap := make(map[error]int)
|
errMap := make(map[error]int)
|
||||||
for _, err := range sErrs {
|
for _, err := range sErrs {
|
||||||
errMap[err]++
|
errMap[errorCause(err)]++
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validates and converts specific config errors into WaitForConfig.
|
// Validates and converts specific config errors into WaitForConfig.
|
||||||
@ -149,7 +148,6 @@ func prepForInitXL(firstDisk bool, sErrs []error, diskCount int) InitActions {
|
|||||||
return WaitForConfig
|
return WaitForConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
quorum := diskCount/2 + 1
|
|
||||||
readQuorum := diskCount / 2
|
readQuorum := diskCount / 2
|
||||||
disksOffline := errMap[errDiskNotFound]
|
disksOffline := errMap[errDiskNotFound]
|
||||||
disksFormatted := errMap[nil]
|
disksFormatted := errMap[nil]
|
||||||
@ -169,28 +167,25 @@ func prepForInitXL(firstDisk bool, sErrs []error, diskCount int) InitActions {
|
|||||||
return WaitForFormatting
|
return WaitForFormatting
|
||||||
}
|
}
|
||||||
|
|
||||||
// Total disks unformatted are in quorum verify if we have some offline disks.
|
|
||||||
if disksUnformatted >= quorum {
|
|
||||||
// Some disks offline and some disks unformatted, wait for all of them to come online.
|
|
||||||
if disksUnformatted+disksFormatted+disksOffline == diskCount {
|
|
||||||
return WaitForAll
|
|
||||||
}
|
|
||||||
|
|
||||||
// Some disks possibly corrupted and too many unformatted disks.
|
|
||||||
return Abort
|
|
||||||
}
|
|
||||||
|
|
||||||
// Already formatted and in quorum, proceed to initialization of object layer.
|
// Already formatted and in quorum, proceed to initialization of object layer.
|
||||||
if disksFormatted >= readQuorum {
|
if disksFormatted >= readQuorum {
|
||||||
if disksFormatted+disksOffline == diskCount {
|
if disksFormatted+disksOffline == diskCount {
|
||||||
return InitObjectLayer
|
return InitObjectLayer
|
||||||
}
|
}
|
||||||
|
|
||||||
// Some of the formatted disks are possibly corrupted or unformatted, heal them.
|
// Some of the formatted disks are possibly corrupted or unformatted,
|
||||||
return WaitForHeal
|
// let user know to heal them.
|
||||||
|
return SuggestToHeal
|
||||||
}
|
}
|
||||||
|
|
||||||
// Exhausted all our checks, un-handled errors perhaps we Abort.
|
// Some disks are unformatted, some are formatted and some are offline, but we don't have
|
||||||
|
// quorum to decide. This is an indecisive state - wait for all of the offline disks
|
||||||
|
// to be online to figure out the course of action.
|
||||||
|
if disksUnformatted+disksFormatted+disksOffline == diskCount {
|
||||||
|
return WaitForAll
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exhausted all our checks; for un-handled situations, such as some disks being corrupted, we Abort.
|
||||||
return Abort
|
return Abort
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -280,7 +275,7 @@ func retryFormattingXLDisks(firstDisk bool, endpoints EndpointList, storageDisks
|
|||||||
printRegularMsg(endpoints, storageDisks, printOnceFn())
|
printRegularMsg(endpoints, storageDisks, printOnceFn())
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
case WaitForHeal:
|
case SuggestToHeal:
|
||||||
// Validate formats loaded before proceeding forward.
|
// Validate formats loaded before proceeding forward.
|
||||||
err := genericFormatCheckXL(formatConfigs, sErrs)
|
err := genericFormatCheckXL(formatConfigs, sErrs)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
|
@ -26,8 +26,8 @@ func (action InitActions) String() string {
|
|||||||
return "FormatDisks"
|
return "FormatDisks"
|
||||||
case WaitForFormatting:
|
case WaitForFormatting:
|
||||||
return "WaitForFormatting"
|
return "WaitForFormatting"
|
||||||
case WaitForHeal:
|
case SuggestToHeal:
|
||||||
return "WaitForHeal"
|
return "SuggestToHeal"
|
||||||
case WaitForAll:
|
case WaitForAll:
|
||||||
return "WaitForAll"
|
return "WaitForAll"
|
||||||
case WaitForQuorum:
|
case WaitForQuorum:
|
||||||
@ -76,6 +76,7 @@ func TestPrepForInitXL(t *testing.T) {
|
|||||||
errUnformattedDisk, errUnformattedDisk, errUnformattedDisk, errUnformattedDisk,
|
errUnformattedDisk, errUnformattedDisk, errUnformattedDisk, errUnformattedDisk,
|
||||||
errUnformattedDisk, errCorruptedFormat, errCorruptedFormat, errDiskNotFound,
|
errUnformattedDisk, errCorruptedFormat, errCorruptedFormat, errDiskNotFound,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Quorum number of disks not online yet.
|
// Quorum number of disks not online yet.
|
||||||
noQuourm := []error{
|
noQuourm := []error{
|
||||||
errDiskNotFound, errDiskNotFound, errDiskNotFound, errDiskNotFound,
|
errDiskNotFound, errDiskNotFound, errDiskNotFound, errDiskNotFound,
|
||||||
@ -101,6 +102,20 @@ func TestPrepForInitXL(t *testing.T) {
|
|||||||
nil, nil, nil, nil,
|
nil, nil, nil, nil,
|
||||||
errServerTimeMismatch, nil, nil, nil,
|
errServerTimeMismatch, nil, nil, nil,
|
||||||
}
|
}
|
||||||
|
// Suggest to heal under formatted disks in quorum.
|
||||||
|
formattedDisksInQuorum := []error{
|
||||||
|
nil, nil, nil, nil,
|
||||||
|
errUnformattedDisk, errUnformattedDisk, errDiskNotFound, errDiskNotFound,
|
||||||
|
}
|
||||||
|
// Wait for all under undecisive state.
|
||||||
|
undecisiveErrs1 := []error{
|
||||||
|
errDiskNotFound, nil, nil, nil,
|
||||||
|
errUnformattedDisk, errUnformattedDisk, errDiskNotFound, errDiskNotFound,
|
||||||
|
}
|
||||||
|
undecisiveErrs2 := []error{
|
||||||
|
errDiskNotFound, errDiskNotFound, errDiskNotFound, errDiskNotFound,
|
||||||
|
errUnformattedDisk, errUnformattedDisk, errUnformattedDisk, errUnformattedDisk,
|
||||||
|
}
|
||||||
|
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
// Params for prepForInit().
|
// Params for prepForInit().
|
||||||
@ -116,7 +131,7 @@ func TestPrepForInitXL(t *testing.T) {
|
|||||||
{true, quorumUnformatted, 8, WaitForAll},
|
{true, quorumUnformatted, 8, WaitForAll},
|
||||||
{true, quorumUnformattedSomeCorrupted, 8, Abort},
|
{true, quorumUnformattedSomeCorrupted, 8, Abort},
|
||||||
{true, noQuourm, 8, WaitForQuorum},
|
{true, noQuourm, 8, WaitForQuorum},
|
||||||
{true, minorityCorrupted, 8, WaitForHeal},
|
{true, minorityCorrupted, 8, SuggestToHeal},
|
||||||
{true, majorityCorrupted, 8, Abort},
|
{true, majorityCorrupted, 8, Abort},
|
||||||
// Remote disks.
|
// Remote disks.
|
||||||
{false, allFormatted, 8, InitObjectLayer},
|
{false, allFormatted, 8, InitObjectLayer},
|
||||||
@ -125,8 +140,11 @@ func TestPrepForInitXL(t *testing.T) {
|
|||||||
{false, quorumUnformatted, 8, WaitForAll},
|
{false, quorumUnformatted, 8, WaitForAll},
|
||||||
{false, quorumUnformattedSomeCorrupted, 8, Abort},
|
{false, quorumUnformattedSomeCorrupted, 8, Abort},
|
||||||
{false, noQuourm, 8, WaitForQuorum},
|
{false, noQuourm, 8, WaitForQuorum},
|
||||||
{false, minorityCorrupted, 8, WaitForHeal},
|
{false, minorityCorrupted, 8, SuggestToHeal},
|
||||||
|
{false, formattedDisksInQuorum, 8, SuggestToHeal},
|
||||||
{false, majorityCorrupted, 8, Abort},
|
{false, majorityCorrupted, 8, Abort},
|
||||||
|
{false, undecisiveErrs1, 8, WaitForAll},
|
||||||
|
{false, undecisiveErrs2, 8, WaitForAll},
|
||||||
// Config mistakes.
|
// Config mistakes.
|
||||||
{true, accessKeyIDErr, 8, WaitForConfig},
|
{true, accessKeyIDErr, 8, WaitForConfig},
|
||||||
{true, authenticationErr, 8, WaitForConfig},
|
{true, authenticationErr, 8, WaitForConfig},
|
||||||
@ -136,7 +154,7 @@ func TestPrepForInitXL(t *testing.T) {
|
|||||||
for i, test := range testCases {
|
for i, test := range testCases {
|
||||||
actual := prepForInitXL(test.firstDisk, test.errs, test.diskCount)
|
actual := prepForInitXL(test.firstDisk, test.errs, test.diskCount)
|
||||||
if actual != test.action {
|
if actual != test.action {
|
||||||
t.Errorf("Test %d expected %s but receieved %s\n", i+1, test.action, actual)
|
t.Errorf("Test %d expected %s but received %s\n", i+1, test.action, actual)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user