mirror of
https://github.com/minio/minio.git
synced 2024-12-24 22:25:54 -05:00
Fix preInit logic when mixed disk situations exist. (#4904)
When servers are started simultaneously across multiple nodes or simulating a local setup, it can happen such that one of the servers in setup reaches a following situation where it observes - Some servers are formatted - Some servers are unformatted - Some servers are offline Current state machine doesn't handle this correctly, to fix this situation where we have unformatted, formatted and disks offline we do not decisively know the course of action. So we wait for the offline disks to change their state. Once the offline disks change their state to either one of these states we can decisively move forward. - nil (formatted disk) - errUnformattedDisk - Or any other error such as errCorruptedDisk. Fixes #4903
This commit is contained in:
parent
f66239e82f
commit
b9fc4150f6
@ -13,10 +13,11 @@ clone_folder: c:\gopath\src\github.com\minio\minio
|
||||
environment:
|
||||
GOVERSION: 1.8.3
|
||||
GOPATH: c:\gopath
|
||||
GOROOT: c:\go18
|
||||
|
||||
# scripts that run after cloning repository
|
||||
install:
|
||||
- set PATH=%GOPATH%\bin;c:\go\bin;%PATH%
|
||||
- set PATH=%GOPATH%\bin;c:\go18\bin;%PATH%
|
||||
- go version
|
||||
- go env
|
||||
- python --version
|
||||
|
@ -70,12 +70,11 @@ import (
|
||||
type InitActions int
|
||||
|
||||
const (
|
||||
// FormatDisks - see above table for disk states where it
|
||||
// is applicable.
|
||||
// FormatDisks - see above table for disk states where it is applicable.
|
||||
FormatDisks InitActions = iota
|
||||
|
||||
// WaitForHeal - Wait for disks to heal.
|
||||
WaitForHeal
|
||||
// SuggestToHeal - Prints heal message and initialize object layer.
|
||||
SuggestToHeal
|
||||
|
||||
// WaitForQuorum - Wait for quorum number of disks to be online.
|
||||
WaitForQuorum
|
||||
@ -134,14 +133,14 @@ func quickErrToActions(errMap map[error]int) InitActions {
|
||||
// - Unformatted setup
|
||||
// - Format/Wait for format when `disksUnformatted == diskCount`
|
||||
//
|
||||
// - Wait for all when `disksUnformatted + disksOffline == disksCount`
|
||||
// - Wait for all when `disksUnformatted + disksFormatted + diskOffline == diskCount`
|
||||
//
|
||||
// Under all other conditions should lead to server initialization aborted.
|
||||
func prepForInitXL(firstDisk bool, sErrs []error, diskCount int) InitActions {
|
||||
// Count errors by error value.
|
||||
errMap := make(map[error]int)
|
||||
for _, err := range sErrs {
|
||||
errMap[err]++
|
||||
errMap[errorCause(err)]++
|
||||
}
|
||||
|
||||
// Validates and converts specific config errors into WaitForConfig.
|
||||
@ -149,7 +148,6 @@ func prepForInitXL(firstDisk bool, sErrs []error, diskCount int) InitActions {
|
||||
return WaitForConfig
|
||||
}
|
||||
|
||||
quorum := diskCount/2 + 1
|
||||
readQuorum := diskCount / 2
|
||||
disksOffline := errMap[errDiskNotFound]
|
||||
disksFormatted := errMap[nil]
|
||||
@ -169,28 +167,25 @@ func prepForInitXL(firstDisk bool, sErrs []error, diskCount int) InitActions {
|
||||
return WaitForFormatting
|
||||
}
|
||||
|
||||
// Total disks unformatted are in quorum verify if we have some offline disks.
|
||||
if disksUnformatted >= quorum {
|
||||
// Some disks offline and some disks unformatted, wait for all of them to come online.
|
||||
if disksUnformatted+disksFormatted+disksOffline == diskCount {
|
||||
return WaitForAll
|
||||
}
|
||||
|
||||
// Some disks possibly corrupted and too many unformatted disks.
|
||||
return Abort
|
||||
}
|
||||
|
||||
// Already formatted and in quorum, proceed to initialization of object layer.
|
||||
if disksFormatted >= readQuorum {
|
||||
if disksFormatted+disksOffline == diskCount {
|
||||
return InitObjectLayer
|
||||
}
|
||||
|
||||
// Some of the formatted disks are possibly corrupted or unformatted, heal them.
|
||||
return WaitForHeal
|
||||
// Some of the formatted disks are possibly corrupted or unformatted,
|
||||
// let user know to heal them.
|
||||
return SuggestToHeal
|
||||
}
|
||||
|
||||
// Exhausted all our checks, un-handled errors perhaps we Abort.
|
||||
// Some unformatted, some disks formatted and some disks are offline but we don't
|
||||
// quorum to decide. This is an undecisive state - wait for all of offline disks
|
||||
// to be online to figure out the course of action.
|
||||
if disksUnformatted+disksFormatted+disksOffline == diskCount {
|
||||
return WaitForAll
|
||||
}
|
||||
|
||||
// Exhausted all our checks, un-handled situations such as some disks corrupted we Abort.
|
||||
return Abort
|
||||
}
|
||||
|
||||
@ -280,7 +275,7 @@ func retryFormattingXLDisks(firstDisk bool, endpoints EndpointList, storageDisks
|
||||
printRegularMsg(endpoints, storageDisks, printOnceFn())
|
||||
}
|
||||
return err
|
||||
case WaitForHeal:
|
||||
case SuggestToHeal:
|
||||
// Validate formats loaded before proceeding forward.
|
||||
err := genericFormatCheckXL(formatConfigs, sErrs)
|
||||
if err == nil {
|
||||
|
@ -26,8 +26,8 @@ func (action InitActions) String() string {
|
||||
return "FormatDisks"
|
||||
case WaitForFormatting:
|
||||
return "WaitForFormatting"
|
||||
case WaitForHeal:
|
||||
return "WaitForHeal"
|
||||
case SuggestToHeal:
|
||||
return "SuggestToHeal"
|
||||
case WaitForAll:
|
||||
return "WaitForAll"
|
||||
case WaitForQuorum:
|
||||
@ -76,6 +76,7 @@ func TestPrepForInitXL(t *testing.T) {
|
||||
errUnformattedDisk, errUnformattedDisk, errUnformattedDisk, errUnformattedDisk,
|
||||
errUnformattedDisk, errCorruptedFormat, errCorruptedFormat, errDiskNotFound,
|
||||
}
|
||||
|
||||
// Quorum number of disks not online yet.
|
||||
noQuourm := []error{
|
||||
errDiskNotFound, errDiskNotFound, errDiskNotFound, errDiskNotFound,
|
||||
@ -101,6 +102,20 @@ func TestPrepForInitXL(t *testing.T) {
|
||||
nil, nil, nil, nil,
|
||||
errServerTimeMismatch, nil, nil, nil,
|
||||
}
|
||||
// Suggest to heal under formatted disks in quorum.
|
||||
formattedDisksInQuorum := []error{
|
||||
nil, nil, nil, nil,
|
||||
errUnformattedDisk, errUnformattedDisk, errDiskNotFound, errDiskNotFound,
|
||||
}
|
||||
// Wait for all under undecisive state.
|
||||
undecisiveErrs1 := []error{
|
||||
errDiskNotFound, nil, nil, nil,
|
||||
errUnformattedDisk, errUnformattedDisk, errDiskNotFound, errDiskNotFound,
|
||||
}
|
||||
undecisiveErrs2 := []error{
|
||||
errDiskNotFound, errDiskNotFound, errDiskNotFound, errDiskNotFound,
|
||||
errUnformattedDisk, errUnformattedDisk, errUnformattedDisk, errUnformattedDisk,
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
// Params for prepForInit().
|
||||
@ -116,7 +131,7 @@ func TestPrepForInitXL(t *testing.T) {
|
||||
{true, quorumUnformatted, 8, WaitForAll},
|
||||
{true, quorumUnformattedSomeCorrupted, 8, Abort},
|
||||
{true, noQuourm, 8, WaitForQuorum},
|
||||
{true, minorityCorrupted, 8, WaitForHeal},
|
||||
{true, minorityCorrupted, 8, SuggestToHeal},
|
||||
{true, majorityCorrupted, 8, Abort},
|
||||
// Remote disks.
|
||||
{false, allFormatted, 8, InitObjectLayer},
|
||||
@ -125,8 +140,11 @@ func TestPrepForInitXL(t *testing.T) {
|
||||
{false, quorumUnformatted, 8, WaitForAll},
|
||||
{false, quorumUnformattedSomeCorrupted, 8, Abort},
|
||||
{false, noQuourm, 8, WaitForQuorum},
|
||||
{false, minorityCorrupted, 8, WaitForHeal},
|
||||
{false, minorityCorrupted, 8, SuggestToHeal},
|
||||
{false, formattedDisksInQuorum, 8, SuggestToHeal},
|
||||
{false, majorityCorrupted, 8, Abort},
|
||||
{false, undecisiveErrs1, 8, WaitForAll},
|
||||
{false, undecisiveErrs2, 8, WaitForAll},
|
||||
// Config mistakes.
|
||||
{true, accessKeyIDErr, 8, WaitForConfig},
|
||||
{true, authenticationErr, 8, WaitForConfig},
|
||||
@ -136,7 +154,7 @@ func TestPrepForInitXL(t *testing.T) {
|
||||
for i, test := range testCases {
|
||||
actual := prepForInitXL(test.firstDisk, test.errs, test.diskCount)
|
||||
if actual != test.action {
|
||||
t.Errorf("Test %d expected %s but receieved %s\n", i+1, test.action, actual)
|
||||
t.Errorf("Test %d expected %s but received %s\n", i+1, test.action, actual)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user