Print storage errors during distributed initialization (#6441)

This commit will print connection failures to other disks in other nodes
after 5 retries. It is useful for users to understand why the
distribued cluster fails to boot up.
This commit is contained in:
Anis Elleuch
2018-09-11 00:21:59 +01:00
committed by Dee Koder
parent 12b4971b70
commit 7571582000
5 changed files with 42 additions and 4 deletions

View File

@@ -120,13 +120,25 @@ var errXLV3ThisEmpty = fmt.Errorf("XL format version 3 has This field empty")
// connect to list of endpoints and load all XL disk formats, validate the formats are correct
// and are in quorum, if no formats are found attempt to initialize all of them for the first
// time. additionally make sure to close all the disks used in this attempt.
func connectLoadInitFormats(firstDisk bool, endpoints EndpointList, setCount, drivesPerSet int) (*formatXLV3, error) {
func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints EndpointList, setCount, drivesPerSet int) (*formatXLV3, error) {
// Initialize all storage disks
storageDisks, err := initStorageDisks(endpoints)
if err != nil {
return nil, err
}
defer closeStorageDisks(storageDisks)
// Connect to all storage disks, a connection failure will be
// only logged after some retries.
for _, disk := range storageDisks {
if disk != nil {
connectErr := disk.LastError()
if connectErr != nil && retryCount >= 5 {
logger.Info("Unable to connect to %s: %v\n", disk.String(), connectErr.Error())
}
}
}
// Attempt to load all `format.json` from all disks.
formatConfigs, sErrs := loadFormatXLAll(storageDisks)
// Check if we have
@@ -238,8 +250,8 @@ func waitForFormatXL(ctx context.Context, firstDisk bool, endpoints EndpointList
retryTimerCh := newRetryTimerSimple(doneCh)
for {
select {
case _ = <-retryTimerCh:
format, err := connectLoadInitFormats(firstDisk, endpoints, setCount, disksPerSet)
case retryCount := <-retryTimerCh:
format, err := connectLoadInitFormats(retryCount, firstDisk, endpoints, setCount, disksPerSet)
if err != nil {
switch err {
case errNotFirstDisk: