From f961ec4aaffd0c9020bd58bbd83b470bbd5880ef Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Wed, 14 Feb 2024 10:37:34 -0800 Subject: [PATCH] fix: revert allow offline disks on fresh start (#19052) the PR in #16541 was incorrect and hand wrong assumptions about the overall setup, revert this since this expectation to have offline servers is wrong and we can end up with a bigger chicken and egg problem. This reverts commit 5996c8c4d5551941fc2d844f7f996d488ff501b1. Bonus: - preserve disk in globalLocalDrives properly upon connectDisks() - do not return 'nil' from newXLStorage(), getting it ready for the next set of changes for 'format.json' loading. --- cmd/erasure-sets.go | 15 ++++++++++++--- cmd/format-erasure.go | 20 ++++++++------------ cmd/prepare-storage.go | 22 ++++++++++++++-------- cmd/xl-storage.go | 39 ++++++++++++++++++++------------------- 4 files changed, 54 insertions(+), 42 deletions(-) diff --git a/cmd/erasure-sets.go b/cmd/erasure-sets.go index b58bade0c..cc487f720 100644 --- a/cmd/erasure-sets.go +++ b/cmd/erasure-sets.go @@ -264,13 +264,22 @@ func (s *erasureSets) connectDisks() { disk.SetDiskLoc(s.poolIndex, setIndex, diskIndex) disk.SetFormatData(formatData) s.erasureDisks[setIndex][diskIndex] = disk - s.erasureDisksMu.Unlock() - if disk.IsLocal() && globalIsDistErasure { + if disk.IsLocal() { globalLocalDrivesMu.Lock() - globalLocalSetDrives[s.poolIndex][setIndex][diskIndex] = disk + if globalIsDistErasure { + globalLocalSetDrives[s.poolIndex][setIndex][diskIndex] = disk + } + for i, ldisk := range globalLocalDrives { + _, k, l := ldisk.GetDiskLoc() + if k == setIndex && l == diskIndex { + globalLocalDrives[i] = disk + break + } + } globalLocalDrivesMu.Unlock() } + s.erasureDisksMu.Unlock() }(endpoint) } diff --git a/cmd/format-erasure.go b/cmd/format-erasure.go index 41a7a5366..eae474461 100644 --- a/cmd/format-erasure.go +++ b/cmd/format-erasure.go @@ -306,7 +306,12 @@ func countErrs(errs []error, err error) int { return i } -// Check if unformatted disks are equal to write quorum. +// Does all errors indicate we need to initialize all disks?. +func shouldInitErasureDisks(errs []error) bool { + return countErrs(errs, errUnformattedDisk) == len(errs) +} + +// Check if unformatted disks are equal to 50%+1 of all the drives. func quorumUnformattedDisks(errs []error) bool { return countErrs(errs, errUnformattedDisk) >= (len(errs)/2)+1 } @@ -773,9 +778,6 @@ func initFormatErasure(ctx context.Context, storageDisks []StorageAPI, setCount, hostCount := make(map[string]int, setDriveCount) for j := 0; j < setDriveCount; j++ { disk := storageDisks[i*setDriveCount+j] - if disk == nil { - continue - } newFormat := format.Clone() newFormat.Erasure.This = format.Erasure.Sets[i][j] if deploymentID != "" { @@ -800,14 +802,8 @@ func initFormatErasure(ctx context.Context, storageDisks []StorageAPI, setCount, logger.Info(" - Drive: %s", disk.String()) } }) - var warning string - if wantAtMost == 0 { - warning = fmt.Sprintf("Host %v has all drives of set. ", host) - } else { - warning = fmt.Sprintf("Host %v has more than %v drives of set. ", host, wantAtMost) - } - logger.Info(color.Yellow("WARNING: ") + warning + - "A host failure will result in data becoming unavailable.") + logger.Info(color.Yellow("WARNING:")+" Host %v has more than %v drives of set. "+ + "A host failure will result in data becoming unavailable.", host, wantAtMost) } } } diff --git a/cmd/prepare-storage.go b/cmd/prepare-storage.go index beec2ebcc..e0ad6a942 100644 --- a/cmd/prepare-storage.go +++ b/cmd/prepare-storage.go @@ -208,15 +208,8 @@ func connectLoadInitFormats(verboseLogging bool, firstDisk bool, endpoints Endpo return nil, nil, err } - // Return error when quorum unformatted disks - indicating we are - // waiting for first server to be online. - unformattedDisks := quorumUnformattedDisks(sErrs) - if unformattedDisks && !firstDisk { - return nil, nil, errNotFirstDisk - } - // All disks report unformatted we should initialized everyone. - if unformattedDisks && firstDisk { + if shouldInitErasureDisks(sErrs) && firstDisk { logger.Info("Formatting %s pool, %v set(s), %v drives per set.", humanize.Ordinal(poolCount), setCount, setDriveCount) @@ -232,6 +225,19 @@ func connectLoadInitFormats(verboseLogging bool, firstDisk bool, endpoints Endpo return storageDisks, format, nil } + // Return error when quorum unformatted disks - indicating we are + // waiting for first server to be online. + unformattedDisks := quorumUnformattedDisks(sErrs) + if unformattedDisks && !firstDisk { + return nil, nil, errNotFirstDisk + } + + // Return error when quorum unformatted disks but waiting for rest + // of the servers to be online. + if unformattedDisks && firstDisk { + return nil, nil, errFirstDiskWait + } + format, err = getFormatErasureInQuorum(formatConfigs) if err != nil { logger.LogIf(GlobalContext, err) diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index 170767a30..aadec5b9f 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -229,14 +229,24 @@ func makeFormatErasureMetaVolumes(disk StorageAPI) error { // Initialize a new storage disk. func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) { - path := ep.Path - if path, err = getValidPath(path); err != nil { - return nil, err + s = &xlStorage{ + drivePath: ep.Path, + endpoint: ep, + globalSync: globalFSOSync, + poolIndex: -1, + setIndex: -1, + diskIndex: -1, } - info, err := disk.GetInfo(path, true) + s.drivePath, err = getValidPath(ep.Path) if err != nil { - return nil, err + s.drivePath = ep.Path + return s, err + } + + info, err := disk.GetInfo(s.drivePath, true) + if err != nil { + return s, err } if !globalIsCICD && !globalIsErasureSD { @@ -247,7 +257,7 @@ func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) { // size less than or equal to the threshold as rootDrives. rootDrive = info.Total <= globalRootDiskThreshold } else { - rootDrive, err = disk.IsRootDisk(path, SlashSeparator) + rootDrive, err = disk.IsRootDisk(s.drivePath, SlashSeparator) if err != nil { return nil, err } @@ -257,15 +267,6 @@ func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) { } } - s = &xlStorage{ - drivePath: path, - endpoint: ep, - globalSync: globalFSOSync, - poolIndex: -1, - setIndex: -1, - diskIndex: -1, - } - // Sanitize before setting it if info.NRRequests > 0 { s.nrRequests = info.NRRequests @@ -285,11 +286,11 @@ func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) { formatData, formatFi, err := formatErasureMigrate(s.drivePath) if err != nil && !errors.Is(err, os.ErrNotExist) { if os.IsPermission(err) { - return nil, errDiskAccessDenied + return s, errDiskAccessDenied } else if isSysErrIO(err) { - return nil, errFaultyDisk + return s, errFaultyDisk } - return nil, err + return s, err } s.formatData = formatData s.formatFileInfo = formatFi @@ -297,7 +298,7 @@ func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) { // Create all necessary bucket folders if possible. if err = makeFormatErasureMetaVolumes(s); err != nil { - return nil, err + return s, err } if len(s.formatData) > 0 {