re-use StorageAPI while loading drive formats (#19770)

Bonus: safe settings for deployment ID to avoid races
This commit is contained in:
Harshavardhana 2024-05-19 01:06:49 -07:00 committed by GitHub
parent e947a844c9
commit 1fd90c93ff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 65 additions and 59 deletions

View File

@ -154,18 +154,16 @@ func isServerResolvable(endpoint Endpoint, timeout time.Duration) error {
// connect to list of endpoints and load all Erasure disk formats, validate the formats are correct
// and are in quorum, if no formats are found attempt to initialize all of them for the first
// time. additionally make sure to close all the disks used in this attempt.
func connectLoadInitFormats(verboseLogging bool, firstDisk bool, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID string) (storageDisks []StorageAPI, format *formatErasureV3, err error) {
// Initialize all storage disks
storageDisks, errs := initStorageDisksWithErrors(endpoints, storageOpts{cleanUp: true, healthCheck: true})
func connectLoadInitFormats(verboseLogging bool, firstDisk bool, storageDisks []StorageAPI, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID string) (format *formatErasureV3, err error) {
// Attempt to load all `format.json` from all disks.
formatConfigs, sErrs := loadFormatErasureAll(storageDisks, false)
defer func(storageDisks []StorageAPI) {
if err != nil {
closeStorageDisks(storageDisks...)
}
}(storageDisks)
if err := checkDiskFatalErrs(sErrs); err != nil {
return nil, err
}
for i, err := range errs {
if err != nil && !errors.Is(err, errXLBackend) {
for i, err := range sErrs {
if err != nil && !errors.Is(err, errXLBackend) && !errors.Is(err, errUnformattedDisk) {
if errors.Is(err, errDiskNotFound) && verboseLogging {
if globalEndpoints.NEndpoints() > 1 {
logger.Error("Unable to connect to %s: %v", endpoints[i], isServerResolvable(endpoints[i], time.Second))
@ -182,30 +180,13 @@ func connectLoadInitFormats(verboseLogging bool, firstDisk bool, endpoints Endpo
}
}
if err := checkDiskFatalErrs(errs); err != nil {
return nil, nil, err
}
// Attempt to load all `format.json` from all disks.
formatConfigs, sErrs := loadFormatErasureAll(storageDisks, false)
// Check if we have
for i, sErr := range sErrs {
// print the error, nonetheless, which is perhaps unhandled
if !errors.Is(sErr, errUnformattedDisk) && !errors.Is(sErr, errDiskNotFound) && verboseLogging {
if sErr != nil {
logger.Error("Unable to read 'format.json' from %s: %v\n", endpoints[i], sErr)
}
}
}
// Pre-emptively check if one of the formatted disks
// is invalid. This function returns success for the
// most part unless one of the formats is not consistent
// with expected Erasure format. For example if a user is
// trying to pool FS backend into an Erasure set.
if err = checkFormatErasureValues(formatConfigs, storageDisks, setDriveCount); err != nil {
return nil, nil, err
return nil, err
}
// All disks report unformatted we should initialized everyone.
@ -216,45 +197,46 @@ func connectLoadInitFormats(verboseLogging bool, firstDisk bool, endpoints Endpo
// Initialize erasure code format on disks
format, err = initFormatErasure(GlobalContext, storageDisks, setCount, setDriveCount, deploymentID, sErrs)
if err != nil {
return nil, nil, err
return nil, err
}
// Assign globalDeploymentID() on first run for the
// minio server managing the first disk
globalDeploymentIDPtr.Store(&format.ID)
return storageDisks, format, nil
return format, nil
}
// Return error when quorum unformatted disks - indicating we are
// waiting for first server to be online.
unformattedDisks := quorumUnformattedDisks(sErrs)
if unformattedDisks && !firstDisk {
return nil, nil, errNotFirstDisk
return nil, errNotFirstDisk
}
// Return error when quorum unformatted disks but waiting for rest
// of the servers to be online.
if unformattedDisks && firstDisk {
return nil, nil, errFirstDiskWait
return nil, errFirstDiskWait
}
format, err = getFormatErasureInQuorum(formatConfigs)
if err != nil {
internalLogIf(GlobalContext, err)
return nil, nil, err
var drivesNotFound int
for _, format := range formatConfigs {
if format != nil {
continue
}
drivesNotFound++
}
return nil, fmt.Errorf("%w (offline-drives=%d/%d)", err, drivesNotFound, len(formatConfigs))
}
if format.ID == "" {
internalLogIf(GlobalContext, errors.New("unexpected error deployment ID is missing, refusing to continue"))
return nil, nil, errInvalidArgument
return nil, errors.New("deployment ID missing from disk format, unable to start the server")
}
globalDeploymentIDPtr.Store(&format.ID)
return storageDisks, format, nil
return format, nil
}
// Format disks before initialization of object layer.
func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID string) ([]StorageAPI, *formatErasureV3, error) {
func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID string) (storageDisks []StorageAPI, format *formatErasureV3, err error) {
if len(endpoints) == 0 || setCount == 0 || setDriveCount == 0 {
return nil, nil, errInvalidArgument
}
@ -271,7 +253,26 @@ func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCou
verbose bool
)
storageDisks, format, err := connectLoadInitFormats(verbose, firstDisk, endpoints, poolCount, setCount, setDriveCount, deploymentID)
// Initialize all storage disks
storageDisks, errs := initStorageDisksWithErrors(endpoints, storageOpts{cleanUp: true, healthCheck: true})
if err := checkDiskFatalErrs(errs); err != nil {
return nil, nil, err
}
defer func() {
if err == nil && format != nil {
// Assign globalDeploymentID() on first run for the
// minio server managing the first disk
globalDeploymentIDPtr.Store(&format.ID)
// Set the deployment ID here to avoid races.
xhttp.SetDeploymentID(format.ID)
xhttp.SetMinIOVersion(Version)
}
}()
format, err = connectLoadInitFormats(verbose, firstDisk, storageDisks, endpoints, poolCount, setCount, setDriveCount, deploymentID)
if err == nil {
return storageDisks, format, nil
}
@ -289,28 +290,28 @@ func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCou
tries = 1
}
storageDisks, format, err := connectLoadInitFormats(verbose, firstDisk, endpoints, poolCount, setCount, setDriveCount, deploymentID)
format, err = connectLoadInitFormats(verbose, firstDisk, storageDisks, endpoints, poolCount, setCount, setDriveCount, deploymentID)
if err == nil {
return storageDisks, format, nil
}
tries++
switch err {
case errNotFirstDisk:
switch {
case errors.Is(err, errNotFirstDisk):
// Fresh setup, wait for first server to be up.
logger.Info("Waiting for the first server to format the drives (elapsed %s)\n", getElapsedTime())
case errFirstDiskWait:
case errors.Is(err, errFirstDiskWait):
// Fresh setup, wait for other servers to come up.
logger.Info("Waiting for all other servers to be online to format the drives (elapses %s)\n", getElapsedTime())
case errErasureReadQuorum:
case errors.Is(err, errErasureReadQuorum):
// no quorum available continue to wait for minimum number of servers.
logger.Info("Waiting for a minimum of %d drives to come online (elapsed %s)\n",
len(endpoints)/2, getElapsedTime())
case errErasureWriteQuorum:
case errors.Is(err, errErasureWriteQuorum):
// no quorum available continue to wait for minimum number of servers.
logger.Info("Waiting for a minimum of %d drives to come online (elapsed %s)\n",
(len(endpoints)/2)+1, getElapsedTime())
case errErasureV3ThisEmpty:
case errors.Is(err, errErasureV3ThisEmpty):
// need to wait for this error to be healed, so continue.
default:
// For all other unhandled errors we exit and fail.

View File

@ -69,13 +69,16 @@ func Fatal(err error, msg string, data ...interface{}) {
}
func fatal(err error, msg string, data ...interface{}) {
var errMsg string
if msg != "" {
errMsg = errorFmtFunc(fmt.Sprintf(msg, data...), err, jsonFlag)
if msg == "" {
if len(data) > 0 {
msg = fmt.Sprint(data...)
} else {
msg = "a fatal error"
}
} else {
errMsg = err.Error()
msg = fmt.Sprintf(msg, data...)
}
consoleLog(fatalMessage, errMsg)
consoleLog(fatalMessage, errorFmtFunc(msg, err, jsonFlag))
}
var fatalMessage fatalMsg
@ -183,13 +186,14 @@ func (i infoMsg) quiet(msg string, args ...interface{}) {
func (i infoMsg) pretty(msg string, args ...interface{}) {
if msg == "" {
fmt.Fprintln(Output, args...)
} else {
fmt.Fprintf(Output, msg, args...)
}
fmt.Fprintf(Output, msg, args...)
}
type errorMsg struct{}
var errorm errorMsg
var errorMessage errorMsg
func (i errorMsg) json(msg string, args ...interface{}) {
var message string
@ -217,8 +221,9 @@ func (i errorMsg) quiet(msg string, args ...interface{}) {
func (i errorMsg) pretty(msg string, args ...interface{}) {
if msg == "" {
fmt.Fprintln(Output, args...)
} else {
fmt.Fprintf(Output, msg, args...)
}
fmt.Fprintf(Output, msg, args...)
}
// Error :
@ -226,7 +231,7 @@ func Error(msg string, data ...interface{}) {
if DisableErrorLog {
return
}
consoleLog(errorm, msg, data...)
consoleLog(errorMessage, msg, data...)
}
// Info :