From 1fd90c93ff69a317782da41018cfab643a93b83d Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Sun, 19 May 2024 01:06:49 -0700 Subject: [PATCH] re-use StorageAPI while loading drive formats (#19770) Bonus: safe settings for deployment ID to avoid races --- cmd/prepare-storage.go | 101 +++++++++++++++++++------------------ internal/logger/console.go | 23 +++++---- 2 files changed, 65 insertions(+), 59 deletions(-) diff --git a/cmd/prepare-storage.go b/cmd/prepare-storage.go index d80d5e5ab..5c25e4531 100644 --- a/cmd/prepare-storage.go +++ b/cmd/prepare-storage.go @@ -154,18 +154,16 @@ func isServerResolvable(endpoint Endpoint, timeout time.Duration) error { // connect to list of endpoints and load all Erasure disk formats, validate the formats are correct // and are in quorum, if no formats are found attempt to initialize all of them for the first // time. additionally make sure to close all the disks used in this attempt. -func connectLoadInitFormats(verboseLogging bool, firstDisk bool, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID string) (storageDisks []StorageAPI, format *formatErasureV3, err error) { - // Initialize all storage disks - storageDisks, errs := initStorageDisksWithErrors(endpoints, storageOpts{cleanUp: true, healthCheck: true}) +func connectLoadInitFormats(verboseLogging bool, firstDisk bool, storageDisks []StorageAPI, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID string) (format *formatErasureV3, err error) { + // Attempt to load all `format.json` from all disks. + formatConfigs, sErrs := loadFormatErasureAll(storageDisks, false) - defer func(storageDisks []StorageAPI) { - if err != nil { - closeStorageDisks(storageDisks...) - } - }(storageDisks) + if err := checkDiskFatalErrs(sErrs); err != nil { + return nil, err + } - for i, err := range errs { - if err != nil && !errors.Is(err, errXLBackend) { + for i, err := range sErrs { + if err != nil && !errors.Is(err, errXLBackend) && !errors.Is(err, errUnformattedDisk) { if errors.Is(err, errDiskNotFound) && verboseLogging { if globalEndpoints.NEndpoints() > 1 { logger.Error("Unable to connect to %s: %v", endpoints[i], isServerResolvable(endpoints[i], time.Second)) @@ -182,30 +180,13 @@ func connectLoadInitFormats(verboseLogging bool, firstDisk bool, endpoints Endpo } } - if err := checkDiskFatalErrs(errs); err != nil { - return nil, nil, err - } - - // Attempt to load all `format.json` from all disks. - formatConfigs, sErrs := loadFormatErasureAll(storageDisks, false) - - // Check if we have - for i, sErr := range sErrs { - // print the error, nonetheless, which is perhaps unhandled - if !errors.Is(sErr, errUnformattedDisk) && !errors.Is(sErr, errDiskNotFound) && verboseLogging { - if sErr != nil { - logger.Error("Unable to read 'format.json' from %s: %v\n", endpoints[i], sErr) - } - } - } - // Pre-emptively check if one of the formatted disks // is invalid. This function returns success for the // most part unless one of the formats is not consistent // with expected Erasure format. For example if a user is // trying to pool FS backend into an Erasure set. if err = checkFormatErasureValues(formatConfigs, storageDisks, setDriveCount); err != nil { - return nil, nil, err + return nil, err } // All disks report unformatted we should initialized everyone. @@ -216,45 +197,46 @@ func connectLoadInitFormats(verboseLogging bool, firstDisk bool, endpoints Endpo // Initialize erasure code format on disks format, err = initFormatErasure(GlobalContext, storageDisks, setCount, setDriveCount, deploymentID, sErrs) if err != nil { - return nil, nil, err + return nil, err } - // Assign globalDeploymentID() on first run for the - // minio server managing the first disk - globalDeploymentIDPtr.Store(&format.ID) - return storageDisks, format, nil + return format, nil } // Return error when quorum unformatted disks - indicating we are // waiting for first server to be online. unformattedDisks := quorumUnformattedDisks(sErrs) if unformattedDisks && !firstDisk { - return nil, nil, errNotFirstDisk + return nil, errNotFirstDisk } // Return error when quorum unformatted disks but waiting for rest // of the servers to be online. if unformattedDisks && firstDisk { - return nil, nil, errFirstDiskWait + return nil, errFirstDiskWait } format, err = getFormatErasureInQuorum(formatConfigs) if err != nil { - internalLogIf(GlobalContext, err) - return nil, nil, err + var drivesNotFound int + for _, format := range formatConfigs { + if format != nil { + continue + } + drivesNotFound++ + } + return nil, fmt.Errorf("%w (offline-drives=%d/%d)", err, drivesNotFound, len(formatConfigs)) } if format.ID == "" { - internalLogIf(GlobalContext, errors.New("unexpected error deployment ID is missing, refusing to continue")) - return nil, nil, errInvalidArgument + return nil, errors.New("deployment ID missing from disk format, unable to start the server") } - globalDeploymentIDPtr.Store(&format.ID) - return storageDisks, format, nil + return format, nil } // Format disks before initialization of object layer. -func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID string) ([]StorageAPI, *formatErasureV3, error) { +func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID string) (storageDisks []StorageAPI, format *formatErasureV3, err error) { if len(endpoints) == 0 || setCount == 0 || setDriveCount == 0 { return nil, nil, errInvalidArgument } @@ -271,7 +253,26 @@ func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCou verbose bool ) - storageDisks, format, err := connectLoadInitFormats(verbose, firstDisk, endpoints, poolCount, setCount, setDriveCount, deploymentID) + // Initialize all storage disks + storageDisks, errs := initStorageDisksWithErrors(endpoints, storageOpts{cleanUp: true, healthCheck: true}) + + if err := checkDiskFatalErrs(errs); err != nil { + return nil, nil, err + } + + defer func() { + if err == nil && format != nil { + // Assign globalDeploymentID() on first run for the + // minio server managing the first disk + globalDeploymentIDPtr.Store(&format.ID) + + // Set the deployment ID here to avoid races. + xhttp.SetDeploymentID(format.ID) + xhttp.SetMinIOVersion(Version) + } + }() + + format, err = connectLoadInitFormats(verbose, firstDisk, storageDisks, endpoints, poolCount, setCount, setDriveCount, deploymentID) if err == nil { return storageDisks, format, nil } @@ -289,28 +290,28 @@ func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCou tries = 1 } - storageDisks, format, err := connectLoadInitFormats(verbose, firstDisk, endpoints, poolCount, setCount, setDriveCount, deploymentID) + format, err = connectLoadInitFormats(verbose, firstDisk, storageDisks, endpoints, poolCount, setCount, setDriveCount, deploymentID) if err == nil { return storageDisks, format, nil } tries++ - switch err { - case errNotFirstDisk: + switch { + case errors.Is(err, errNotFirstDisk): // Fresh setup, wait for first server to be up. logger.Info("Waiting for the first server to format the drives (elapsed %s)\n", getElapsedTime()) - case errFirstDiskWait: + case errors.Is(err, errFirstDiskWait): // Fresh setup, wait for other servers to come up. logger.Info("Waiting for all other servers to be online to format the drives (elapses %s)\n", getElapsedTime()) - case errErasureReadQuorum: + case errors.Is(err, errErasureReadQuorum): // no quorum available continue to wait for minimum number of servers. logger.Info("Waiting for a minimum of %d drives to come online (elapsed %s)\n", len(endpoints)/2, getElapsedTime()) - case errErasureWriteQuorum: + case errors.Is(err, errErasureWriteQuorum): // no quorum available continue to wait for minimum number of servers. logger.Info("Waiting for a minimum of %d drives to come online (elapsed %s)\n", (len(endpoints)/2)+1, getElapsedTime()) - case errErasureV3ThisEmpty: + case errors.Is(err, errErasureV3ThisEmpty): // need to wait for this error to be healed, so continue. default: // For all other unhandled errors we exit and fail. diff --git a/internal/logger/console.go b/internal/logger/console.go index 9545266e5..2f367ebc7 100644 --- a/internal/logger/console.go +++ b/internal/logger/console.go @@ -69,13 +69,16 @@ func Fatal(err error, msg string, data ...interface{}) { } func fatal(err error, msg string, data ...interface{}) { - var errMsg string - if msg != "" { - errMsg = errorFmtFunc(fmt.Sprintf(msg, data...), err, jsonFlag) + if msg == "" { + if len(data) > 0 { + msg = fmt.Sprint(data...) + } else { + msg = "a fatal error" + } } else { - errMsg = err.Error() + msg = fmt.Sprintf(msg, data...) } - consoleLog(fatalMessage, errMsg) + consoleLog(fatalMessage, errorFmtFunc(msg, err, jsonFlag)) } var fatalMessage fatalMsg @@ -183,13 +186,14 @@ func (i infoMsg) quiet(msg string, args ...interface{}) { func (i infoMsg) pretty(msg string, args ...interface{}) { if msg == "" { fmt.Fprintln(Output, args...) + } else { + fmt.Fprintf(Output, msg, args...) } - fmt.Fprintf(Output, msg, args...) } type errorMsg struct{} -var errorm errorMsg +var errorMessage errorMsg func (i errorMsg) json(msg string, args ...interface{}) { var message string @@ -217,8 +221,9 @@ func (i errorMsg) quiet(msg string, args ...interface{}) { func (i errorMsg) pretty(msg string, args ...interface{}) { if msg == "" { fmt.Fprintln(Output, args...) + } else { + fmt.Fprintf(Output, msg, args...) } - fmt.Fprintf(Output, msg, args...) } // Error : @@ -226,7 +231,7 @@ func Error(msg string, data ...interface{}) { if DisableErrorLog { return } - consoleLog(errorm, msg, data...) + consoleLog(errorMessage, msg, data...) } // Info :