re-use StorageAPI while loading drive formats (#19770)

Bonus: set the deployment ID in one safe place to avoid races
This commit is contained in:
Harshavardhana 2024-05-19 01:06:49 -07:00 committed by GitHub
parent e947a844c9
commit 1fd90c93ff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 65 additions and 59 deletions

View File

@ -154,18 +154,16 @@ func isServerResolvable(endpoint Endpoint, timeout time.Duration) error {
// connect to list of endpoints and load all Erasure disk formats, validate the formats are correct // connect to list of endpoints and load all Erasure disk formats, validate the formats are correct
// and are in quorum, if no formats are found attempt to initialize all of them for the first // and are in quorum, if no formats are found attempt to initialize all of them for the first
// time. additionally make sure to close all the disks used in this attempt. // time. additionally make sure to close all the disks used in this attempt.
func connectLoadInitFormats(verboseLogging bool, firstDisk bool, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID string) (storageDisks []StorageAPI, format *formatErasureV3, err error) { func connectLoadInitFormats(verboseLogging bool, firstDisk bool, storageDisks []StorageAPI, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID string) (format *formatErasureV3, err error) {
// Initialize all storage disks // Attempt to load all `format.json` from all disks.
storageDisks, errs := initStorageDisksWithErrors(endpoints, storageOpts{cleanUp: true, healthCheck: true}) formatConfigs, sErrs := loadFormatErasureAll(storageDisks, false)
defer func(storageDisks []StorageAPI) { if err := checkDiskFatalErrs(sErrs); err != nil {
if err != nil { return nil, err
closeStorageDisks(storageDisks...) }
}
}(storageDisks)
for i, err := range errs { for i, err := range sErrs {
if err != nil && !errors.Is(err, errXLBackend) { if err != nil && !errors.Is(err, errXLBackend) && !errors.Is(err, errUnformattedDisk) {
if errors.Is(err, errDiskNotFound) && verboseLogging { if errors.Is(err, errDiskNotFound) && verboseLogging {
if globalEndpoints.NEndpoints() > 1 { if globalEndpoints.NEndpoints() > 1 {
logger.Error("Unable to connect to %s: %v", endpoints[i], isServerResolvable(endpoints[i], time.Second)) logger.Error("Unable to connect to %s: %v", endpoints[i], isServerResolvable(endpoints[i], time.Second))
@ -182,30 +180,13 @@ func connectLoadInitFormats(verboseLogging bool, firstDisk bool, endpoints Endpo
} }
} }
if err := checkDiskFatalErrs(errs); err != nil {
return nil, nil, err
}
// Attempt to load all `format.json` from all disks.
formatConfigs, sErrs := loadFormatErasureAll(storageDisks, false)
// Check if we have
for i, sErr := range sErrs {
// print the error, nonetheless, which is perhaps unhandled
if !errors.Is(sErr, errUnformattedDisk) && !errors.Is(sErr, errDiskNotFound) && verboseLogging {
if sErr != nil {
logger.Error("Unable to read 'format.json' from %s: %v\n", endpoints[i], sErr)
}
}
}
// Pre-emptively check if one of the formatted disks // Pre-emptively check if one of the formatted disks
// is invalid. This function returns success for the // is invalid. This function returns success for the
// most part unless one of the formats is not consistent // most part unless one of the formats is not consistent
// with expected Erasure format. For example if a user is // with expected Erasure format. For example if a user is
// trying to pool FS backend into an Erasure set. // trying to pool FS backend into an Erasure set.
if err = checkFormatErasureValues(formatConfigs, storageDisks, setDriveCount); err != nil { if err = checkFormatErasureValues(formatConfigs, storageDisks, setDriveCount); err != nil {
return nil, nil, err return nil, err
} }
// All disks report unformatted we should initialized everyone. // All disks report unformatted we should initialized everyone.
@ -216,45 +197,46 @@ func connectLoadInitFormats(verboseLogging bool, firstDisk bool, endpoints Endpo
// Initialize erasure code format on disks // Initialize erasure code format on disks
format, err = initFormatErasure(GlobalContext, storageDisks, setCount, setDriveCount, deploymentID, sErrs) format, err = initFormatErasure(GlobalContext, storageDisks, setCount, setDriveCount, deploymentID, sErrs)
if err != nil { if err != nil {
return nil, nil, err return nil, err
} }
// Assign globalDeploymentID() on first run for the return format, nil
// minio server managing the first disk
globalDeploymentIDPtr.Store(&format.ID)
return storageDisks, format, nil
} }
// Return error when quorum unformatted disks - indicating we are // Return error when quorum unformatted disks - indicating we are
// waiting for first server to be online. // waiting for first server to be online.
unformattedDisks := quorumUnformattedDisks(sErrs) unformattedDisks := quorumUnformattedDisks(sErrs)
if unformattedDisks && !firstDisk { if unformattedDisks && !firstDisk {
return nil, nil, errNotFirstDisk return nil, errNotFirstDisk
} }
// Return error when quorum unformatted disks but waiting for rest // Return error when quorum unformatted disks but waiting for rest
// of the servers to be online. // of the servers to be online.
if unformattedDisks && firstDisk { if unformattedDisks && firstDisk {
return nil, nil, errFirstDiskWait return nil, errFirstDiskWait
} }
format, err = getFormatErasureInQuorum(formatConfigs) format, err = getFormatErasureInQuorum(formatConfigs)
if err != nil { if err != nil {
internalLogIf(GlobalContext, err) var drivesNotFound int
return nil, nil, err for _, format := range formatConfigs {
if format != nil {
continue
}
drivesNotFound++
}
return nil, fmt.Errorf("%w (offline-drives=%d/%d)", err, drivesNotFound, len(formatConfigs))
} }
if format.ID == "" { if format.ID == "" {
internalLogIf(GlobalContext, errors.New("unexpected error deployment ID is missing, refusing to continue")) return nil, errors.New("deployment ID missing from disk format, unable to start the server")
return nil, nil, errInvalidArgument
} }
globalDeploymentIDPtr.Store(&format.ID) return format, nil
return storageDisks, format, nil
} }
// Format disks before initialization of object layer. // Format disks before initialization of object layer.
func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID string) ([]StorageAPI, *formatErasureV3, error) { func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCount, setDriveCount int, deploymentID string) (storageDisks []StorageAPI, format *formatErasureV3, err error) {
if len(endpoints) == 0 || setCount == 0 || setDriveCount == 0 { if len(endpoints) == 0 || setCount == 0 || setDriveCount == 0 {
return nil, nil, errInvalidArgument return nil, nil, errInvalidArgument
} }
@ -271,7 +253,26 @@ func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCou
verbose bool verbose bool
) )
storageDisks, format, err := connectLoadInitFormats(verbose, firstDisk, endpoints, poolCount, setCount, setDriveCount, deploymentID) // Initialize all storage disks
storageDisks, errs := initStorageDisksWithErrors(endpoints, storageOpts{cleanUp: true, healthCheck: true})
if err := checkDiskFatalErrs(errs); err != nil {
return nil, nil, err
}
defer func() {
if err == nil && format != nil {
// Assign globalDeploymentID() on first run for the
// minio server managing the first disk
globalDeploymentIDPtr.Store(&format.ID)
// Set the deployment ID here to avoid races.
xhttp.SetDeploymentID(format.ID)
xhttp.SetMinIOVersion(Version)
}
}()
format, err = connectLoadInitFormats(verbose, firstDisk, storageDisks, endpoints, poolCount, setCount, setDriveCount, deploymentID)
if err == nil { if err == nil {
return storageDisks, format, nil return storageDisks, format, nil
} }
@ -289,28 +290,28 @@ func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCou
tries = 1 tries = 1
} }
storageDisks, format, err := connectLoadInitFormats(verbose, firstDisk, endpoints, poolCount, setCount, setDriveCount, deploymentID) format, err = connectLoadInitFormats(verbose, firstDisk, storageDisks, endpoints, poolCount, setCount, setDriveCount, deploymentID)
if err == nil { if err == nil {
return storageDisks, format, nil return storageDisks, format, nil
} }
tries++ tries++
switch err { switch {
case errNotFirstDisk: case errors.Is(err, errNotFirstDisk):
// Fresh setup, wait for first server to be up. // Fresh setup, wait for first server to be up.
logger.Info("Waiting for the first server to format the drives (elapsed %s)\n", getElapsedTime()) logger.Info("Waiting for the first server to format the drives (elapsed %s)\n", getElapsedTime())
case errFirstDiskWait: case errors.Is(err, errFirstDiskWait):
// Fresh setup, wait for other servers to come up. // Fresh setup, wait for other servers to come up.
logger.Info("Waiting for all other servers to be online to format the drives (elapses %s)\n", getElapsedTime()) logger.Info("Waiting for all other servers to be online to format the drives (elapses %s)\n", getElapsedTime())
case errErasureReadQuorum: case errors.Is(err, errErasureReadQuorum):
// no quorum available continue to wait for minimum number of servers. // no quorum available continue to wait for minimum number of servers.
logger.Info("Waiting for a minimum of %d drives to come online (elapsed %s)\n", logger.Info("Waiting for a minimum of %d drives to come online (elapsed %s)\n",
len(endpoints)/2, getElapsedTime()) len(endpoints)/2, getElapsedTime())
case errErasureWriteQuorum: case errors.Is(err, errErasureWriteQuorum):
// no quorum available continue to wait for minimum number of servers. // no quorum available continue to wait for minimum number of servers.
logger.Info("Waiting for a minimum of %d drives to come online (elapsed %s)\n", logger.Info("Waiting for a minimum of %d drives to come online (elapsed %s)\n",
(len(endpoints)/2)+1, getElapsedTime()) (len(endpoints)/2)+1, getElapsedTime())
case errErasureV3ThisEmpty: case errors.Is(err, errErasureV3ThisEmpty):
// need to wait for this error to be healed, so continue. // need to wait for this error to be healed, so continue.
default: default:
// For all other unhandled errors we exit and fail. // For all other unhandled errors we exit and fail.

View File

@ -69,13 +69,16 @@ func Fatal(err error, msg string, data ...interface{}) {
} }
func fatal(err error, msg string, data ...interface{}) { func fatal(err error, msg string, data ...interface{}) {
var errMsg string if msg == "" {
if msg != "" { if len(data) > 0 {
errMsg = errorFmtFunc(fmt.Sprintf(msg, data...), err, jsonFlag) msg = fmt.Sprint(data...)
} else {
msg = "a fatal error"
}
} else { } else {
errMsg = err.Error() msg = fmt.Sprintf(msg, data...)
} }
consoleLog(fatalMessage, errMsg) consoleLog(fatalMessage, errorFmtFunc(msg, err, jsonFlag))
} }
var fatalMessage fatalMsg var fatalMessage fatalMsg
@ -183,13 +186,14 @@ func (i infoMsg) quiet(msg string, args ...interface{}) {
func (i infoMsg) pretty(msg string, args ...interface{}) { func (i infoMsg) pretty(msg string, args ...interface{}) {
if msg == "" { if msg == "" {
fmt.Fprintln(Output, args...) fmt.Fprintln(Output, args...)
} else {
fmt.Fprintf(Output, msg, args...)
} }
fmt.Fprintf(Output, msg, args...)
} }
type errorMsg struct{} type errorMsg struct{}
var errorm errorMsg var errorMessage errorMsg
func (i errorMsg) json(msg string, args ...interface{}) { func (i errorMsg) json(msg string, args ...interface{}) {
var message string var message string
@ -217,8 +221,9 @@ func (i errorMsg) quiet(msg string, args ...interface{}) {
func (i errorMsg) pretty(msg string, args ...interface{}) { func (i errorMsg) pretty(msg string, args ...interface{}) {
if msg == "" { if msg == "" {
fmt.Fprintln(Output, args...) fmt.Fprintln(Output, args...)
} else {
fmt.Fprintf(Output, msg, args...)
} }
fmt.Fprintf(Output, msg, args...)
} }
// Error : // Error :
@ -226,7 +231,7 @@ func Error(msg string, data ...interface{}) {
if DisableErrorLog { if DisableErrorLog {
return return
} }
consoleLog(errorm, msg, data...) consoleLog(errorMessage, msg, data...)
} }
// Info : // Info :