speed up getFormatErasureInQuorum by using driveCount (#14239)

Startup speed-up: getFormatErasureInQuorum() currently
spends up to 2-3 seconds in setups with, for example,
3000+ drives. Simplify the implementation
to use drive counts instead.
This commit is contained in:
Harshavardhana
2022-02-04 12:21:21 -08:00
committed by GitHub
parent 778cccb15d
commit 6123377e66
7 changed files with 165 additions and 94 deletions

View File

@@ -185,7 +185,7 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
for i, err := range errs {
if err != nil {
if err == errDiskNotFound && retryCount >= 5 {
if err == errDiskNotFound && retryCount >= 10 {
logger.Error("Unable to connect to %s: %v", endpoints[i], isServerResolvable(endpoints[i], time.Second))
} else {
logger.Error("Unable to use the drive %s: %v", endpoints[i], err)
@@ -202,7 +202,7 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
// Check if we have
for i, sErr := range sErrs {
// print the error, nonetheless, which is perhaps unhandled
if sErr != errUnformattedDisk && sErr != errDiskNotFound && retryCount >= 5 {
if sErr != errUnformattedDisk && sErr != errDiskNotFound && retryCount >= 10 {
if sErr != nil {
logger.Error("Unable to read 'format.json' from %s: %v\n", endpoints[i], sErr)
}
@@ -315,23 +315,28 @@ func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCou
tries++ // tried already once
// Wait on each try for an update.
ticker := time.NewTicker(250 * time.Millisecond)
ticker := time.NewTicker(150 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-ticker.C:
if tries == 10 {
// Reset the tries count such that we log only for every 10 retries.
tries = 1
}
storageDisks, format, err := connectLoadInitFormats(tries, firstDisk, endpoints, poolCount, setCount, setDriveCount, deploymentID, distributionAlgo)
if err != nil {
tries++
switch err {
case errNotFirstDisk:
// Fresh setup, wait for first server to be up.
logger.Info("Waiting for the first server to format the disks.")
logger.Info("Waiting for the first server to format the disks (elapsed %s)\n", getElapsedTime())
continue
case errFirstDiskWait:
// Fresh setup, wait for other servers to come up.
logger.Info("Waiting for all other servers to be online to format the disks.")
logger.Info("Waiting for all other servers to be online to format the disks (elapses %s)\n", getElapsedTime())
continue
case errErasureReadQuorum:
// no quorum available continue to wait for minimum number of servers.