From fd37418da27d320edd1fc89be090571768a814a2 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Mon, 23 Oct 2023 12:30:20 -0700 Subject: [PATCH] fix: allow server not initialized error to be retried (#18300) Since relaxing the error quorum across pools for ListBuckets() and GetBucketInfo(), we can hit a situation where loading IAM returns a "server is not initialized" error from the second pool. We need to handle this by letting the pool come online and retrying transparently; this PR does that. --- cmd/iam-store.go | 18 ++++-------------- cmd/iam.go | 3 ++- cmd/server-main.go | 28 +++++++++++++++++++++++----- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/cmd/iam-store.go b/cmd/iam-store.go index 596d384e8..07ab8ec86 100644 --- a/cmd/iam-store.go +++ b/cmd/iam-store.go @@ -112,14 +112,9 @@ func saveIAMFormat(ctx context.Context, store IAMStorageAPI) error { bootstrapTraceMsg("Load IAM format file") var iamFmt iamFormat path := getIAMFormatFilePath() - if err := store.loadIAMConfig(ctx, &iamFmt, path); err != nil { - switch err { - case errConfigNotFound: - // Need to migrate to V1. - default: - // if IAM format - return err - } + if err := store.loadIAMConfig(ctx, &iamFmt, path); err != nil && !errors.Is(err, errConfigNotFound) { + // if IAM format + return err } if iamFmt.Version >= iamFormatVersion1 { @@ -129,12 +124,7 @@ func saveIAMFormat(ctx context.Context, store IAMStorageAPI) error { bootstrapTraceMsg("Write IAM format file") // Save iam format to version 1. 
- if err := store.saveIAMConfig(ctx, newIAMFormatVersion1(), path); err != nil { - logger.LogIf(ctx, err) - return err - } - - return nil + return store.saveIAMConfig(ctx, newIAMFormatVersion1(), path) } func getGroupInfoPath(group string) string { diff --git a/cmd/iam.go b/cmd/iam.go index 21ae7b57f..5eba439af 100644 --- a/cmd/iam.go +++ b/cmd/iam.go @@ -293,6 +293,7 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer, etcdClient *etc if err := saveIAMFormat(retryCtx, sys.store); err != nil { if configRetriableErrors(err) { logger.Info("Waiting for all MinIO IAM sub-system to be initialized.. possible cause (%v)", err) + time.Sleep(time.Duration(r.Float64() * float64(time.Second))) continue } logger.LogIf(ctx, fmt.Errorf("IAM sub-system is partially initialized, unable to write the IAM format: %w", err)) @@ -307,7 +308,7 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer, etcdClient *etc if err := sys.Load(retryCtx, true); err != nil { if configRetriableErrors(err) { logger.Info("Waiting for all MinIO IAM sub-system to be initialized.. 
possible cause (%v)", err) - time.Sleep(time.Duration(r.Float64() * float64(5*time.Second))) + time.Sleep(time.Duration(r.Float64() * float64(time.Second))) continue } if err != nil { diff --git a/cmd/server-main.go b/cmd/server-main.go index 124ebfb6a..ea0ec28cd 100644 --- a/cmd/server-main.go +++ b/cmd/server-main.go @@ -372,6 +372,12 @@ func initAllSubsystems(ctx context.Context) { } func configRetriableErrors(err error) bool { + if err == nil { + return false + } + + notInitialized := err.Error() == "Server not initialized, please try again" + // Initializing sub-systems needs a retry mechanism for // the following reasons: // - Read quorum is lost just after the initialization @@ -392,7 +398,8 @@ func configRetriableErrors(err error) bool { errors.As(err, &wquorum) || isErrObjectNotFound(err) || isErrBucketNotFound(err) || - errors.Is(err, os.ErrDeadlineExceeded) + errors.Is(err, os.ErrDeadlineExceeded) || + notInitialized } func bootstrapTraceMsg(msg string) { @@ -813,10 +820,12 @@ func serverMain(ctx *cli.Context) { }() go func() { + r := rand.New(rand.NewSource(time.Now().UnixNano())) + if !globalDisableFreezeOnBoot { defer bootstrapTrace("unfreezeServices", unfreezeServices) t := time.AfterFunc(5*time.Minute, func() { - logger.Info(color.Yellow("WARNING: Taking more time to initialize the config subsystem. Please set '_MINIO_DISABLE_API_FREEZE_ON_BOOT=true' to not freeze the APIs")) + logger.Info(color.Yellow("WARNING: Initializing the config subsystem is taking longer than 5 minutes. Please set '_MINIO_DISABLE_API_FREEZE_ON_BOOT=true' to not freeze the APIs")) }) defer t.Stop() } @@ -864,9 +873,18 @@ func serverMain(ctx *cli.Context) { var buckets []BucketInfo // List buckets to initialize bucket metadata sub-sys. 
bootstrapTrace("listBuckets", func() { - buckets, err = newObject.ListBuckets(GlobalContext, BucketOptions{}) - if err != nil { - logger.LogIf(GlobalContext, fmt.Errorf("Unable to list buckets to initialize bucket metadata sub-system: %w", err)) + for { + buckets, err = newObject.ListBuckets(GlobalContext, BucketOptions{}) + if err != nil { + if configRetriableErrors(err) { + logger.Info("Waiting for list buckets to succeed to initialize buckets.. possible cause (%v)", err) + time.Sleep(time.Duration(r.Float64() * float64(time.Second))) + continue + } + logger.LogIf(GlobalContext, fmt.Errorf("Unable to list buckets to initialize bucket metadata sub-system: %w", err)) + } + + break } })