mirror of
https://github.com/minio/minio.git
synced 2024-12-25 06:35:56 -05:00
fix: allow server not initialized error to be retried (#18300)
Since we relaxed how quorum errors are handled across pools for ListBuckets() and GetBucketInfo(), we can hit a situation where loading IAM returns a "server not initialized" error from the second pool. We need to handle this by letting the pool come online and retrying transparently; this PR does that.
This commit is contained in:
parent
bbfea29c2b
commit
fd37418da2
@ -112,14 +112,9 @@ func saveIAMFormat(ctx context.Context, store IAMStorageAPI) error {
|
|||||||
bootstrapTraceMsg("Load IAM format file")
|
bootstrapTraceMsg("Load IAM format file")
|
||||||
var iamFmt iamFormat
|
var iamFmt iamFormat
|
||||||
path := getIAMFormatFilePath()
|
path := getIAMFormatFilePath()
|
||||||
if err := store.loadIAMConfig(ctx, &iamFmt, path); err != nil {
|
if err := store.loadIAMConfig(ctx, &iamFmt, path); err != nil && !errors.Is(err, errConfigNotFound) {
|
||||||
switch err {
|
// if IAM format
|
||||||
case errConfigNotFound:
|
return err
|
||||||
// Need to migrate to V1.
|
|
||||||
default:
|
|
||||||
// if IAM format
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if iamFmt.Version >= iamFormatVersion1 {
|
if iamFmt.Version >= iamFormatVersion1 {
|
||||||
@ -129,12 +124,7 @@ func saveIAMFormat(ctx context.Context, store IAMStorageAPI) error {
|
|||||||
|
|
||||||
bootstrapTraceMsg("Write IAM format file")
|
bootstrapTraceMsg("Write IAM format file")
|
||||||
// Save iam format to version 1.
|
// Save iam format to version 1.
|
||||||
if err := store.saveIAMConfig(ctx, newIAMFormatVersion1(), path); err != nil {
|
return store.saveIAMConfig(ctx, newIAMFormatVersion1(), path)
|
||||||
logger.LogIf(ctx, err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func getGroupInfoPath(group string) string {
|
func getGroupInfoPath(group string) string {
|
||||||
|
@ -293,6 +293,7 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer, etcdClient *etc
|
|||||||
if err := saveIAMFormat(retryCtx, sys.store); err != nil {
|
if err := saveIAMFormat(retryCtx, sys.store); err != nil {
|
||||||
if configRetriableErrors(err) {
|
if configRetriableErrors(err) {
|
||||||
logger.Info("Waiting for all MinIO IAM sub-system to be initialized.. possible cause (%v)", err)
|
logger.Info("Waiting for all MinIO IAM sub-system to be initialized.. possible cause (%v)", err)
|
||||||
|
time.Sleep(time.Duration(r.Float64() * float64(time.Second)))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
logger.LogIf(ctx, fmt.Errorf("IAM sub-system is partially initialized, unable to write the IAM format: %w", err))
|
logger.LogIf(ctx, fmt.Errorf("IAM sub-system is partially initialized, unable to write the IAM format: %w", err))
|
||||||
@ -307,7 +308,7 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer, etcdClient *etc
|
|||||||
if err := sys.Load(retryCtx, true); err != nil {
|
if err := sys.Load(retryCtx, true); err != nil {
|
||||||
if configRetriableErrors(err) {
|
if configRetriableErrors(err) {
|
||||||
logger.Info("Waiting for all MinIO IAM sub-system to be initialized.. possible cause (%v)", err)
|
logger.Info("Waiting for all MinIO IAM sub-system to be initialized.. possible cause (%v)", err)
|
||||||
time.Sleep(time.Duration(r.Float64() * float64(5*time.Second)))
|
time.Sleep(time.Duration(r.Float64() * float64(time.Second)))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -372,6 +372,12 @@ func initAllSubsystems(ctx context.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func configRetriableErrors(err error) bool {
|
func configRetriableErrors(err error) bool {
|
||||||
|
if err == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
notInitialized := err.Error() == "Server not initialized, please try again"
|
||||||
|
|
||||||
// Initializing sub-systems needs a retry mechanism for
|
// Initializing sub-systems needs a retry mechanism for
|
||||||
// the following reasons:
|
// the following reasons:
|
||||||
// - Read quorum is lost just after the initialization
|
// - Read quorum is lost just after the initialization
|
||||||
@ -392,7 +398,8 @@ func configRetriableErrors(err error) bool {
|
|||||||
errors.As(err, &wquorum) ||
|
errors.As(err, &wquorum) ||
|
||||||
isErrObjectNotFound(err) ||
|
isErrObjectNotFound(err) ||
|
||||||
isErrBucketNotFound(err) ||
|
isErrBucketNotFound(err) ||
|
||||||
errors.Is(err, os.ErrDeadlineExceeded)
|
errors.Is(err, os.ErrDeadlineExceeded) ||
|
||||||
|
notInitialized
|
||||||
}
|
}
|
||||||
|
|
||||||
func bootstrapTraceMsg(msg string) {
|
func bootstrapTraceMsg(msg string) {
|
||||||
@ -813,10 +820,12 @@ func serverMain(ctx *cli.Context) {
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
|
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||||
|
|
||||||
if !globalDisableFreezeOnBoot {
|
if !globalDisableFreezeOnBoot {
|
||||||
defer bootstrapTrace("unfreezeServices", unfreezeServices)
|
defer bootstrapTrace("unfreezeServices", unfreezeServices)
|
||||||
t := time.AfterFunc(5*time.Minute, func() {
|
t := time.AfterFunc(5*time.Minute, func() {
|
||||||
logger.Info(color.Yellow("WARNING: Taking more time to initialize the config subsystem. Please set '_MINIO_DISABLE_API_FREEZE_ON_BOOT=true' to not freeze the APIs"))
|
logger.Info(color.Yellow("WARNING: Initializing the config subsystem is taking longer than 5 minutes. Please set '_MINIO_DISABLE_API_FREEZE_ON_BOOT=true' to not freeze the APIs"))
|
||||||
})
|
})
|
||||||
defer t.Stop()
|
defer t.Stop()
|
||||||
}
|
}
|
||||||
@ -864,9 +873,18 @@ func serverMain(ctx *cli.Context) {
|
|||||||
var buckets []BucketInfo
|
var buckets []BucketInfo
|
||||||
// List buckets to initialize bucket metadata sub-sys.
|
// List buckets to initialize bucket metadata sub-sys.
|
||||||
bootstrapTrace("listBuckets", func() {
|
bootstrapTrace("listBuckets", func() {
|
||||||
buckets, err = newObject.ListBuckets(GlobalContext, BucketOptions{})
|
for {
|
||||||
if err != nil {
|
buckets, err = newObject.ListBuckets(GlobalContext, BucketOptions{})
|
||||||
logger.LogIf(GlobalContext, fmt.Errorf("Unable to list buckets to initialize bucket metadata sub-system: %w", err))
|
if err != nil {
|
||||||
|
if configRetriableErrors(err) {
|
||||||
|
logger.Info("Waiting for list buckets to succeed to initialize buckets.. possible cause (%v)", err)
|
||||||
|
time.Sleep(time.Duration(r.Float64() * float64(time.Second)))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
logger.LogIf(GlobalContext, fmt.Errorf("Unable to list buckets to initialize bucket metadata sub-system: %w", err))
|
||||||
|
}
|
||||||
|
|
||||||
|
break
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user