mirror of
https://github.com/minio/minio.git
synced 2025-02-03 18:06:00 -05:00
Add parallel bucket healing during startup (#11457)
Replaces #11449. Heals buckets concurrently during startup, limiting concurrency to 50 buckets at a time, and aborts on the first error. `errgroup.Group` is extended to facilitate this in a generic way.
This commit is contained in:
parent
c7eacba41c
commit
b4ac05523b
@ -41,6 +41,7 @@ import (
|
||||
"github.com/minio/minio/pkg/color"
|
||||
"github.com/minio/minio/pkg/env"
|
||||
"github.com/minio/minio/pkg/madmin"
|
||||
"github.com/minio/minio/pkg/sync/errgroup"
|
||||
)
|
||||
|
||||
// ServerFlags - server command specific flags
|
||||
@ -346,10 +347,22 @@ func initAllSubsystems(ctx context.Context, newObject ObjectLayer) (err error) {
|
||||
logger.Info(fmt.Sprintf("Verifying if %d buckets are consistent across drives...", len(buckets)))
|
||||
}
|
||||
}
|
||||
for _, bucket := range buckets {
|
||||
if _, err = newObject.HealBucket(ctx, bucket.Name, madmin.HealOpts{Recreate: true}); err != nil {
|
||||
return fmt.Errorf("Unable to list buckets to heal: %w", err)
|
||||
|
||||
// Limit to no more than 50 concurrent buckets.
|
||||
g := errgroup.WithNErrs(len(buckets)).WithConcurrency(50)
|
||||
ctx, cancel := g.WithCancelOnError(ctx)
|
||||
defer cancel()
|
||||
for index := range buckets {
|
||||
index := index
|
||||
g.Go(func() error {
|
||||
if _, berr := newObject.HealBucket(ctx, buckets[index].Name, madmin.HealOpts{Recreate: true}); berr != nil {
|
||||
return fmt.Errorf("Unable to list buckets to heal: %w", berr)
|
||||
}
|
||||
return nil
|
||||
}, index)
|
||||
}
|
||||
if err := g.WaitErr(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -17,43 +17,116 @@
|
||||
package errgroup
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// A Group is a collection of goroutines working on subtasks that are part of
// the same overall task.
//
// A zero Group is valid and does not cancel on error.
// A zero Group can be used if errors should not be tracked.
type Group struct {
	// firstErr is the index into errs of the first recorded error, or -1
	// while no error has been recorded (the value WithNErrs starts with).
	// It is updated with sync/atomic 64-bit operations, so it must remain
	// the first field of the struct: only the first word of an allocated
	// struct is guaranteed to be 64-bit aligned on 32-bit platforms.
	// ref: https://golang.org/pkg/sync/atomic/#pkg-note-BUG
	firstErr int64

	wg        sync.WaitGroup
	bucket    chan struct{} // Concurrency tokens; nil when unlimited.
	errs      []error       // Per-index task errors.
	cancel    context.CancelFunc
	ctxCancel <-chan struct{} // nil if no context.
	ctxErr    func() error
}
|
||||
|
||||
// WithNErrs returns a new Group with length of errs slice upto nerrs,
|
||||
// upon Wait() errors are returned collected from all tasks.
|
||||
func WithNErrs(nerrs int) *Group {
|
||||
return &Group{errs: make([]error, nerrs)}
|
||||
return &Group{errs: make([]error, nerrs), firstErr: -1}
|
||||
}
|
||||
|
||||
// Wait blocks until all function calls from the Go method have returned, then
|
||||
// returns the slice of errors from all function calls.
|
||||
func (g *Group) Wait() []error {
|
||||
g.wg.Wait()
|
||||
if g.cancel != nil {
|
||||
g.cancel()
|
||||
}
|
||||
return g.errs
|
||||
}
|
||||
|
||||
// WaitErr blocks until all function calls from the Go method have returned, then
|
||||
// returns the first error returned.
|
||||
func (g *Group) WaitErr() error {
|
||||
g.wg.Wait()
|
||||
if g.cancel != nil {
|
||||
g.cancel()
|
||||
}
|
||||
if g.firstErr >= 0 && len(g.errs) > int(g.firstErr) {
|
||||
// len(g.errs) > int(g.firstErr) is for then used uninitialized.
|
||||
return g.errs[g.firstErr]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// WithConcurrency allows to limit the concurrency of the group.
|
||||
// This must be called before starting any async processes.
|
||||
// There is no order to which functions are allowed to run.
|
||||
// If n <= 0 no concurrency limits are enforced.
|
||||
// g is modified and returned as well.
|
||||
func (g *Group) WithConcurrency(n int) *Group {
|
||||
if n <= 0 {
|
||||
g.bucket = nil
|
||||
return g
|
||||
}
|
||||
|
||||
// Fill bucket with tokens
|
||||
g.bucket = make(chan struct{}, n)
|
||||
for i := 0; i < n; i++ {
|
||||
g.bucket <- struct{}{}
|
||||
}
|
||||
return g
|
||||
}
|
||||
|
||||
// WithCancelOnError will return a context that is canceled
|
||||
// as soon as an error occurs.
|
||||
// The returned CancelFunc must always be called similar to context.WithCancel.
|
||||
// If the supplied context is canceled any goroutines waiting for execution are also canceled.
|
||||
func (g *Group) WithCancelOnError(ctx context.Context) (context.Context, context.CancelFunc) {
|
||||
ctx, g.cancel = context.WithCancel(ctx)
|
||||
g.ctxCancel = ctx.Done()
|
||||
g.ctxErr = ctx.Err
|
||||
return ctx, g.cancel
|
||||
}
|
||||
|
||||
// Go calls the given function in a new goroutine.
|
||||
//
|
||||
// The first call to return a non-nil error will be
|
||||
// collected in errs slice and returned by Wait().
|
||||
// The errors will be collected in errs slice and returned by Wait().
|
||||
func (g *Group) Go(f func() error, index int) {
|
||||
g.wg.Add(1)
|
||||
|
||||
go func() {
|
||||
defer g.wg.Done()
|
||||
|
||||
if g.bucket != nil {
|
||||
// Wait for token
|
||||
select {
|
||||
case <-g.bucket:
|
||||
defer func() {
|
||||
// Put back token..
|
||||
g.bucket <- struct{}{}
|
||||
}()
|
||||
case <-g.ctxCancel:
|
||||
if len(g.errs) > index {
|
||||
atomic.CompareAndSwapInt64(&g.firstErr, -1, int64(index))
|
||||
g.errs[index] = g.ctxErr()
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
if err := f(); err != nil {
|
||||
if len(g.errs) > index {
|
||||
atomic.CompareAndSwapInt64(&g.firstErr, -1, int64(index))
|
||||
g.errs[index] = err
|
||||
}
|
||||
if g.cancel != nil {
|
||||
g.cancel()
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user