mirror of
https://github.com/minio/minio.git
synced 2025-02-04 02:15:59 -05:00
Add parallel bucket healing during startup (#11457)
Replaces #11449. Does concurrent healing, but limits concurrency to 50 buckets. Aborts on the first error. `errgroup.Group` is extended to facilitate this in a generic way.
This commit is contained in:
parent
c7eacba41c
commit
b4ac05523b
@ -41,6 +41,7 @@ import (
|
|||||||
"github.com/minio/minio/pkg/color"
|
"github.com/minio/minio/pkg/color"
|
||||||
"github.com/minio/minio/pkg/env"
|
"github.com/minio/minio/pkg/env"
|
||||||
"github.com/minio/minio/pkg/madmin"
|
"github.com/minio/minio/pkg/madmin"
|
||||||
|
"github.com/minio/minio/pkg/sync/errgroup"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ServerFlags - server command specific flags
|
// ServerFlags - server command specific flags
|
||||||
@ -346,10 +347,22 @@ func initAllSubsystems(ctx context.Context, newObject ObjectLayer) (err error) {
|
|||||||
logger.Info(fmt.Sprintf("Verifying if %d buckets are consistent across drives...", len(buckets)))
|
logger.Info(fmt.Sprintf("Verifying if %d buckets are consistent across drives...", len(buckets)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, bucket := range buckets {
|
|
||||||
if _, err = newObject.HealBucket(ctx, bucket.Name, madmin.HealOpts{Recreate: true}); err != nil {
|
// Limit to no more than 50 concurrent buckets.
|
||||||
return fmt.Errorf("Unable to list buckets to heal: %w", err)
|
g := errgroup.WithNErrs(len(buckets)).WithConcurrency(50)
|
||||||
}
|
ctx, cancel := g.WithCancelOnError(ctx)
|
||||||
|
defer cancel()
|
||||||
|
for index := range buckets {
|
||||||
|
index := index
|
||||||
|
g.Go(func() error {
|
||||||
|
if _, berr := newObject.HealBucket(ctx, buckets[index].Name, madmin.HealOpts{Recreate: true}); berr != nil {
|
||||||
|
return fmt.Errorf("Unable to list buckets to heal: %w", berr)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}, index)
|
||||||
|
}
|
||||||
|
if err := g.WaitErr(); err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -17,43 +17,116 @@
|
|||||||
package errgroup
|
package errgroup
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"sync"
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
)
|
)
|
||||||
|
|
||||||
// A Group is a collection of goroutines working on subtasks that are part of
|
// A Group is a collection of goroutines working on subtasks that are part of
|
||||||
// the same overall task.
|
// the same overall task.
|
||||||
//
|
//
|
||||||
// A zero Group is valid and does not cancel on error.
|
// A zero Group can be used if errors should not be tracked.
|
||||||
type Group struct {
|
type Group struct {
|
||||||
wg sync.WaitGroup
|
wg sync.WaitGroup
|
||||||
errs []error
|
bucket chan struct{}
|
||||||
|
errs []error
|
||||||
|
firstErr int64
|
||||||
|
cancel context.CancelFunc
|
||||||
|
ctxCancel <-chan struct{} // nil if no context.
|
||||||
|
ctxErr func() error
|
||||||
}
|
}
|
||||||
|
|
||||||
// WithNErrs returns a new Group with length of errs slice up to nerrs,
|
// WithNErrs returns a new Group with length of errs slice up to nerrs,
|
||||||
// upon Wait() errors are returned collected from all tasks.
|
// upon Wait() errors are returned collected from all tasks.
|
||||||
func WithNErrs(nerrs int) *Group {
|
func WithNErrs(nerrs int) *Group {
|
||||||
return &Group{errs: make([]error, nerrs)}
|
return &Group{errs: make([]error, nerrs), firstErr: -1}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait blocks until all function calls from the Go method have returned, then
|
// Wait blocks until all function calls from the Go method have returned, then
|
||||||
// returns the slice of errors from all function calls.
|
// returns the slice of errors from all function calls.
|
||||||
func (g *Group) Wait() []error {
|
func (g *Group) Wait() []error {
|
||||||
g.wg.Wait()
|
g.wg.Wait()
|
||||||
|
if g.cancel != nil {
|
||||||
|
g.cancel()
|
||||||
|
}
|
||||||
return g.errs
|
return g.errs
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WaitErr blocks until all function calls from the Go method have returned, then
|
||||||
|
// returns the first error returned.
|
||||||
|
func (g *Group) WaitErr() error {
|
||||||
|
g.wg.Wait()
|
||||||
|
if g.cancel != nil {
|
||||||
|
g.cancel()
|
||||||
|
}
|
||||||
|
if g.firstErr >= 0 && len(g.errs) > int(g.firstErr) {
|
||||||
|
// len(g.errs) > int(g.firstErr) is for when the Group is used uninitialized.
|
||||||
|
return g.errs[g.firstErr]
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithConcurrency limits the concurrency of the group.
|
||||||
|
// This must be called before starting any async processes.
|
||||||
|
// There is no guaranteed order in which functions are allowed to run.
|
||||||
|
// If n <= 0 no concurrency limits are enforced.
|
||||||
|
// g is modified and returned as well.
|
||||||
|
func (g *Group) WithConcurrency(n int) *Group {
|
||||||
|
if n <= 0 {
|
||||||
|
g.bucket = nil
|
||||||
|
return g
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill bucket with tokens
|
||||||
|
g.bucket = make(chan struct{}, n)
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
g.bucket <- struct{}{}
|
||||||
|
}
|
||||||
|
return g
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithCancelOnError will return a context that is canceled
|
||||||
|
// as soon as an error occurs.
|
||||||
|
// The returned CancelFunc must always be called, as with context.WithCancel.
|
||||||
|
// If the supplied context is canceled, any goroutines waiting for execution are also canceled.
|
||||||
|
func (g *Group) WithCancelOnError(ctx context.Context) (context.Context, context.CancelFunc) {
|
||||||
|
ctx, g.cancel = context.WithCancel(ctx)
|
||||||
|
g.ctxCancel = ctx.Done()
|
||||||
|
g.ctxErr = ctx.Err
|
||||||
|
return ctx, g.cancel
|
||||||
|
}
|
||||||
|
|
||||||
// Go calls the given function in a new goroutine.
|
// Go calls the given function in a new goroutine.
|
||||||
//
|
//
|
||||||
// The first call to return a non-nil error will be
|
// The errors will be collected in errs slice and returned by Wait().
|
||||||
// collected in errs slice and returned by Wait().
|
|
||||||
func (g *Group) Go(f func() error, index int) {
|
func (g *Group) Go(f func() error, index int) {
|
||||||
g.wg.Add(1)
|
g.wg.Add(1)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
defer g.wg.Done()
|
defer g.wg.Done()
|
||||||
|
if g.bucket != nil {
|
||||||
|
// Wait for token
|
||||||
|
select {
|
||||||
|
case <-g.bucket:
|
||||||
|
defer func() {
|
||||||
|
// Put back token..
|
||||||
|
g.bucket <- struct{}{}
|
||||||
|
}()
|
||||||
|
case <-g.ctxCancel:
|
||||||
|
if len(g.errs) > index {
|
||||||
|
atomic.CompareAndSwapInt64(&g.firstErr, -1, int64(index))
|
||||||
|
g.errs[index] = g.ctxErr()
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
if err := f(); err != nil {
|
if err := f(); err != nil {
|
||||||
g.errs[index] = err
|
if len(g.errs) > index {
|
||||||
|
atomic.CompareAndSwapInt64(&g.firstErr, -1, int64(index))
|
||||||
|
g.errs[index] = err
|
||||||
|
}
|
||||||
|
if g.cancel != nil {
|
||||||
|
g.cancel()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user