mirror of
https://github.com/minio/minio.git
synced 2024-12-24 22:25:54 -05:00
retry and resume decom operation upon retriable failures (#15244)
In a k8s-like system, it is possible that reading pool.bin does not have quorum during startup; add a way to retry after this failure.
This commit is contained in:
parent
c1901f4e12
commit
5802df4365
@ -22,6 +22,7 @@ import (
|
|||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math/rand"
|
||||||
"net/http"
|
"net/http"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
@ -523,14 +524,26 @@ func (z *erasureServerPools) Init(ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
if globalEndpoints[idx].Endpoints[0].IsLocal {
|
if globalEndpoints[idx].Endpoints[0].IsLocal {
|
||||||
go func(pool PoolStatus) {
|
go func(pool PoolStatus) {
|
||||||
switch err := z.Decommission(ctx, pool.ID); err {
|
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||||
case nil:
|
for {
|
||||||
// we already started decommission
|
if err := z.Decommission(ctx, pool.ID); err != nil {
|
||||||
case errDecommissionAlreadyRunning:
|
switch err {
|
||||||
// A previous decommission running found restart it.
|
// we already started decommission
|
||||||
z.doDecommissionInRoutine(ctx, idx)
|
case errDecommissionAlreadyRunning:
|
||||||
default:
|
// A previous decommission running found restart it.
|
||||||
logger.LogIf(ctx, fmt.Errorf("Unable to resume decommission of pool %v: %w", pool, err))
|
z.doDecommissionInRoutine(ctx, idx)
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
if configRetriableErrors(err) {
|
||||||
|
logger.LogIf(ctx, fmt.Errorf("Unable to resume decommission of pool %v: %w: retrying..", pool, err))
|
||||||
|
time.Sleep(time.Second + time.Duration(r.Float64()*float64(5*time.Second)))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
logger.LogIf(ctx, fmt.Errorf("Unable to resume decommission of pool %v: %w", pool, err))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break
|
||||||
}
|
}
|
||||||
}(pool)
|
}(pool)
|
||||||
}
|
}
|
||||||
@ -984,7 +997,9 @@ func (z *erasureServerPools) DecommissionCancel(ctx context.Context, idx int) (e
|
|||||||
defer z.poolMetaMutex.Unlock()
|
defer z.poolMetaMutex.Unlock()
|
||||||
|
|
||||||
if z.poolMeta.DecommissionCancel(idx) {
|
if z.poolMeta.DecommissionCancel(idx) {
|
||||||
defer z.decommissionCancelers[idx]() // cancel any active thread.
|
if fn := z.decommissionCancelers[idx]; fn != nil {
|
||||||
|
defer fn() // cancel any active thread.
|
||||||
|
}
|
||||||
if err = z.poolMeta.save(ctx, z.serverPools); err != nil {
|
if err = z.poolMeta.save(ctx, z.serverPools); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -1006,7 +1021,9 @@ func (z *erasureServerPools) DecommissionFailed(ctx context.Context, idx int) (e
|
|||||||
defer z.poolMetaMutex.Unlock()
|
defer z.poolMetaMutex.Unlock()
|
||||||
|
|
||||||
if z.poolMeta.DecommissionFailed(idx) {
|
if z.poolMeta.DecommissionFailed(idx) {
|
||||||
defer z.decommissionCancelers[idx]() // cancel any active thread.
|
if fn := z.decommissionCancelers[idx]; fn != nil {
|
||||||
|
defer fn() // cancel any active thread.
|
||||||
|
}
|
||||||
if err = z.poolMeta.save(ctx, z.serverPools); err != nil {
|
if err = z.poolMeta.save(ctx, z.serverPools); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -1028,7 +1045,9 @@ func (z *erasureServerPools) CompleteDecommission(ctx context.Context, idx int)
|
|||||||
defer z.poolMetaMutex.Unlock()
|
defer z.poolMetaMutex.Unlock()
|
||||||
|
|
||||||
if z.poolMeta.DecommissionComplete(idx) {
|
if z.poolMeta.DecommissionComplete(idx) {
|
||||||
defer z.decommissionCancelers[idx]() // cancel any active thread.
|
if fn := z.decommissionCancelers[idx]; fn != nil {
|
||||||
|
defer fn() // cancel any active thread.
|
||||||
|
}
|
||||||
if err = z.poolMeta.save(ctx, z.serverPools); err != nil {
|
if err = z.poolMeta.save(ctx, z.serverPools); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user