mirror of
https://github.com/minio/minio.git
synced 2025-01-11 15:03:22 -05:00
Add dynamic reconfiguration of number of transition workers (#12926)
This commit is contained in:
parent
9e88941515
commit
65b6f4aa31
@ -24,7 +24,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"runtime"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@ -115,9 +114,14 @@ func initBackgroundExpiry(ctx context.Context, objectAPI ObjectLayer) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type transitionState struct {
|
type transitionState struct {
|
||||||
once sync.Once
|
once sync.Once
|
||||||
// add future metrics here
|
|
||||||
transitionCh chan ObjectInfo
|
transitionCh chan ObjectInfo
|
||||||
|
|
||||||
|
ctx context.Context
|
||||||
|
objAPI ObjectLayer
|
||||||
|
mu sync.Mutex
|
||||||
|
numWorkers int
|
||||||
|
killCh chan struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *transitionState) queueTransitionTask(oi ObjectInfo) {
|
func (t *transitionState) queueTransitionTask(oi ObjectInfo) {
|
||||||
@ -132,50 +136,59 @@ func (t *transitionState) queueTransitionTask(oi ObjectInfo) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
globalTransitionState *transitionState
|
globalTransitionState *transitionState
|
||||||
globalTransitionConcurrent = runtime.GOMAXPROCS(0) / 2
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func newTransitionState() *transitionState {
|
func newTransitionState(ctx context.Context, objAPI ObjectLayer) *transitionState {
|
||||||
// fix minimum concurrent transition to 1 for single CPU setup
|
|
||||||
if globalTransitionConcurrent == 0 {
|
|
||||||
globalTransitionConcurrent = 1
|
|
||||||
}
|
|
||||||
return &transitionState{
|
return &transitionState{
|
||||||
transitionCh: make(chan ObjectInfo, 10000),
|
transitionCh: make(chan ObjectInfo, 10000),
|
||||||
|
ctx: ctx,
|
||||||
|
objAPI: objAPI,
|
||||||
|
killCh: make(chan struct{}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// addWorker creates a new worker to process tasks
|
// worker processes queued transition tasks until it receives a kill signal, ctx is cancelled, or transitionCh is closed
|
||||||
func (t *transitionState) addWorker(ctx context.Context, objectAPI ObjectLayer) {
|
func (t *transitionState) worker(ctx context.Context, objectAPI ObjectLayer) {
|
||||||
// Add a new worker.
|
for {
|
||||||
go func() {
|
select {
|
||||||
for {
|
case <-t.killCh:
|
||||||
select {
|
return
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case oi, ok := <-t.transitionCh:
|
||||||
|
if !ok {
|
||||||
return
|
return
|
||||||
case oi, ok := <-t.transitionCh:
|
}
|
||||||
if !ok {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := transitionObject(ctx, objectAPI, oi); err != nil {
|
if err := transitionObject(ctx, objectAPI, oi); err != nil {
|
||||||
logger.LogIf(ctx, fmt.Errorf("Transition failed for %s/%s version:%s with %w", oi.Bucket, oi.Name, oi.VersionID, err))
|
logger.LogIf(ctx, fmt.Errorf("Transition failed for %s/%s version:%s with %w", oi.Bucket, oi.Name, oi.VersionID, err))
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// UpdateWorkers starts new workers or signals surplus ones to stop so that
|
||||||
|
// n goroutines are left waiting for transition tasks
|
||||||
|
func (t *transitionState) UpdateWorkers(n int) {
|
||||||
|
t.mu.Lock()
|
||||||
|
defer t.mu.Unlock()
|
||||||
|
|
||||||
|
for t.numWorkers < n {
|
||||||
|
go t.worker(t.ctx, t.objAPI)
|
||||||
|
t.numWorkers++
|
||||||
|
}
|
||||||
|
|
||||||
|
for t.numWorkers > n {
|
||||||
|
go func() { t.killCh <- struct{}{} }()
|
||||||
|
t.numWorkers--
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func initBackgroundTransition(ctx context.Context, objectAPI ObjectLayer) {
|
func initBackgroundTransition(ctx context.Context, objectAPI ObjectLayer) {
|
||||||
if globalTransitionState == nil {
|
globalTransitionState = newTransitionState(ctx, objectAPI)
|
||||||
return
|
n := globalAPIConfig.getTransitionWorkers()
|
||||||
}
|
globalTransitionState.UpdateWorkers(n)
|
||||||
|
|
||||||
// Start with globalTransitionConcurrent.
|
|
||||||
for i := 0; i < globalTransitionConcurrent; i++ {
|
|
||||||
globalTransitionState.addWorker(ctx, objectAPI)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var errInvalidStorageClass = errors.New("invalid storage class")
|
var errInvalidStorageClass = errors.New("invalid storage class")
|
||||||
|
@ -92,8 +92,6 @@ func init() {
|
|||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
globalTransitionState = newTransitionState()
|
|
||||||
|
|
||||||
console.SetColor("Debug", fcolor.New())
|
console.SetColor("Debug", fcolor.New())
|
||||||
|
|
||||||
gob.Register(StorageErr(""))
|
gob.Register(StorageErr(""))
|
||||||
|
@ -39,6 +39,7 @@ type apiConfig struct {
|
|||||||
totalDriveCount int
|
totalDriveCount int
|
||||||
replicationWorkers int
|
replicationWorkers int
|
||||||
replicationFailedWorkers int
|
replicationFailedWorkers int
|
||||||
|
transitionWorkers int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *apiConfig) init(cfg api.Config, setDriveCounts []int) {
|
func (t *apiConfig) init(cfg api.Config, setDriveCounts []int) {
|
||||||
@ -87,6 +88,10 @@ func (t *apiConfig) init(cfg api.Config, setDriveCounts []int) {
|
|||||||
}
|
}
|
||||||
t.replicationFailedWorkers = cfg.ReplicationFailedWorkers
|
t.replicationFailedWorkers = cfg.ReplicationFailedWorkers
|
||||||
t.replicationWorkers = cfg.ReplicationWorkers
|
t.replicationWorkers = cfg.ReplicationWorkers
|
||||||
|
if globalTransitionState != nil && cfg.TransitionWorkers != t.transitionWorkers {
|
||||||
|
globalTransitionState.UpdateWorkers(cfg.TransitionWorkers)
|
||||||
|
}
|
||||||
|
t.transitionWorkers = cfg.TransitionWorkers
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *apiConfig) getListQuorum() int {
|
func (t *apiConfig) getListQuorum() int {
|
||||||
@ -173,3 +178,10 @@ func (t *apiConfig) getReplicationWorkers() int {
|
|||||||
|
|
||||||
return t.replicationWorkers
|
return t.replicationWorkers
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t *apiConfig) getTransitionWorkers() int {
|
||||||
|
t.mu.RLock()
|
||||||
|
defer t.mu.RUnlock()
|
||||||
|
|
||||||
|
return t.transitionWorkers
|
||||||
|
}
|
||||||
|
@ -530,7 +530,6 @@ func serverMain(ctx *cli.Context) {
|
|||||||
if globalIsErasure {
|
if globalIsErasure {
|
||||||
initAutoHeal(GlobalContext, newObject)
|
initAutoHeal(GlobalContext, newObject)
|
||||||
initHealMRF(GlobalContext, newObject)
|
initHealMRF(GlobalContext, newObject)
|
||||||
initBackgroundTransition(GlobalContext, newObject)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
initBackgroundExpiry(GlobalContext, newObject)
|
initBackgroundExpiry(GlobalContext, newObject)
|
||||||
@ -558,6 +557,7 @@ func serverMain(ctx *cli.Context) {
|
|||||||
|
|
||||||
if globalIsErasure { // to be done after config init
|
if globalIsErasure { // to be done after config init
|
||||||
initBackgroundReplication(GlobalContext, newObject)
|
initBackgroundReplication(GlobalContext, newObject)
|
||||||
|
initBackgroundTransition(GlobalContext, newObject)
|
||||||
globalTierJournal, err = initTierDeletionJournal(GlobalContext)
|
globalTierJournal, err = initTierDeletionJournal(GlobalContext)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.FatalIf(err, "Unable to initialize remote tier pending deletes journal")
|
logger.FatalIf(err, "Unable to initialize remote tier pending deletes journal")
|
||||||
|
@ -20,6 +20,7 @@ package api
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
|
"runtime"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@ -38,6 +39,7 @@ const (
|
|||||||
apiListQuorum = "list_quorum"
|
apiListQuorum = "list_quorum"
|
||||||
apiReplicationWorkers = "replication_workers"
|
apiReplicationWorkers = "replication_workers"
|
||||||
apiReplicationFailedWorkers = "replication_failed_workers"
|
apiReplicationFailedWorkers = "replication_failed_workers"
|
||||||
|
apiTransitionWorkers = "transition_workers"
|
||||||
|
|
||||||
EnvAPIRequestsMax = "MINIO_API_REQUESTS_MAX"
|
EnvAPIRequestsMax = "MINIO_API_REQUESTS_MAX"
|
||||||
EnvAPIRequestsDeadline = "MINIO_API_REQUESTS_DEADLINE"
|
EnvAPIRequestsDeadline = "MINIO_API_REQUESTS_DEADLINE"
|
||||||
@ -48,6 +50,7 @@ const (
|
|||||||
EnvAPISecureCiphers = "MINIO_API_SECURE_CIPHERS"
|
EnvAPISecureCiphers = "MINIO_API_SECURE_CIPHERS"
|
||||||
EnvAPIReplicationWorkers = "MINIO_API_REPLICATION_WORKERS"
|
EnvAPIReplicationWorkers = "MINIO_API_REPLICATION_WORKERS"
|
||||||
EnvAPIReplicationFailedWorkers = "MINIO_API_REPLICATION_FAILED_WORKERS"
|
EnvAPIReplicationFailedWorkers = "MINIO_API_REPLICATION_FAILED_WORKERS"
|
||||||
|
EnvAPITransitionWorkers = "MINIO_API_TRANSITION_WORKERS"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Deprecated key and ENVs
|
// Deprecated key and ENVs
|
||||||
@ -91,6 +94,10 @@ var (
|
|||||||
Key: apiReplicationFailedWorkers,
|
Key: apiReplicationFailedWorkers,
|
||||||
Value: "8",
|
Value: "8",
|
||||||
},
|
},
|
||||||
|
config.KV{
|
||||||
|
Key: apiTransitionWorkers,
|
||||||
|
Value: "100",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -104,6 +111,7 @@ type Config struct {
|
|||||||
ListQuorum string `json:"list_quorum"`
|
ListQuorum string `json:"list_quorum"`
|
||||||
ReplicationWorkers int `json:"replication_workers"`
|
ReplicationWorkers int `json:"replication_workers"`
|
||||||
ReplicationFailedWorkers int `json:"replication_failed_workers"`
|
ReplicationFailedWorkers int `json:"replication_failed_workers"`
|
||||||
|
TransitionWorkers int `json:"transition_workers"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// UnmarshalJSON - Validate SS and RRS parity when unmarshalling JSON.
|
// UnmarshalJSON - Validate SS and RRS parity when unmarshalling JSON.
|
||||||
@ -195,6 +203,14 @@ func LookupConfig(kvs config.KVS) (cfg Config, err error) {
|
|||||||
return cfg, config.ErrInvalidReplicationWorkersValue(nil).Msg("Minimum number of replication failed workers should be 1")
|
return cfg, config.ErrInvalidReplicationWorkersValue(nil).Msg("Minimum number of replication failed workers should be 1")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
transitionWorkers, err := strconv.Atoi(env.Get(EnvAPITransitionWorkers, kvs.Get(apiTransitionWorkers)))
|
||||||
|
if err != nil {
|
||||||
|
return cfg, err
|
||||||
|
}
|
||||||
|
if transitionWorkers < runtime.GOMAXPROCS(0)/2 {
|
||||||
|
return cfg, config.ErrInvalidTransitionWorkersValue(nil)
|
||||||
|
}
|
||||||
|
|
||||||
return Config{
|
return Config{
|
||||||
RequestsMax: requestsMax,
|
RequestsMax: requestsMax,
|
||||||
RequestsDeadline: requestsDeadline,
|
RequestsDeadline: requestsDeadline,
|
||||||
@ -204,5 +220,6 @@ func LookupConfig(kvs config.KVS) (cfg Config, err error) {
|
|||||||
ListQuorum: listQuorum,
|
ListQuorum: listQuorum,
|
||||||
ReplicationWorkers: replicationWorkers,
|
ReplicationWorkers: replicationWorkers,
|
||||||
ReplicationFailedWorkers: replicationFailedWorkers,
|
ReplicationFailedWorkers: replicationFailedWorkers,
|
||||||
|
TransitionWorkers: transitionWorkers,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
@ -58,5 +58,11 @@ var (
|
|||||||
Optional: true,
|
Optional: true,
|
||||||
Type: "number",
|
Type: "number",
|
||||||
},
|
},
|
||||||
|
config.HelpKV{
|
||||||
|
Key: apiTransitionWorkers,
|
||||||
|
Description: `set the number of transition workers, defaults to 100`,
|
||||||
|
Optional: true,
|
||||||
|
Type: "number",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
@ -289,4 +289,10 @@ Example 1:
|
|||||||
"",
|
"",
|
||||||
"MINIO_API_REPLICATION_WORKERS: should be > 0",
|
"MINIO_API_REPLICATION_WORKERS: should be > 0",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
ErrInvalidTransitionWorkersValue = newErrFn(
|
||||||
|
"Invalid value for transition workers",
|
||||||
|
"",
|
||||||
|
"MINIO_API_TRANSITION_WORKERS: should be >= GOMAXPROCS/2",
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user