add configuration to limit replication workers (#18601)
@@ -1697,12 +1697,13 @@ type ReplicationPool struct {
     activeWorkers    int32
     activeMRFWorkers int32
 
-    objLayer ObjectLayer
-    ctx      context.Context
-    priority string
-    mu       sync.RWMutex
-    mrfMU    sync.Mutex
-    resyncer *replicationResyncer
+    objLayer   ObjectLayer
+    ctx        context.Context
+    priority   string
+    maxWorkers int
+    mu         sync.RWMutex
+    mrfMU      sync.Mutex
+    resyncer   *replicationResyncer
 
     // workers:
     workers []chan ReplicationWorkerOperation
@@ -1748,9 +1749,13 @@ const (
 func NewReplicationPool(ctx context.Context, o ObjectLayer, opts replicationPoolOpts) *ReplicationPool {
     var workers, failedWorkers int
     priority := "auto"
+    maxWorkers := WorkerMaxLimit
     if opts.Priority != "" {
         priority = opts.Priority
     }
+    if opts.MaxWorkers > 0 {
+        maxWorkers = opts.MaxWorkers
+    }
     switch priority {
     case "fast":
         workers = WorkerMaxLimit
@@ -1762,7 +1767,13 @@ func NewReplicationPool(ctx context.Context, o ObjectLayer, opts replicationPool
         workers = WorkerAutoDefault
         failedWorkers = MRFWorkerAutoDefault
     }
+    if maxWorkers > 0 && workers > maxWorkers {
+        workers = maxWorkers
+    }
+    if maxWorkers > 0 && failedWorkers > maxWorkers {
+        failedWorkers = maxWorkers
+    }
     pool := &ReplicationPool{
         workers:    make([]chan ReplicationWorkerOperation, 0, workers),
         lrgworkers: make([]chan ReplicationWorkerOperation, 0, LargeWorkerCount),
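The constructor now resolves base worker counts from the priority preset and only then applies the configured ceiling to both the regular and the MRF (failed-replication retry) pools. Below is a minimal, self-contained sketch of that sizing logic; resolveWorkerCounts and the lowercase constants are illustrative stand-ins, not MinIO's actual identifiers or values.

package main

import "fmt"

// Illustrative stand-ins for the limits referenced in the diff
// (WorkerMaxLimit, MRFWorkerAutoDefault, ...); the real values live in MinIO.
const (
	workerMaxLimit       = 500
	workerAutoDefault    = 100
	mrfWorkerMaxLimit    = 8
	mrfWorkerAutoDefault = 4
)

// resolveWorkerCounts mirrors the shape of the new constructor logic: pick base
// counts from the priority preset, then cap both pools when maxWorkers is set.
func resolveWorkerCounts(priority string, maxWorkers int) (workers, failedWorkers int) {
	switch priority {
	case "fast":
		workers, failedWorkers = workerMaxLimit, mrfWorkerMaxLimit
	case "slow":
		workers, failedWorkers = 1, 1 // placeholder minimums for the sketch
	default: // "auto"
		workers, failedWorkers = workerAutoDefault, mrfWorkerAutoDefault
	}
	if maxWorkers > 0 && workers > maxWorkers {
		workers = maxWorkers
	}
	if maxWorkers > 0 && failedWorkers > maxWorkers {
		failedWorkers = maxWorkers
	}
	return workers, failedWorkers
}

func main() {
	fmt.Println(resolveWorkerCounts("auto", 50)) // 50 4: the cap overrides the "auto" preset
	fmt.Println(resolveWorkerCounts("fast", 0))  // 500 8: an unset cap leaves the presets alone
}

Passing zero leaves the presets untouched, which matches the maxWorkers > 0 guard in the hunk above.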
@@ -1774,6 +1785,7 @@ func NewReplicationPool(ctx context.Context, o ObjectLayer, opts replicationPool
         ctx:        ctx,
         objLayer:   o,
         priority:   priority,
+        maxWorkers: maxWorkers,
     }
 
     pool.AddLargeWorkers()
@@ -1929,7 +1941,7 @@ func (p *ReplicationPool) ResizeWorkers(n, checkOld int) {
 }
 
 // ResizeWorkerPriority sets replication failed workers pool size
-func (p *ReplicationPool) ResizeWorkerPriority(pri string) {
+func (p *ReplicationPool) ResizeWorkerPriority(pri string, maxWorkers int) {
     var workers, mrfWorkers int
     p.mu.Lock()
     switch pri {
@@ -1949,7 +1961,15 @@ func (p *ReplicationPool) ResizeWorkerPriority(pri string) {
             mrfWorkers = int(math.Min(float64(p.mrfWorkerSize+1), MRFWorkerAutoDefault))
         }
     }
+    if maxWorkers > 0 && workers > maxWorkers {
+        workers = maxWorkers
+    }
+
+    if maxWorkers > 0 && mrfWorkers > maxWorkers {
+        mrfWorkers = maxWorkers
+    }
     p.priority = pri
+    p.maxWorkers = maxWorkers
     p.mu.Unlock()
     p.ResizeWorkers(workers, 0)
     p.ResizeFailedWorkers(mrfWorkers)
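The same "if maxWorkers > 0 && n > maxWorkers" guard now appears in both NewReplicationPool and ResizeWorkerPriority, for the regular and the MRF worker pools alike. A tiny illustrative helper (not part of the patch) that captures the repeated pattern:

package main

import "fmt"

// capWorkers bounds a computed pool size by the configured maximum;
// a zero or negative max means "no explicit cap".
func capWorkers(n, maxWorkers int) int {
	if maxWorkers > 0 && n > maxWorkers {
		return maxWorkers
	}
	return n
}

func main() {
	fmt.Println(capWorkers(100, 32)) // 32
	fmt.Println(capWorkers(4, 32))   // 4
	fmt.Println(capWorkers(100, 0))  // 100: an unset cap leaves the preset alone
}

With a helper like this, the two clamping sites in the hunk above would reduce to workers = capWorkers(workers, maxWorkers) and mrfWorkers = capWorkers(mrfWorkers, maxWorkers).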
@@ -2023,6 +2043,7 @@ func (p *ReplicationPool) queueReplicaTask(ri ReplicateObjectInfo) {
         globalReplicationPool.queueMRFSave(ri.ToMRFEntry())
         p.mu.RLock()
         prio := p.priority
+        maxWorkers := p.maxWorkers
         p.mu.RUnlock()
         switch prio {
         case "fast":
@@ -2030,16 +2051,18 @@ func (p *ReplicationPool) queueReplicaTask(ri ReplicateObjectInfo) {
         case "slow":
             logger.LogOnceIf(GlobalContext, fmt.Errorf("WARNING: Unable to keep up with incoming traffic - we recommend increasing replication priority with `mc admin config set api replication_priority=auto`"), string(replicationSubsystem))
         default:
-            if p.ActiveWorkers() < WorkerMaxLimit {
+            maxWorkers = int(math.Min(float64(maxWorkers), WorkerMaxLimit))
+            if p.ActiveWorkers() < maxWorkers {
                 p.mu.RLock()
-                workers := int(math.Min(float64(len(p.workers)+1), WorkerMaxLimit))
+                workers := int(math.Min(float64(len(p.workers)+1), float64(maxWorkers)))
                 existing := len(p.workers)
                 p.mu.RUnlock()
                 p.ResizeWorkers(workers, existing)
             }
-            if p.ActiveMRFWorkers() < MRFWorkerMaxLimit {
+            maxMRFWorkers := int(math.Min(float64(maxWorkers), MRFWorkerMaxLimit))
+            if p.ActiveMRFWorkers() < maxMRFWorkers {
                 p.mu.RLock()
-                workers := int(math.Min(float64(p.mrfWorkerSize+1), MRFWorkerMaxLimit))
+                workers := int(math.Min(float64(p.mrfWorkerSize+1), float64(maxMRFWorkers)))
                 p.mu.RUnlock()
                 p.ResizeFailedWorkers(workers)
             }
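In the auto priority path the pool still grows one worker at a time when replication falls behind, but the ceiling is now the smaller of the configured maximum and the hard limit (WorkerMaxLimit, and MRFWorkerMaxLimit for the retry pool). A self-contained sketch of that bounded scale-up decision, with an assumed stand-in value for the hard limit:

package main

import (
	"fmt"
	"math"
)

const workerMaxLimit = 500 // stand-in for WorkerMaxLimit

// nextPoolSize decides whether the auto-scaler should add one more worker:
// grow only while the busy-worker count is below the effective limit, and
// never size the pool past that limit.
func nextPoolSize(current, activeWorkers, configuredMax int) int {
	limit := int(math.Min(float64(configuredMax), workerMaxLimit))
	if activeWorkers >= limit {
		return current // saturated against the cap: no further growth
	}
	return int(math.Min(float64(current+1), float64(limit)))
}

func main() {
	fmt.Println(nextPoolSize(10, 10, 16)) // 11: below the cap, add one worker
	fmt.Println(nextPoolSize(16, 16, 16)) // 16: cap reached, stay put
}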
@@ -2077,6 +2100,7 @@ func (p *ReplicationPool) queueReplicaDeleteTask(doi DeletedObjectReplicationInf
         globalReplicationPool.queueMRFSave(doi.ToMRFEntry())
         p.mu.RLock()
         prio := p.priority
+        maxWorkers := p.maxWorkers
         p.mu.RUnlock()
         switch prio {
         case "fast":
@@ -2084,9 +2108,10 @@ func (p *ReplicationPool) queueReplicaDeleteTask(doi DeletedObjectReplicationInf
         case "slow":
             logger.LogOnceIf(GlobalContext, fmt.Errorf("WARNING: Unable to keep up with incoming deletes - we recommend increasing replication priority with `mc admin config set api replication_priority=auto`"), string(replicationSubsystem))
         default:
-            if p.ActiveWorkers() < WorkerMaxLimit {
+            maxWorkers = int(math.Min(float64(maxWorkers), WorkerMaxLimit))
+            if p.ActiveWorkers() < maxWorkers {
                 p.mu.RLock()
-                workers := int(math.Min(float64(len(p.workers)+1), WorkerMaxLimit))
+                workers := int(math.Min(float64(len(p.workers)+1), float64(maxWorkers)))
                 existing := len(p.workers)
                 p.mu.RUnlock()
                 p.ResizeWorkers(workers, existing)
@@ -2096,13 +2121,12 @@ func (p *ReplicationPool) queueReplicaDeleteTask(doi DeletedObjectReplicationInf
 }
 
 type replicationPoolOpts struct {
-    Priority string
+    Priority   string
+    MaxWorkers int
 }
 
 func initBackgroundReplication(ctx context.Context, objectAPI ObjectLayer) {
-    globalReplicationPool = NewReplicationPool(ctx, objectAPI, replicationPoolOpts{
-        Priority: globalAPIConfig.getReplicationPriority(),
-    })
+    globalReplicationPool = NewReplicationPool(ctx, objectAPI, globalAPIConfig.getReplicationOpts())
     globalReplicationStats = NewReplicationStats(ctx, objectAPI)
     go globalReplicationStats.trackEWMA()
 }
@@ -42,9 +42,10 @@ type apiConfig struct {
     listQuorum       string
     corsAllowOrigins []string
     // total drives per erasure set across pools.
-    totalDriveCount     int
-    replicationPriority string
-    transitionWorkers   int
+    totalDriveCount       int
+    replicationPriority   string
+    replicationMaxWorkers int
+    transitionWorkers     int
 
     staleUploadsExpiry          time.Duration
     staleUploadsCleanupInterval time.Duration
@@ -152,10 +153,11 @@ func (t *apiConfig) init(cfg api.Config, setDriveCounts []int) {
     }
     t.listQuorum = listQuorum
     if globalReplicationPool != nil &&
-        cfg.ReplicationPriority != t.replicationPriority {
-        globalReplicationPool.ResizeWorkerPriority(cfg.ReplicationPriority)
+        (cfg.ReplicationPriority != t.replicationPriority || cfg.ReplicationMaxWorkers != t.replicationMaxWorkers) {
+        globalReplicationPool.ResizeWorkerPriority(cfg.ReplicationPriority, cfg.ReplicationMaxWorkers)
     }
     t.replicationPriority = cfg.ReplicationPriority
+    t.replicationMaxWorkers = cfg.ReplicationMaxWorkers
     if globalTransitionState != nil && cfg.TransitionWorkers != t.transitionWorkers {
         globalTransitionState.UpdateWorkers(cfg.TransitionWorkers)
     }
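apiConfig.init resizes the live pool only when one of the two replication settings actually changed, and the comparison now covers both the priority and the worker cap; cfg.ReplicationMaxWorkers itself presumably comes from a new key in the api config subsystem (likely replication_max_workers), but that plumbing is not shown in this excerpt. A sketch of the change-detection pattern, using hypothetical names:

package main

import "fmt"

// Hypothetical holder for the two replication settings compared in apiConfig.init.
type replicationSettings struct {
	priority   string
	maxWorkers int
}

// applyIfChanged resizes only when either setting actually changed, mirroring
// the widened condition in the hunk above, then records the new values.
func applyIfChanged(current, incoming replicationSettings, resize func(pri string, maxWorkers int)) replicationSettings {
	if incoming.priority != current.priority || incoming.maxWorkers != current.maxWorkers {
		resize(incoming.priority, incoming.maxWorkers)
	}
	return incoming
}

func main() {
	cur := replicationSettings{priority: "auto", maxWorkers: 500}
	cur = applyIfChanged(cur, replicationSettings{priority: "auto", maxWorkers: 128},
		func(pri string, maxWorkers int) { fmt.Println("resize to", pri, maxWorkers) })
	fmt.Println(cur) // {auto 128}: the stored settings are updated either way
}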
@@ -334,15 +336,21 @@ func maxClients(f http.HandlerFunc) http.HandlerFunc {
     }
 }
 
-func (t *apiConfig) getReplicationPriority() string {
+func (t *apiConfig) getReplicationOpts() replicationPoolOpts {
     t.mu.RLock()
     defer t.mu.RUnlock()
 
     if t.replicationPriority == "" {
-        return "auto"
+        return replicationPoolOpts{
+            Priority:   "auto",
+            MaxWorkers: WorkerMaxLimit,
+        }
     }
 
-    return t.replicationPriority
+    return replicationPoolOpts{
+        Priority:   t.replicationPriority,
+        MaxWorkers: t.replicationMaxWorkers,
+    }
 }
 
 func (t *apiConfig) getTransitionWorkers() int {
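getReplicationOpts replaces the old priority-only getter: it returns a complete option set under the read lock and falls back to priority auto with the hard worker limit when nothing is configured, and initBackgroundReplication hands the result straight to NewReplicationPool. A minimal sketch of that defaulting-getter shape; the config type, poolOpts, and the constant value are assumptions for illustration:

package main

import (
	"fmt"
	"sync"
)

const workerMaxLimit = 500 // stand-in for WorkerMaxLimit

type poolOpts struct {
	Priority   string
	MaxWorkers int
}

type config struct {
	mu         sync.RWMutex
	priority   string
	maxWorkers int
}

// replicationOpts mirrors the new getter: hold the read lock while copying the
// settings and substitute defaults when the priority was never configured.
func (c *config) replicationOpts() poolOpts {
	c.mu.RLock()
	defer c.mu.RUnlock()

	if c.priority == "" {
		return poolOpts{Priority: "auto", MaxWorkers: workerMaxLimit}
	}
	return poolOpts{Priority: c.priority, MaxWorkers: c.maxWorkers}
}

func main() {
	var c config // nothing configured yet
	fmt.Println(c.replicationOpts()) // {auto 500}

	c.priority, c.maxWorkers = "fast", 64
	fmt.Println(c.replicationOpts()) // {fast 64}
}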