https://github.com/minio/minio.git

Add auto configuration of replication workers (#15636)

Commit: 8ea6fb368d
Parent: 5fd5ddea23
@@ -23,11 +23,13 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"math"
 	"net/http"
 	"path"
 	"reflect"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"
 
 	"github.com/dustin/go-humanize"
@@ -1511,23 +1513,62 @@ type ReplicationPool struct {
 	mrfSaveCh   chan MRFReplicateEntry
 	saveStateCh chan struct{}
 
 	workerSize    int
 	mrfWorkerSize int
-	resyncState   replicationResyncState
-	workerWg      sync.WaitGroup
-	mrfWorkerWg   sync.WaitGroup
-	once          sync.Once
-	mu            sync.Mutex
+	activeWorkers    int32
+	activeMRFWorkers int32
+	priority         string
+	resyncState      replicationResyncState
+	workerWg         sync.WaitGroup
+	mrfWorkerWg      sync.WaitGroup
+	once             sync.Once
+	mu               sync.RWMutex
 }
 
+const (
+	// WorkerMaxLimit max number of workers per node for "fast" mode
+	WorkerMaxLimit = 500
+
+	// WorkerMinLimit min number of workers per node for "slow" mode
+	WorkerMinLimit = 50
+
+	// WorkerAutoDefault is default number of workers for "auto" mode
+	WorkerAutoDefault = 100
+
+	// MRFWorkerMaxLimit max number of mrf workers per node for "fast" mode
+	MRFWorkerMaxLimit = 8
+
+	// MRFWorkerMinLimit min number of mrf workers per node for "slow" mode
+	MRFWorkerMinLimit = 2
+
+	// MRFWorkerAutoDefault is default number of mrf workers for "auto" mode
+	MRFWorkerAutoDefault = 4
+)
+
 // NewReplicationPool creates a pool of replication workers of specified size
 func NewReplicationPool(ctx context.Context, o ObjectLayer, opts replicationPoolOpts) *ReplicationPool {
+	var workers, failedWorkers int
+	priority := "auto"
+	if opts.Priority != "" {
+		priority = opts.Priority
+	}
+	switch priority {
+	case "fast":
+		workers = WorkerMaxLimit
+		failedWorkers = MRFWorkerMaxLimit
+	case "slow":
+		workers = WorkerMinLimit
+		failedWorkers = MRFWorkerMinLimit
+	default:
+		workers = WorkerAutoDefault
+		failedWorkers = MRFWorkerAutoDefault
+	}
 	pool := &ReplicationPool{
 		replicaCh:               make(chan ReplicateObjectInfo, 100000),
 		replicaDeleteCh:         make(chan DeletedObjectReplicationInfo, 100000),
 		mrfReplicaCh:            make(chan ReplicateObjectInfo, 100000),
-		workerKillCh:            make(chan struct{}, opts.Workers),
-		mrfWorkerKillCh:         make(chan struct{}, opts.FailedWorkers),
+		workerKillCh:            make(chan struct{}, workers),
+		mrfWorkerKillCh:         make(chan struct{}, failedWorkers),
 		existingReplicaCh:       make(chan ReplicateObjectInfo, 100000),
 		existingReplicaDeleteCh: make(chan DeletedObjectReplicationInfo, 100000),
 		resyncState:             replicationResyncState{statusMap: make(map[string]BucketReplicationResyncStatus)},
@@ -1535,10 +1576,11 @@ func NewReplicationPool(ctx context.Context, o ObjectLayer, opts replicationPoolOpts) *ReplicationPool {
 		saveStateCh:             make(chan struct{}, 1),
 		ctx:                     ctx,
 		objLayer:                o,
+		priority:                priority,
 	}
 
-	pool.ResizeWorkers(opts.Workers)
-	pool.ResizeFailedWorkers(opts.FailedWorkers)
+	pool.ResizeWorkers(workers)
+	pool.ResizeFailedWorkers(failedWorkers)
 	go pool.AddExistingObjectReplicateWorker()
 	go pool.updateResyncStatus(ctx, o)
 	go pool.processMRF()
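
The constructor no longer takes explicit worker counts; it derives both pool sizes from a single priority string and falls back to "auto" when none is configured. Below is a standalone sketch of that mapping, for illustration only; the lower-cased constants mirror the values added above but are not MinIO identifiers.

// Illustration only: priority-to-pool-size mapping used by the new constructor.
package main

import "fmt"

const (
	workerMaxLimit       = 500 // "fast"
	workerMinLimit       = 50  // "slow"
	workerAutoDefault    = 100 // "auto"
	mrfWorkerMaxLimit    = 8
	mrfWorkerMinLimit    = 2
	mrfWorkerAutoDefault = 4
)

// workersForPriority mirrors the switch in NewReplicationPool: unknown or
// empty values fall back to the "auto" defaults.
func workersForPriority(priority string) (workers, failedWorkers int) {
	switch priority {
	case "fast":
		return workerMaxLimit, mrfWorkerMaxLimit
	case "slow":
		return workerMinLimit, mrfWorkerMinLimit
	default:
		return workerAutoDefault, mrfWorkerAutoDefault
	}
}

func main() {
	for _, p := range []string{"fast", "slow", "auto", ""} {
		w, f := workersForPriority(p)
		fmt.Printf("priority=%q workers=%d mrfWorkers=%d\n", p, w, f)
	}
}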
@@ -1559,7 +1601,9 @@ func (p *ReplicationPool) AddMRFWorker() {
 			if !ok {
 				return
 			}
+			atomic.AddInt32(&p.activeMRFWorkers, 1)
 			replicateObject(p.ctx, oi, p.objLayer)
+			atomic.AddInt32(&p.activeMRFWorkers, -1)
 		case <-p.mrfWorkerKillCh:
 			return
 		}
@@ -1577,12 +1621,17 @@ func (p *ReplicationPool) AddWorker() {
 			if !ok {
 				return
 			}
+			atomic.AddInt32(&p.activeWorkers, 1)
 			replicateObject(p.ctx, oi, p.objLayer)
+			atomic.AddInt32(&p.activeWorkers, -1)
+
 		case doi, ok := <-p.replicaDeleteCh:
 			if !ok {
 				return
 			}
+			atomic.AddInt32(&p.activeWorkers, 1)
 			replicateDelete(p.ctx, doi, p.objLayer)
+			atomic.AddInt32(&p.activeWorkers, -1)
 		case <-p.workerKillCh:
 			return
 		}
@@ -1609,6 +1658,16 @@ func (p *ReplicationPool) AddExistingObjectReplicateWorker() {
 	}
 }
 
+// ActiveWorkers returns the number of active workers handling replication traffic.
+func (p *ReplicationPool) ActiveWorkers() int {
+	return int(atomic.LoadInt32(&p.activeWorkers))
+}
+
+// ActiveMRFWorkers returns the number of active workers handling replication failures.
+func (p *ReplicationPool) ActiveMRFWorkers() int {
+	return int(atomic.LoadInt32(&p.activeMRFWorkers))
+}
+
 // ResizeWorkers sets replication workers pool to new size
 func (p *ReplicationPool) ResizeWorkers(n int) {
 	p.mu.Lock()
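
AddWorker and AddMRFWorker now increment an atomic counter before each replication call and decrement it afterwards, and the two accessors above read those counters without taking the pool mutex. A minimal standalone illustration of the same bookkeeping follows; the pool type here is hypothetical, not MinIO's.

// Illustration only: counting in-flight tasks with sync/atomic.
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
	"time"
)

type pool struct {
	active int32
}

// handle marks one task as active for its duration.
func (p *pool) handle() {
	atomic.AddInt32(&p.active, 1)
	defer atomic.AddInt32(&p.active, -1)
	time.Sleep(10 * time.Millisecond) // stand-in for replicateObject
}

// ActiveWorkers returns a lock-free snapshot of the counter.
func (p *pool) ActiveWorkers() int {
	return int(atomic.LoadInt32(&p.active))
}

func main() {
	p := &pool{}
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() { defer wg.Done(); p.handle() }()
	}
	time.Sleep(2 * time.Millisecond)
	fmt.Println("active while running:", p.ActiveWorkers())
	wg.Wait()
	fmt.Println("active after drain:", p.ActiveWorkers())
}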
@@ -1625,6 +1684,33 @@ func (p *ReplicationPool) ResizeWorkers(n int) {
 	}
 }
 
+// ResizeWorkerPriority resizes the replication and MRF worker pools for the given priority
+func (p *ReplicationPool) ResizeWorkerPriority(pri string) {
+	var workers, mrfWorkers int
+	p.mu.Lock()
+	switch pri {
+	case "fast":
+		workers = WorkerMaxLimit
+		mrfWorkers = MRFWorkerMaxLimit
+	case "slow":
+		workers = WorkerMinLimit
+		mrfWorkers = MRFWorkerMinLimit
+	default:
+		workers = WorkerAutoDefault
+		mrfWorkers = MRFWorkerAutoDefault
+		if p.workerSize < WorkerAutoDefault {
+			workers = int(math.Min(float64(p.workerSize+1), WorkerAutoDefault))
+		}
+		if p.mrfWorkerSize < MRFWorkerAutoDefault {
+			mrfWorkers = int(math.Min(float64(p.mrfWorkerSize+1), MRFWorkerAutoDefault))
+		}
+	}
+	p.priority = pri
+	p.mu.Unlock()
+	p.ResizeWorkers(workers)
+	p.ResizeFailedWorkers(mrfWorkers)
+}
+
 // ResizeFailedWorkers sets replication failed workers pool size
 func (p *ReplicationPool) ResizeFailedWorkers(n int) {
 	p.mu.Lock()
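
In the default ("auto") branch, ResizeWorkerPriority does not jump an undersized pool straight to the default; it steps the pool up by one worker, capped at WorkerAutoDefault, and does the same for the MRF pool against MRFWorkerAutoDefault. A standalone sketch of that step, assuming the cap of 100 added above:

// Illustration only: the incremental "auto" resize step.
package main

import (
	"fmt"
	"math"
)

const workerAutoDefault = 100

// nextAutoSize reproduces the math.Min step from the diff: grow by one
// until the auto default is reached, otherwise settle at the default.
func nextAutoSize(current int) int {
	if current < workerAutoDefault {
		return int(math.Min(float64(current+1), workerAutoDefault))
	}
	return workerAutoDefault
}

func main() {
	size := 97
	for i := 0; i < 5; i++ {
		size = nextAutoSize(size)
		fmt.Println("resized to", size) // 98, 99, 100, 100, 100
	}
}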
@@ -1641,14 +1727,6 @@ func (p *ReplicationPool) ResizeFailedWorkers(n int) {
 	}
 }
 
-// suggestedWorkers recommends an increase in number of workers to meet replication load.
-func (p *ReplicationPool) suggestedWorkers(failQueue bool) int {
-	if failQueue {
-		return int(float64(p.mrfWorkerSize) * ReplicationWorkerMultiplier)
-	}
-	return int(float64(p.workerSize) * ReplicationWorkerMultiplier)
-}
-
 func (p *ReplicationPool) queueReplicaTask(ri ReplicateObjectInfo) {
 	if p == nil {
 		return
@@ -1675,7 +1753,23 @@ func (p *ReplicationPool) queueReplicaTask(ri ReplicateObjectInfo) {
 	case ch <- ri:
 	default:
 		globalReplicationPool.queueMRFSave(ri.ToMRFEntry())
-		logger.LogOnceIf(GlobalContext, fmt.Errorf("WARNING: Unable to keep up with incoming traffic - we recommend increasing number of replicate object workers with `mc admin config set api replication_workers=%d`", p.suggestedWorkers(false)), string(replicationSubsystem))
+		p.mu.RLock()
+		switch p.priority {
+		case "fast":
+			logger.LogOnceIf(GlobalContext, fmt.Errorf("WARNING: Unable to keep up with incoming traffic"), string(replicationSubsystem))
+		case "slow":
+			logger.LogOnceIf(GlobalContext, fmt.Errorf("WARNING: Unable to keep up with incoming traffic - we recommend increasing replication priority with `mc admin config set api replication_priority=auto`"), string(replicationSubsystem))
+		default:
+			if p.ActiveWorkers() < WorkerMaxLimit {
+				workers := int(math.Min(float64(p.workerSize+1), WorkerMaxLimit))
+				p.ResizeWorkers(workers)
+			}
+			if p.ActiveMRFWorkers() < MRFWorkerMaxLimit {
+				workers := int(math.Min(float64(p.mrfWorkerSize+1), MRFWorkerMaxLimit))
+				p.ResizeFailedWorkers(workers)
+			}
+		}
+		p.mu.RUnlock()
 	}
 }
 
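queueReplicaTask keeps its non-blocking send: when the replication channel is full, the entry is diverted to the MRF queue and, under "auto" priority, the worker pools are grown toward their caps rather than only logging a warning. A simplified, self-contained sketch of that back-pressure pattern follows; the queue type, channel size, and cap below are hypothetical.

// Illustration only: non-blocking enqueue with overflow-driven scale-up.
package main

import "fmt"

const maxWorkers = 500

type queue struct {
	tasks    chan int
	overflow []int
	workers  int
	priority string
}

func (q *queue) enqueue(task int) {
	select {
	case q.tasks <- task:
	default:
		// Channel full: park the task and, in "auto" mode, add one worker up to the cap.
		q.overflow = append(q.overflow, task)
		if q.priority == "auto" && q.workers < maxWorkers {
			q.workers++
			fmt.Println("scaled workers to", q.workers)
		}
	}
}

func main() {
	q := &queue{tasks: make(chan int, 2), workers: 100, priority: "auto"}
	for i := 0; i < 5; i++ {
		q.enqueue(i)
	}
	fmt.Println("queued:", len(q.tasks), "overflowed:", len(q.overflow))
}
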
@@ -1713,19 +1807,29 @@ func (p *ReplicationPool) queueReplicaDeleteTask(doi DeletedObjectReplicationInfo) {
 	case ch <- doi:
 	default:
 		globalReplicationPool.queueMRFSave(doi.ToMRFEntry())
-		logger.LogOnceIf(GlobalContext, fmt.Errorf("WARNING: Unable to keep up with incoming deletes - we recommend increasing number of replicate workers with `mc admin config set api replication_workers=%d`", p.suggestedWorkers(false)), string(replicationSubsystem))
+		p.mu.RLock()
+		switch p.priority {
+		case "fast":
+			logger.LogOnceIf(GlobalContext, fmt.Errorf("WARNING: Unable to keep up with incoming deletes"), string(replicationSubsystem))
+		case "slow":
+			logger.LogOnceIf(GlobalContext, fmt.Errorf("WARNING: Unable to keep up with incoming deletes - we recommend increasing replication priority with `mc admin config set api replication_priority=auto`"), string(replicationSubsystem))
+		default:
+			if p.ActiveWorkers() < WorkerMaxLimit {
+				workers := int(math.Min(float64(p.workerSize+1), WorkerMaxLimit))
+				p.ResizeWorkers(workers)
+			}
+		}
+		p.mu.RUnlock()
 	}
 }
 
 type replicationPoolOpts struct {
-	Workers       int
-	FailedWorkers int
+	Priority string
 }
 
 func initBackgroundReplication(ctx context.Context, objectAPI ObjectLayer) {
 	globalReplicationPool = NewReplicationPool(ctx, objectAPI, replicationPoolOpts{
-		Workers:       globalAPIConfig.getReplicationWorkers(),
-		FailedWorkers: globalAPIConfig.getReplicationFailedWorkers(),
+		Priority: globalAPIConfig.getReplicationPriority(),
 	})
 	globalReplicationStats = NewReplicationStats(ctx, objectAPI)
 	go globalReplicationStats.loadInitialReplicationMetrics(ctx)
@@ -41,10 +41,9 @@ type apiConfig struct {
 	listQuorum       string
 	corsAllowOrigins []string
 	// total drives per erasure set across pools.
 	totalDriveCount int
-	replicationWorkers       int
-	replicationFailedWorkers int
-	transitionWorkers        int
+	replicationPriority string
+	transitionWorkers   int
 
 	staleUploadsExpiry          time.Duration
 	staleUploadsCleanupInterval time.Duration
@@ -137,12 +136,11 @@ func (t *apiConfig) init(cfg api.Config, setDriveCounts []int) {
 	t.requestsDeadline = cfg.RequestsDeadline
 	t.listQuorum = cfg.ListQuorum
 	if globalReplicationPool != nil &&
-		cfg.ReplicationWorkers != t.replicationWorkers {
-		globalReplicationPool.ResizeFailedWorkers(cfg.ReplicationFailedWorkers)
-		globalReplicationPool.ResizeWorkers(cfg.ReplicationWorkers)
+		cfg.ReplicationPriority != t.replicationPriority {
+		globalReplicationPool.ResizeWorkerPriority(cfg.ReplicationPriority)
 	}
-	t.replicationFailedWorkers = cfg.ReplicationFailedWorkers
-	t.replicationWorkers = cfg.ReplicationWorkers
+	t.replicationPriority = cfg.ReplicationPriority
 	if globalTransitionState != nil && cfg.TransitionWorkers != t.transitionWorkers {
 		globalTransitionState.UpdateWorkers(cfg.TransitionWorkers)
 	}
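
apiConfig.init now applies the priority dynamically, resizing the pool only when the configured value differs from the one already in effect, so repeated reloads with an unchanged setting are no-ops. A standalone sketch of that guard; the types are hypothetical and ResizeWorkerPriority is stubbed as a callback.

// Illustration only: apply a config change only when the value differs.
package main

import (
	"fmt"
	"sync"
)

type apiCfg struct {
	mu       sync.Mutex
	priority string
}

// update stores the new priority and triggers a resize only on change.
func (c *apiCfg) update(newPriority string, resize func(string)) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if newPriority != c.priority {
		resize(newPriority) // stand-in for globalReplicationPool.ResizeWorkerPriority
	}
	c.priority = newPriority
}

func main() {
	cfg := &apiCfg{priority: "auto"}
	resize := func(p string) { fmt.Println("resizing pool for priority", p) }
	cfg.update("auto", resize) // unchanged: no resize
	cfg.update("fast", resize) // changed: resize runs
}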
@@ -289,18 +287,11 @@ func maxClients(f http.HandlerFunc) http.HandlerFunc {
 	}
 }
 
-func (t *apiConfig) getReplicationFailedWorkers() int {
+func (t *apiConfig) getReplicationPriority() string {
 	t.mu.RLock()
 	defer t.mu.RUnlock()
 
-	return t.replicationFailedWorkers
-}
-
-func (t *apiConfig) getReplicationWorkers() int {
-	t.mu.RLock()
-	defer t.mu.RUnlock()
-
-	return t.replicationWorkers
+	return t.replicationPriority
 }
 
 func (t *apiConfig) getTransitionWorkers() int {
@@ -37,8 +37,7 @@ const (
 	apiCorsAllowOrigin             = "cors_allow_origin"
 	apiRemoteTransportDeadline     = "remote_transport_deadline"
 	apiListQuorum                  = "list_quorum"
-	apiReplicationWorkers          = "replication_workers"
-	apiReplicationFailedWorkers    = "replication_failed_workers"
+	apiReplicationPriority         = "replication_priority"
 	apiTransitionWorkers           = "transition_workers"
 	apiStaleUploadsCleanupInterval = "stale_uploads_cleanup_interval"
 	apiStaleUploadsExpiry          = "stale_uploads_expiry"
@@ -46,16 +45,14 @@ const (
 	apiDisableODirect              = "disable_odirect"
 	apiGzipObjects                 = "gzip_objects"
 
 	EnvAPIRequestsMax             = "MINIO_API_REQUESTS_MAX"
 	EnvAPIRequestsDeadline        = "MINIO_API_REQUESTS_DEADLINE"
 	EnvAPIClusterDeadline         = "MINIO_API_CLUSTER_DEADLINE"
 	EnvAPICorsAllowOrigin         = "MINIO_API_CORS_ALLOW_ORIGIN"
 	EnvAPIRemoteTransportDeadline = "MINIO_API_REMOTE_TRANSPORT_DEADLINE"
 	EnvAPIListQuorum              = "MINIO_API_LIST_QUORUM"
 	EnvAPISecureCiphers           = "MINIO_API_SECURE_CIPHERS" // default "on"
-	EnvAPIReplicationWorkers       = "MINIO_API_REPLICATION_WORKERS"
-	EnvAPIReplicationFailedWorkers = "MINIO_API_REPLICATION_FAILED_WORKERS"
-	EnvAPITransitionWorkers        = "MINIO_API_TRANSITION_WORKERS"
+	EnvAPIReplicationPriority     = "MINIO_API_REPLICATION_PRIORITY"
 
 	EnvAPIStaleUploadsCleanupInterval = "MINIO_API_STALE_UPLOADS_CLEANUP_INTERVAL"
 	EnvAPIStaleUploadsExpiry          = "MINIO_API_STALE_UPLOADS_EXPIRY"
@@ -67,8 +64,14 @@ const (
 
 // Deprecated key and ENVs
 const (
 	apiReadyDeadline            = "ready_deadline"
-	EnvAPIReadyDeadline = "MINIO_API_READY_DEADLINE"
+	apiReplicationWorkers       = "replication_workers"
+	apiReplicationFailedWorkers = "replication_failed_workers"
+
+	EnvAPIReadyDeadline            = "MINIO_API_READY_DEADLINE"
+	EnvAPIReplicationWorkers       = "MINIO_API_REPLICATION_WORKERS"
+	EnvAPIReplicationFailedWorkers = "MINIO_API_REPLICATION_FAILED_WORKERS"
+	EnvAPITransitionWorkers        = "MINIO_API_TRANSITION_WORKERS"
 )
 
 // DefaultKVS - default storage class config
@@ -99,12 +102,8 @@ var (
 			Value: "strict",
 		},
 		config.KV{
-			Key:   apiReplicationWorkers,
-			Value: "250",
-		},
-		config.KV{
-			Key:   apiReplicationFailedWorkers,
-			Value: "8",
+			Key:   apiReplicationPriority,
+			Value: "auto",
 		},
 		config.KV{
 			Key:   apiTransitionWorkers,
@@ -141,8 +140,7 @@ type Config struct {
 	CorsAllowOrigin             []string      `json:"cors_allow_origin"`
 	RemoteTransportDeadline     time.Duration `json:"remote_transport_deadline"`
 	ListQuorum                  string        `json:"list_quorum"`
-	ReplicationWorkers          int           `json:"replication_workers"`
-	ReplicationFailedWorkers    int           `json:"replication_failed_workers"`
+	ReplicationPriority         string        `json:"replication_priority"`
 	TransitionWorkers           int           `json:"transition_workers"`
 	StaleUploadsCleanupInterval time.Duration `json:"stale_uploads_cleanup_interval"`
 	StaleUploadsExpiry          time.Duration `json:"stale_uploads_expiry"`
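
With the struct tags above, the serialized api subsystem config carries a single replication_priority string where replication_workers and replication_failed_workers used to appear. A standalone sketch of the resulting JSON shape; only a subset of fields is shown and the values are samples, not defaults.

// Illustration only: JSON shape of the reworked api config fields.
package main

import (
	"encoding/json"
	"fmt"
)

type apiSubsysConfig struct {
	ListQuorum          string `json:"list_quorum"`
	ReplicationPriority string `json:"replication_priority"`
	TransitionWorkers   int    `json:"transition_workers"`
}

func main() {
	out, err := json.MarshalIndent(apiSubsysConfig{
		ListQuorum:          "strict",
		ReplicationPriority: "auto",
		TransitionWorkers:   100,
	}, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
}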
@@ -167,6 +165,8 @@ func LookupConfig(kvs config.KVS) (cfg Config, err error) {
 	// remove this since we have removed this already.
 	kvs.Delete(apiReadyDeadline)
 	kvs.Delete("extend_list_cache_life")
+	kvs.Delete(apiReplicationWorkers)
+	kvs.Delete(apiReplicationFailedWorkers)
 
 	if err = config.CheckValidKeys(config.APISubSys, kvs, DefaultKVS); err != nil {
 		return cfg, err
@@ -206,22 +206,11 @@ func LookupConfig(kvs config.KVS) (cfg Config, err error) {
 		return cfg, errors.New("invalid value for list strict quorum")
 	}
 
-	replicationWorkers, err := strconv.Atoi(env.Get(EnvAPIReplicationWorkers, kvs.GetWithDefault(apiReplicationWorkers, DefaultKVS)))
-	if err != nil {
-		return cfg, err
-	}
-
-	if replicationWorkers <= 0 {
-		return cfg, config.ErrInvalidReplicationWorkersValue(nil).Msg("Minimum number of replication workers should be 1")
-	}
-
-	replicationFailedWorkers, err := strconv.Atoi(env.Get(EnvAPIReplicationFailedWorkers, kvs.GetWithDefault(apiReplicationFailedWorkers, DefaultKVS)))
-	if err != nil {
-		return cfg, err
-	}
-
-	if replicationFailedWorkers <= 0 {
-		return cfg, config.ErrInvalidReplicationWorkersValue(nil).Msg("Minimum number of replication failed workers should be 1")
+	replicationPriority := env.Get(EnvAPIReplicationPriority, kvs.GetWithDefault(apiReplicationPriority, DefaultKVS))
+	switch replicationPriority {
+	case "slow", "fast", "auto":
+	default:
+		return cfg, errors.New("invalid value for replication priority")
 	}
 
 	transitionWorkers, err := strconv.Atoi(env.Get(EnvAPITransitionWorkers, kvs.GetWithDefault(apiTransitionWorkers, DefaultKVS)))
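
LookupConfig now reads the priority from the environment or the stored KVS and accepts only "slow", "fast", or "auto". A standalone approximation follows, using os.Getenv in place of MinIO's internal env and kvs helpers and hard-coding the documented default of "auto".

// Illustration only: validating MINIO_API_REPLICATION_PRIORITY.
package main

import (
	"errors"
	"fmt"
	"os"
)

func lookupReplicationPriority() (string, error) {
	priority := os.Getenv("MINIO_API_REPLICATION_PRIORITY")
	if priority == "" {
		priority = "auto" // the default shipped in the api subsystem KVS
	}
	switch priority {
	case "slow", "fast", "auto":
		return priority, nil
	default:
		return "", errors.New("invalid value for replication priority")
	}
}

func main() {
	os.Setenv("MINIO_API_REPLICATION_PRIORITY", "fast")
	p, err := lookupReplicationPriority()
	fmt.Println(p, err) // fast <nil>
}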
@@ -263,8 +252,7 @@ func LookupConfig(kvs config.KVS) (cfg Config, err error) {
 		CorsAllowOrigin:             corsAllowOrigin,
 		RemoteTransportDeadline:     remoteTransportDeadline,
 		ListQuorum:                  listQuorum,
-		ReplicationWorkers:          replicationWorkers,
-		ReplicationFailedWorkers:    replicationFailedWorkers,
+		ReplicationPriority:         replicationPriority,
 		TransitionWorkers:           transitionWorkers,
 		StaleUploadsCleanupInterval: staleUploadsCleanupInterval,
 		StaleUploadsExpiry:          staleUploadsExpiry,
@@ -63,16 +63,10 @@ var (
 			Type:        "string",
 		},
 		config.HelpKV{
-			Key:         apiReplicationWorkers,
-			Description: `set the number of replication workers` + defaultHelpPostfix(apiReplicationWorkers),
+			Key:         apiReplicationPriority,
+			Description: `set replication priority` + defaultHelpPostfix(apiReplicationPriority),
 			Optional:    true,
-			Type:        "number",
-		},
-		config.HelpKV{
-			Key:         apiReplicationFailedWorkers,
-			Description: `set the number of replication workers for recently failed replicas` + defaultHelpPostfix(apiReplicationFailedWorkers),
-			Optional:    true,
-			Type:        "number",
+			Type:        "string",
 		},
 		config.HelpKV{
 			Key:         apiTransitionWorkers,