Add dynamic reconfiguration of number of transition workers (#12926)

This commit is contained in:
Krishnan Parthasarathi 2021-08-11 22:23:56 -07:00 committed by GitHub
parent 9e88941515
commit 65b6f4aa31
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 88 additions and 36 deletions

View File

@ -24,7 +24,6 @@ import (
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
"runtime"
"strings" "strings"
"sync" "sync"
"time" "time"
@ -115,9 +114,14 @@ func initBackgroundExpiry(ctx context.Context, objectAPI ObjectLayer) {
} }
type transitionState struct { type transitionState struct {
once sync.Once once sync.Once
// add future metrics here
transitionCh chan ObjectInfo transitionCh chan ObjectInfo
ctx context.Context
objAPI ObjectLayer
mu sync.Mutex
numWorkers int
killCh chan struct{}
} }
func (t *transitionState) queueTransitionTask(oi ObjectInfo) { func (t *transitionState) queueTransitionTask(oi ObjectInfo) {
@ -132,50 +136,59 @@ func (t *transitionState) queueTransitionTask(oi ObjectInfo) {
} }
var ( var (
globalTransitionState *transitionState globalTransitionState *transitionState
globalTransitionConcurrent = runtime.GOMAXPROCS(0) / 2
) )
func newTransitionState() *transitionState { func newTransitionState(ctx context.Context, objAPI ObjectLayer) *transitionState {
// fix minimum concurrent transition to 1 for single CPU setup
if globalTransitionConcurrent == 0 {
globalTransitionConcurrent = 1
}
return &transitionState{ return &transitionState{
transitionCh: make(chan ObjectInfo, 10000), transitionCh: make(chan ObjectInfo, 10000),
ctx: ctx,
objAPI: objAPI,
killCh: make(chan struct{}),
} }
} }
// addWorker creates a new worker to process tasks // worker waits for transition tasks
func (t *transitionState) addWorker(ctx context.Context, objectAPI ObjectLayer) { func (t *transitionState) worker(ctx context.Context, objectAPI ObjectLayer) {
// Add a new worker. for {
go func() { select {
for { case <-t.killCh:
select { return
case <-ctx.Done(): case <-ctx.Done():
return
case oi, ok := <-t.transitionCh:
if !ok {
return return
case oi, ok := <-t.transitionCh: }
if !ok {
return
}
if err := transitionObject(ctx, objectAPI, oi); err != nil { if err := transitionObject(ctx, objectAPI, oi); err != nil {
logger.LogIf(ctx, fmt.Errorf("Transition failed for %s/%s version:%s with %w", oi.Bucket, oi.Name, oi.VersionID, err)) logger.LogIf(ctx, fmt.Errorf("Transition failed for %s/%s version:%s with %w", oi.Bucket, oi.Name, oi.VersionID, err))
}
} }
} }
}() }
}
// UpdateWorkers at the end of this function leaves n goroutines waiting for
// transition tasks
func (t *transitionState) UpdateWorkers(n int) {
t.mu.Lock()
defer t.mu.Unlock()
for t.numWorkers < n {
go t.worker(t.ctx, t.objAPI)
t.numWorkers++
}
for t.numWorkers > n {
go func() { t.killCh <- struct{}{} }()
t.numWorkers--
}
} }
func initBackgroundTransition(ctx context.Context, objectAPI ObjectLayer) { func initBackgroundTransition(ctx context.Context, objectAPI ObjectLayer) {
if globalTransitionState == nil { globalTransitionState = newTransitionState(ctx, objectAPI)
return n := globalAPIConfig.getTransitionWorkers()
} globalTransitionState.UpdateWorkers(n)
// Start with globalTransitionConcurrent.
for i := 0; i < globalTransitionConcurrent; i++ {
globalTransitionState.addWorker(ctx, objectAPI)
}
} }
var errInvalidStorageClass = errors.New("invalid storage class") var errInvalidStorageClass = errors.New("invalid storage class")

View File

@ -92,8 +92,6 @@ func init() {
}, },
}) })
globalTransitionState = newTransitionState()
console.SetColor("Debug", fcolor.New()) console.SetColor("Debug", fcolor.New())
gob.Register(StorageErr("")) gob.Register(StorageErr(""))

View File

@ -39,6 +39,7 @@ type apiConfig struct {
totalDriveCount int totalDriveCount int
replicationWorkers int replicationWorkers int
replicationFailedWorkers int replicationFailedWorkers int
transitionWorkers int
} }
func (t *apiConfig) init(cfg api.Config, setDriveCounts []int) { func (t *apiConfig) init(cfg api.Config, setDriveCounts []int) {
@ -87,6 +88,10 @@ func (t *apiConfig) init(cfg api.Config, setDriveCounts []int) {
} }
t.replicationFailedWorkers = cfg.ReplicationFailedWorkers t.replicationFailedWorkers = cfg.ReplicationFailedWorkers
t.replicationWorkers = cfg.ReplicationWorkers t.replicationWorkers = cfg.ReplicationWorkers
if globalTransitionState != nil && cfg.TransitionWorkers != t.transitionWorkers {
globalTransitionState.UpdateWorkers(cfg.TransitionWorkers)
}
t.transitionWorkers = cfg.TransitionWorkers
} }
func (t *apiConfig) getListQuorum() int { func (t *apiConfig) getListQuorum() int {
@ -173,3 +178,10 @@ func (t *apiConfig) getReplicationWorkers() int {
return t.replicationWorkers return t.replicationWorkers
} }
// getTransitionWorkers reports the transition worker count currently stored
// in the API configuration. The field is read while holding t.mu for reading.
func (t *apiConfig) getTransitionWorkers() int {
	t.mu.RLock()
	workers := t.transitionWorkers
	t.mu.RUnlock()
	return workers
}

View File

@ -530,7 +530,6 @@ func serverMain(ctx *cli.Context) {
if globalIsErasure { if globalIsErasure {
initAutoHeal(GlobalContext, newObject) initAutoHeal(GlobalContext, newObject)
initHealMRF(GlobalContext, newObject) initHealMRF(GlobalContext, newObject)
initBackgroundTransition(GlobalContext, newObject)
} }
initBackgroundExpiry(GlobalContext, newObject) initBackgroundExpiry(GlobalContext, newObject)
@ -558,6 +557,7 @@ func serverMain(ctx *cli.Context) {
if globalIsErasure { // to be done after config init if globalIsErasure { // to be done after config init
initBackgroundReplication(GlobalContext, newObject) initBackgroundReplication(GlobalContext, newObject)
initBackgroundTransition(GlobalContext, newObject)
globalTierJournal, err = initTierDeletionJournal(GlobalContext) globalTierJournal, err = initTierDeletionJournal(GlobalContext)
if err != nil { if err != nil {
logger.FatalIf(err, "Unable to initialize remote tier pending deletes journal") logger.FatalIf(err, "Unable to initialize remote tier pending deletes journal")

View File

@ -20,6 +20,7 @@ package api
import ( import (
"encoding/json" "encoding/json"
"errors" "errors"
"runtime"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@ -38,6 +39,7 @@ const (
apiListQuorum = "list_quorum" apiListQuorum = "list_quorum"
apiReplicationWorkers = "replication_workers" apiReplicationWorkers = "replication_workers"
apiReplicationFailedWorkers = "replication_failed_workers" apiReplicationFailedWorkers = "replication_failed_workers"
apiTransitionWorkers = "transition_workers"
EnvAPIRequestsMax = "MINIO_API_REQUESTS_MAX" EnvAPIRequestsMax = "MINIO_API_REQUESTS_MAX"
EnvAPIRequestsDeadline = "MINIO_API_REQUESTS_DEADLINE" EnvAPIRequestsDeadline = "MINIO_API_REQUESTS_DEADLINE"
@ -48,6 +50,7 @@ const (
EnvAPISecureCiphers = "MINIO_API_SECURE_CIPHERS" EnvAPISecureCiphers = "MINIO_API_SECURE_CIPHERS"
EnvAPIReplicationWorkers = "MINIO_API_REPLICATION_WORKERS" EnvAPIReplicationWorkers = "MINIO_API_REPLICATION_WORKERS"
EnvAPIReplicationFailedWorkers = "MINIO_API_REPLICATION_FAILED_WORKERS" EnvAPIReplicationFailedWorkers = "MINIO_API_REPLICATION_FAILED_WORKERS"
EnvAPITransitionWorkers = "MINIO_API_TRANSITION_WORKERS"
) )
// Deprecated key and ENVs // Deprecated key and ENVs
@ -91,6 +94,10 @@ var (
Key: apiReplicationFailedWorkers, Key: apiReplicationFailedWorkers,
Value: "8", Value: "8",
}, },
config.KV{
Key: apiTransitionWorkers,
Value: "100",
},
} }
) )
@ -104,6 +111,7 @@ type Config struct {
ListQuorum string `json:"list_quorum"` ListQuorum string `json:"list_quorum"`
ReplicationWorkers int `json:"replication_workers"` ReplicationWorkers int `json:"replication_workers"`
ReplicationFailedWorkers int `json:"replication_failed_workers"` ReplicationFailedWorkers int `json:"replication_failed_workers"`
TransitionWorkers int `json:"transition_workers"`
} }
// UnmarshalJSON - Validate SS and RRS parity when unmarshalling JSON. // UnmarshalJSON - Validate SS and RRS parity when unmarshalling JSON.
@ -195,6 +203,14 @@ func LookupConfig(kvs config.KVS) (cfg Config, err error) {
return cfg, config.ErrInvalidReplicationWorkersValue(nil).Msg("Minimum number of replication failed workers should be 1") return cfg, config.ErrInvalidReplicationWorkersValue(nil).Msg("Minimum number of replication failed workers should be 1")
} }
transitionWorkers, err := strconv.Atoi(env.Get(EnvAPITransitionWorkers, kvs.Get(apiTransitionWorkers)))
if err != nil {
return cfg, err
}
if transitionWorkers < runtime.GOMAXPROCS(0)/2 {
return cfg, config.ErrInvalidTransitionWorkersValue(nil)
}
return Config{ return Config{
RequestsMax: requestsMax, RequestsMax: requestsMax,
RequestsDeadline: requestsDeadline, RequestsDeadline: requestsDeadline,
@ -204,5 +220,6 @@ func LookupConfig(kvs config.KVS) (cfg Config, err error) {
ListQuorum: listQuorum, ListQuorum: listQuorum,
ReplicationWorkers: replicationWorkers, ReplicationWorkers: replicationWorkers,
ReplicationFailedWorkers: replicationFailedWorkers, ReplicationFailedWorkers: replicationFailedWorkers,
TransitionWorkers: transitionWorkers,
}, nil }, nil
} }

View File

@ -58,5 +58,11 @@ var (
Optional: true, Optional: true,
Type: "number", Type: "number",
}, },
config.HelpKV{
Key: apiTransitionWorkers,
Description: `set the number of transition workers, defaults to 100`,
Optional: true,
Type: "number",
},
} }
) )

View File

@ -289,4 +289,10 @@ Example 1:
"", "",
"MINIO_API_REPLICATION_WORKERS: should be > 0", "MINIO_API_REPLICATION_WORKERS: should be > 0",
) )
ErrInvalidTransitionWorkersValue = newErrFn(
"Invalid value for transition workers",
"",
"MINIO_API_TRANSITION_WORKERS: should be >= GOMAXPROCS/2",
)
) )