choose different max_concurrent requests per drive based on HDD/NVMe (#18254)

Currently the default for all drives is 512, which is a lot
for HDDs. Recent testing has revealed that moving this to 32
for HDDs seems like a fair value.
This commit is contained in:
Harshavardhana 2023-10-16 17:18:13 -07:00 committed by GitHub
parent 28a2d1eb3d
commit f91b257f50
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -86,6 +86,15 @@ type xlStorageDiskIDCheck struct {
diskID string diskID string
storage *xlStorage storage *xlStorage
health *diskHealthTracker health *diskHealthTracker
// diskStartChecking is a threshold above which we will start to check
// the state of disks; generally this value is less than diskMaxConcurrent.
diskStartChecking int
// diskMaxConcurrent represents maximum number of running concurrent
// operations for local and (incoming) remote disk operations.
diskMaxConcurrent int
metricsCache timedValue metricsCache timedValue
diskCtx context.Context diskCtx context.Context
cancel context.CancelFunc cancel context.CancelFunc
@ -169,9 +178,22 @@ func (e *lockedLastMinuteLatency) total() AccElem {
} }
func newXLStorageDiskIDCheck(storage *xlStorage, healthCheck bool) *xlStorageDiskIDCheck { func newXLStorageDiskIDCheck(storage *xlStorage, healthCheck bool) *xlStorageDiskIDCheck {
if diskMaxConcurrent <= 0 {
diskMaxConcurrent = 512
if storage.rotational {
diskMaxConcurrent = 32
}
}
diskStartChecking := 16 + diskMaxConcurrent/8
if diskStartChecking > diskMaxConcurrent {
diskStartChecking = diskMaxConcurrent
}
xl := xlStorageDiskIDCheck{ xl := xlStorageDiskIDCheck{
storage: storage, storage: storage,
health: newDiskHealthTracker(), health: newDiskHealthTracker(diskMaxConcurrent),
diskMaxConcurrent: diskMaxConcurrent,
diskStartChecking: diskStartChecking,
} }
xl.diskCtx, xl.cancel = context.WithCancel(context.TODO()) xl.diskCtx, xl.cancel = context.WithCancel(context.TODO())
for i := range xl.apiLatencies[:] { for i := range xl.apiLatencies[:] {
@ -709,14 +731,6 @@ const (
diskHealthFaulty diskHealthFaulty
) )
// diskMaxConcurrent is the maximum number of running concurrent operations
// for local and (incoming) remote disk ops respectively.
var diskMaxConcurrent = 512
// diskStartChecking is a threshold above which we will start to check
// the state of disks.
var diskStartChecking = 32
// diskMaxTimeoutOperation maximum wait time before we consider a drive // diskMaxTimeoutOperation maximum wait time before we consider a drive
// offline under active monitoring. // offline under active monitoring.
var diskMaxTimeout = 2 * time.Minute var diskMaxTimeout = 2 * time.Minute
@ -724,6 +738,13 @@ var diskMaxTimeout = 2 * time.Minute
// diskActiveMonitoring indicates if we have enabled "active" disk monitoring // diskActiveMonitoring indicates if we have enabled "active" disk monitoring
var diskActiveMonitoring = true var diskActiveMonitoring = true
// diskMaxConcurrent represents maximum number of running concurrent
// operations for local and (incoming) remote disk operations.
//
// This value is a placeholder: it is overridden via ENV for custom settings;
// otherwise a non-positive value means the default is picked per drive type
// (HDDs vs. NVMe).
var diskMaxConcurrent = -1
func init() { func init() {
s := env.Get("_MINIO_DRIVE_MAX_CONCURRENT", "") s := env.Get("_MINIO_DRIVE_MAX_CONCURRENT", "")
if s == "" { if s == "" {
@ -731,10 +752,6 @@ func init() {
} }
if s != "" { if s != "" {
diskMaxConcurrent, _ = strconv.Atoi(s) diskMaxConcurrent, _ = strconv.Atoi(s)
if diskMaxConcurrent <= 0 {
logger.Info("invalid _MINIO_DISK_MAX_CONCURRENT value: %s, defaulting to '512'", s)
diskMaxConcurrent = 512
}
} }
d := env.Get("_MINIO_DRIVE_MAX_TIMEOUT", "") d := env.Get("_MINIO_DRIVE_MAX_TIMEOUT", "")
@ -752,11 +769,6 @@ func init() {
diskActiveMonitoring = (env.Get("_MINIO_DRIVE_ACTIVE_MONITORING", config.EnableOn) == config.EnableOn) || diskActiveMonitoring = (env.Get("_MINIO_DRIVE_ACTIVE_MONITORING", config.EnableOn) == config.EnableOn) ||
(env.Get("_MINIO_DISK_ACTIVE_MONITORING", config.EnableOn) == config.EnableOn) (env.Get("_MINIO_DISK_ACTIVE_MONITORING", config.EnableOn) == config.EnableOn)
diskStartChecking = 16 + diskMaxConcurrent/8
if diskStartChecking > diskMaxConcurrent {
diskStartChecking = diskMaxConcurrent
}
} }
type diskHealthTracker struct { type diskHealthTracker struct {
@ -777,7 +789,7 @@ type diskHealthTracker struct {
} }
// newDiskHealthTracker creates a new disk health tracker. // newDiskHealthTracker creates a new disk health tracker.
func newDiskHealthTracker() *diskHealthTracker { func newDiskHealthTracker(diskMaxConcurrent int) *diskHealthTracker {
d := diskHealthTracker{ d := diskHealthTracker{
lastSuccess: time.Now().UnixNano(), lastSuccess: time.Now().UnixNano(),
lastStarted: time.Now().UnixNano(), lastStarted: time.Now().UnixNano(),
@ -912,7 +924,7 @@ func (p *xlStorageDiskIDCheck) checkHealth(ctx context.Context) (err error) {
return errFaultyDisk return errFaultyDisk
} }
// Check if there are tokens. // Check if there are tokens.
if diskMaxConcurrent-len(p.health.tokens) < diskStartChecking { if p.diskMaxConcurrent-len(p.health.tokens) < p.diskStartChecking {
return nil return nil
} }