2021-04-18 12:41:13 -07:00
|
|
|
// Copyright (c) 2015-2021 MinIO, Inc.
|
|
|
|
//
|
|
|
|
// This file is part of MinIO Object Storage stack
|
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2020-04-14 20:46:37 +01:00
|
|
|
|
|
|
|
package cmd
|
|
|
|
|
|
|
|
import (
|
|
|
|
"net/http"
|
|
|
|
"sync"
|
|
|
|
"time"
|
2020-06-04 14:58:34 -07:00
|
|
|
|
2021-06-01 14:59:40 -07:00
|
|
|
"github.com/minio/minio/internal/config/api"
|
|
|
|
"github.com/minio/minio/internal/logger"
|
2021-08-24 09:14:46 -07:00
|
|
|
mem "github.com/shirou/gopsutil/v3/mem"
|
2020-04-14 20:46:37 +01:00
|
|
|
)
|
|
|
|
|
2020-06-04 14:58:34 -07:00
|
|
|
// apiConfig holds the runtime-tunable API settings. Every field is guarded
// by mu; use the accessor methods rather than touching fields directly.
type apiConfig struct {
	// mu protects all fields below.
	mu sync.RWMutex

	// requestsDeadline is how long a request may wait for a free slot in
	// requestsPool before it is rejected.
	requestsDeadline time.Duration
	// requestsPool is a buffered channel used as a counting semaphore for
	// in-flight requests; nil means no throttling is applied.
	requestsPool chan struct{}
	// clusterDeadline is the configured cluster deadline; a zero value is
	// replaced by a default when read through its accessor.
	clusterDeadline time.Duration
	// listQuorum is the configured quorum used for listing operations.
	listQuorum int
	// corsAllowOrigins is the configured list of allowed CORS origins.
	corsAllowOrigins []string
	// total drives per erasure set across pools.
	totalDriveCount int
	// replicationWorkers is the last applied replication worker count.
	replicationWorkers int
	// replicationFailedWorkers is the last applied failed-replication
	// worker count.
	replicationFailedWorkers int
	// transitionWorkers is the last applied transition worker count.
	transitionWorkers int
}
|
|
|
|
|
2021-01-22 12:09:24 -08:00
|
|
|
func (t *apiConfig) init(cfg api.Config, setDriveCounts []int) {
|
2020-06-04 14:58:34 -07:00
|
|
|
t.mu.Lock()
|
|
|
|
defer t.mu.Unlock()
|
|
|
|
|
2020-09-23 09:14:33 -07:00
|
|
|
t.clusterDeadline = cfg.ClusterDeadline
|
2020-09-11 23:03:08 -07:00
|
|
|
t.corsAllowOrigins = cfg.CorsAllowOrigin
|
2021-08-24 09:14:46 -07:00
|
|
|
maxSetDrives := 0
|
2021-01-22 12:09:24 -08:00
|
|
|
for _, setDriveCount := range setDriveCounts {
|
|
|
|
t.totalDriveCount += setDriveCount
|
2021-08-24 09:14:46 -07:00
|
|
|
if setDriveCount > maxSetDrives {
|
|
|
|
maxSetDrives = setDriveCount
|
|
|
|
}
|
2021-01-22 12:09:24 -08:00
|
|
|
}
|
2020-09-11 23:03:08 -07:00
|
|
|
|
2020-09-04 19:37:37 -07:00
|
|
|
var apiRequestsMaxPerNode int
|
2020-09-11 23:03:08 -07:00
|
|
|
if cfg.RequestsMax <= 0 {
|
2021-08-24 09:14:46 -07:00
|
|
|
var maxMem uint64
|
|
|
|
memStats, err := mem.VirtualMemory()
|
2020-09-04 19:37:37 -07:00
|
|
|
if err != nil {
|
[feat]: change erasure coding default block size from 10MiB to 1MiB (#11721)
major performance improvements in range GETs to avoid large
read amplification when ranges are tiny and random
```
-------------------
Operation: GET
Operations: 142014 -> 339421
Duration: 4m50s -> 4m56s
* Average: +139.41% (+1177.3 MiB/s) throughput, +139.11% (+658.4) obj/s
* Fastest: +125.24% (+1207.4 MiB/s) throughput, +132.32% (+612.9) obj/s
* 50% Median: +139.06% (+1175.7 MiB/s) throughput, +133.46% (+660.9) obj/s
* Slowest: +203.40% (+1267.9 MiB/s) throughput, +198.59% (+753.5) obj/s
```
TTFB from 10MiB BlockSize
```
* First Access TTFB: Avg: 81ms, Median: 61ms, Best: 20ms, Worst: 2.056s
```
TTFB from 1MiB BlockSize
```
* First Access TTFB: Avg: 22ms, Median: 21ms, Best: 8ms, Worst: 91ms
```
Full object reads however do see a slight change which won't be
noticeable in real world, so not doing any comparisons
TTFB still had improvements with full object reads with 1MiB
```
* First Access TTFB: Avg: 68ms, Median: 35ms, Best: 11ms, Worst: 1.16s
```
v/s
TTFB with 10MiB
```
* First Access TTFB: Avg: 388ms, Median: 98ms, Best: 20ms, Worst: 4.156s
```
This change should affect all new uploads, previous uploads should
continue to work with business as usual. But dramatic improvements can
be seen with these changes.
2021-03-06 14:09:34 -08:00
|
|
|
// Default to 8 GiB, not critical.
|
2021-08-24 09:14:46 -07:00
|
|
|
maxMem = 8 << 30
|
|
|
|
} else {
|
|
|
|
maxMem = memStats.Available / 2
|
2020-09-04 19:37:37 -07:00
|
|
|
}
|
2021-08-24 09:14:46 -07:00
|
|
|
|
2020-09-04 19:37:37 -07:00
|
|
|
// max requests per node is calculated as
|
|
|
|
// total_ram / ram_per_request
|
[feat]: change erasure coding default block size from 10MiB to 1MiB (#11721)
major performance improvements in range GETs to avoid large
read amplification when ranges are tiny and random
```
-------------------
Operation: GET
Operations: 142014 -> 339421
Duration: 4m50s -> 4m56s
* Average: +139.41% (+1177.3 MiB/s) throughput, +139.11% (+658.4) obj/s
* Fastest: +125.24% (+1207.4 MiB/s) throughput, +132.32% (+612.9) obj/s
* 50% Median: +139.06% (+1175.7 MiB/s) throughput, +133.46% (+660.9) obj/s
* Slowest: +203.40% (+1267.9 MiB/s) throughput, +198.59% (+753.5) obj/s
```
TTFB from 10MiB BlockSize
```
* First Access TTFB: Avg: 81ms, Median: 61ms, Best: 20ms, Worst: 2.056s
```
TTFB from 1MiB BlockSize
```
* First Access TTFB: Avg: 22ms, Median: 21ms, Best: 8ms, Worst: 91ms
```
Full object reads however do see a slight change which won't be
noticeable in real world, so not doing any comparisons
TTFB still had improvements with full object reads with 1MiB
```
* First Access TTFB: Avg: 68ms, Median: 35ms, Best: 11ms, Worst: 1.16s
```
v/s
TTFB with 10MiB
```
* First Access TTFB: Avg: 388ms, Median: 98ms, Best: 20ms, Worst: 4.156s
```
This change should affect all new uploads, previous uploads should
continue to work with business as usual. But dramatic improvements can
be seen with these changes.
2021-03-06 14:09:34 -08:00
|
|
|
// ram_per_request is (2MiB+128KiB) * driveCount \
|
|
|
|
// + 2 * 10MiB (default erasure block size v1) + 2 * 1MiB (default erasure block size v2)
|
2021-08-24 09:14:46 -07:00
|
|
|
apiRequestsMaxPerNode = int(maxMem / uint64(maxSetDrives*(blockSizeLarge+blockSizeSmall)+int(blockSizeV1*2+blockSizeV2*2)))
|
|
|
|
|
|
|
|
if globalIsErasure {
|
|
|
|
logger.Info("Automatically configured API requests per node based on available memory on the system: %d", apiRequestsMaxPerNode)
|
|
|
|
}
|
2020-09-04 19:37:37 -07:00
|
|
|
} else {
|
2020-09-11 23:03:08 -07:00
|
|
|
apiRequestsMaxPerNode = cfg.RequestsMax
|
2020-09-04 19:37:37 -07:00
|
|
|
if len(globalEndpoints.Hostnames()) > 0 {
|
|
|
|
apiRequestsMaxPerNode /= len(globalEndpoints.Hostnames())
|
|
|
|
}
|
2020-04-14 20:46:37 +01:00
|
|
|
}
|
2021-08-24 09:14:46 -07:00
|
|
|
|
2020-12-04 09:32:35 -08:00
|
|
|
if cap(t.requestsPool) < apiRequestsMaxPerNode {
|
|
|
|
// Only replace if needed.
|
|
|
|
// Existing requests will use the previous limit,
|
|
|
|
// but new requests will use the new limit.
|
|
|
|
// There will be a short overlap window,
|
|
|
|
// but this shouldn't last long.
|
|
|
|
t.requestsPool = make(chan struct{}, apiRequestsMaxPerNode)
|
|
|
|
}
|
2020-09-11 23:03:08 -07:00
|
|
|
t.requestsDeadline = cfg.RequestsDeadline
|
2020-11-02 17:21:56 -08:00
|
|
|
t.listQuorum = cfg.GetListQuorum()
|
2021-03-09 02:56:42 -08:00
|
|
|
if globalReplicationPool != nil &&
|
|
|
|
cfg.ReplicationWorkers != t.replicationWorkers {
|
2021-04-23 21:58:45 -07:00
|
|
|
globalReplicationPool.ResizeFailedWorkers(cfg.ReplicationFailedWorkers)
|
|
|
|
globalReplicationPool.ResizeWorkers(cfg.ReplicationWorkers)
|
2021-03-09 02:56:42 -08:00
|
|
|
}
|
2021-04-23 21:58:45 -07:00
|
|
|
t.replicationFailedWorkers = cfg.ReplicationFailedWorkers
|
2021-02-02 03:15:06 -08:00
|
|
|
t.replicationWorkers = cfg.ReplicationWorkers
|
2021-08-11 22:23:56 -07:00
|
|
|
if globalTransitionState != nil && cfg.TransitionWorkers != t.transitionWorkers {
|
|
|
|
globalTransitionState.UpdateWorkers(cfg.TransitionWorkers)
|
|
|
|
}
|
|
|
|
t.transitionWorkers = cfg.TransitionWorkers
|
2020-11-02 17:21:56 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (t *apiConfig) getListQuorum() int {
|
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
|
|
|
return t.listQuorum
|
2020-06-04 14:58:34 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
func (t *apiConfig) getCorsAllowOrigins() []string {
|
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
2020-09-11 23:03:08 -07:00
|
|
|
corsAllowOrigins := make([]string, len(t.corsAllowOrigins))
|
|
|
|
copy(corsAllowOrigins, t.corsAllowOrigins)
|
|
|
|
return corsAllowOrigins
|
2020-06-04 14:58:34 -07:00
|
|
|
}
|
|
|
|
|
2020-09-23 09:14:33 -07:00
|
|
|
func (t *apiConfig) getClusterDeadline() time.Duration {
|
2020-06-04 14:58:34 -07:00
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
2020-09-23 09:14:33 -07:00
|
|
|
if t.clusterDeadline == 0 {
|
2020-06-04 14:58:34 -07:00
|
|
|
return 10 * time.Second
|
|
|
|
}
|
2020-04-14 20:46:37 +01:00
|
|
|
|
2020-09-23 09:14:33 -07:00
|
|
|
return t.clusterDeadline
|
2020-04-14 20:46:37 +01:00
|
|
|
}
|
|
|
|
|
2020-12-03 19:23:19 -08:00
|
|
|
func (t *apiConfig) getRequestsPool() (chan struct{}, time.Duration) {
|
2020-04-14 20:46:37 +01:00
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
2020-06-04 14:58:34 -07:00
|
|
|
if t.requestsPool == nil {
|
2020-12-03 19:23:19 -08:00
|
|
|
return nil, time.Duration(0)
|
2020-11-04 08:25:42 -08:00
|
|
|
}
|
2020-04-14 20:46:37 +01:00
|
|
|
|
2020-12-03 19:23:19 -08:00
|
|
|
return t.requestsPool, t.requestsDeadline
|
2020-04-14 20:46:37 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// maxClients throttles the S3 API calls
|
|
|
|
func maxClients(f http.HandlerFunc) http.HandlerFunc {
|
|
|
|
return func(w http.ResponseWriter, r *http.Request) {
|
2020-12-03 19:23:19 -08:00
|
|
|
pool, deadline := globalAPIConfig.getRequestsPool()
|
2020-04-14 20:46:37 +01:00
|
|
|
if pool == nil {
|
|
|
|
f.ServeHTTP(w, r)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2021-02-20 09:21:55 +01:00
|
|
|
globalHTTPStats.addRequestsInQueue(1)
|
|
|
|
|
2020-12-03 19:23:19 -08:00
|
|
|
deadlineTimer := time.NewTimer(deadline)
|
|
|
|
defer deadlineTimer.Stop()
|
|
|
|
|
2020-04-14 20:46:37 +01:00
|
|
|
select {
|
|
|
|
case pool <- struct{}{}:
|
|
|
|
defer func() { <-pool }()
|
2021-02-20 09:21:55 +01:00
|
|
|
globalHTTPStats.addRequestsInQueue(-1)
|
2020-04-14 20:46:37 +01:00
|
|
|
f.ServeHTTP(w, r)
|
2020-12-03 19:23:19 -08:00
|
|
|
case <-deadlineTimer.C:
|
2020-04-14 20:46:37 +01:00
|
|
|
// Send a http timeout message
|
|
|
|
writeErrorResponse(r.Context(), w,
|
|
|
|
errorCodes.ToAPIErr(ErrOperationMaxedOut),
|
2021-06-17 20:27:04 -07:00
|
|
|
r.URL)
|
2021-02-20 09:21:55 +01:00
|
|
|
globalHTTPStats.addRequestsInQueue(-1)
|
2020-04-14 20:46:37 +01:00
|
|
|
return
|
|
|
|
case <-r.Context().Done():
|
2021-02-20 09:21:55 +01:00
|
|
|
globalHTTPStats.addRequestsInQueue(-1)
|
2020-04-14 20:46:37 +01:00
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-02-02 03:15:06 -08:00
|
|
|
|
2021-04-23 21:58:45 -07:00
|
|
|
func (t *apiConfig) getReplicationFailedWorkers() int {
|
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
|
|
|
return t.replicationFailedWorkers
|
|
|
|
}
|
|
|
|
|
2021-02-02 03:15:06 -08:00
|
|
|
func (t *apiConfig) getReplicationWorkers() int {
|
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
|
|
|
return t.replicationWorkers
|
|
|
|
}
|
2021-08-11 22:23:56 -07:00
|
|
|
|
|
|
|
func (t *apiConfig) getTransitionWorkers() int {
|
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
|
|
|
return t.transitionWorkers
|
|
|
|
}
|