2021-04-18 15:41:13 -04:00
|
|
|
// Copyright (c) 2015-2021 MinIO, Inc.
|
|
|
|
//
|
|
|
|
// This file is part of MinIO Object Storage stack
|
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2020-04-14 15:46:37 -04:00
|
|
|
|
|
|
|
package cmd
|
|
|
|
|
|
|
|
import (
	"net/http"
	"os"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/shirou/gopsutil/v3/mem"

	"github.com/minio/minio/internal/config/api"
	xioutil "github.com/minio/minio/internal/ioutil"
	"github.com/minio/minio/internal/logger"
)
|
|
|
|
|
2020-06-04 17:58:34 -04:00
|
|
|
// apiConfig is the runtime view of the dynamic "api" configuration
// subsystem. It is refreshed via init() on config reload; every field
// is guarded by mu so getters remain safe during a reload.
type apiConfig struct {
	mu sync.RWMutex

	// requestsDeadline bounds how long a request may wait in the
	// throttle queue before it is rejected (see maxClients).
	requestsDeadline time.Duration
	// requestsPool throttles concurrent API requests; its capacity is
	// the per-node maximum. nil/zero-cap means throttling is disabled.
	requestsPool chan struct{}
	clusterDeadline time.Duration
	listQuorum string
	corsAllowOrigins []string
	// total drives per erasure set across pools.
	totalDriveCount int
	replicationPriority string
	transitionWorkers int

	// Stale multipart-upload housekeeping knobs; zero values fall back
	// to defaults in the corresponding getters.
	staleUploadsExpiry time.Duration
	staleUploadsCleanupInterval time.Duration
	deleteCleanupInterval time.Duration
	disableODirect bool
	gzipObjects bool
}
|
|
|
|
|
2021-11-17 12:55:45 -05:00
|
|
|
// cgroupLimitFile is the cgroup (v1) memory controller file holding the
// hard memory limit, in bytes, for the current container/host.
const cgroupLimitFile = "/sys/fs/cgroup/memory/memory.limit_in_bytes"
|
|
|
|
|
|
|
|
func cgroupLimit(limitFile string) (limit uint64) {
|
2022-09-19 14:05:16 -04:00
|
|
|
buf, err := os.ReadFile(limitFile)
|
2021-11-17 12:55:45 -05:00
|
|
|
if err != nil {
|
|
|
|
return 9223372036854771712
|
|
|
|
}
|
|
|
|
limit, err = strconv.ParseUint(string(buf), 10, 64)
|
|
|
|
if err != nil {
|
|
|
|
return 9223372036854771712
|
|
|
|
}
|
|
|
|
return limit
|
|
|
|
}
|
|
|
|
|
|
|
|
func availableMemory() (available uint64) {
|
|
|
|
available = 8 << 30 // Default to 8 GiB when we can't find the limits.
|
|
|
|
|
|
|
|
if runtime.GOOS == "linux" {
|
|
|
|
available = cgroupLimit(cgroupLimitFile)
|
|
|
|
|
|
|
|
// No limit set, It's the highest positive signed 64-bit
|
|
|
|
// integer (2^63-1), rounded down to multiples of 4096 (2^12),
|
|
|
|
// the most common page size on x86 systems - for cgroup_limits.
|
|
|
|
if available != 9223372036854771712 {
|
|
|
|
// This means cgroup memory limit is configured.
|
|
|
|
return
|
|
|
|
} // no-limit set proceed to set the limits based on virtual memory.
|
|
|
|
|
|
|
|
} // for all other platforms limits are based on virtual memory.
|
|
|
|
|
|
|
|
memStats, err := mem.VirtualMemory()
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
available = memStats.Available / 2
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2021-01-22 15:09:24 -05:00
|
|
|
// init applies a freshly loaded API configuration. It recomputes the
// per-node request limit, resizes the throttle pool when the limit
// changed, and propagates priority/worker changes to the replication
// and ILM-transition subsystems. Holds t.mu for the whole update so
// getters always observe a consistent snapshot.
func (t *apiConfig) init(cfg api.Config, setDriveCounts []int) {
	t.mu.Lock()
	defer t.mu.Unlock()

	t.clusterDeadline = cfg.ClusterDeadline
	t.corsAllowOrigins = cfg.CorsAllowOrigin

	// Accumulate total drives per erasure set across pools; the largest
	// set drives the per-request memory estimate below.
	maxSetDrives := 0
	for _, setDriveCount := range setDriveCounts {
		t.totalDriveCount += setDriveCount
		if setDriveCount > maxSetDrives {
			maxSetDrives = setDriveCount
		}
	}

	var apiRequestsMaxPerNode int
	if cfg.RequestsMax <= 0 {
		// No explicit limit configured; derive one from memory.
		maxMem := availableMemory()

		// max requests per node is calculated as
		// total_ram / ram_per_request
		// ram_per_request is (2MiB+128KiB) * driveCount \
		// + 2 * 10MiB (default erasure block size v1) + 2 * 1MiB (default erasure block size v2)
		blockSize := xioutil.BlockSizeLarge + xioutil.BlockSizeSmall
		apiRequestsMaxPerNode = int(maxMem / uint64(maxSetDrives*blockSize+int(blockSizeV1*2+blockSizeV2*2)))
		if globalIsDistErasure {
			logger.Info("Automatically configured API requests per node based on available memory on the system: %d", apiRequestsMaxPerNode)
		}
	} else {
		// Explicit limit: spread the configured maximum across nodes.
		apiRequestsMaxPerNode = cfg.RequestsMax
		if len(globalEndpoints.Hostnames()) > 0 {
			apiRequestsMaxPerNode /= len(globalEndpoints.Hostnames())
		}
	}

	if cap(t.requestsPool) != apiRequestsMaxPerNode {
		// Only replace if needed.
		// Existing requests will use the previous limit,
		// but new requests will use the new limit.
		// There will be a short overlap window,
		// but this shouldn't last long.
		t.requestsPool = make(chan struct{}, apiRequestsMaxPerNode)
	}
	t.requestsDeadline = cfg.RequestsDeadline
	t.listQuorum = cfg.ListQuorum
	// Resize replication workers only when the priority actually
	// changed, then record the new priority.
	if globalReplicationPool != nil &&
		cfg.ReplicationPriority != t.replicationPriority {
		globalReplicationPool.ResizeWorkerPriority(cfg.ReplicationPriority)
	}
	t.replicationPriority = cfg.ReplicationPriority

	// Same change-detection pattern for ILM transition workers.
	if globalTransitionState != nil && cfg.TransitionWorkers != t.transitionWorkers {
		globalTransitionState.UpdateWorkers(cfg.TransitionWorkers)
	}
	t.transitionWorkers = cfg.TransitionWorkers

	t.staleUploadsExpiry = cfg.StaleUploadsExpiry
	t.staleUploadsCleanupInterval = cfg.StaleUploadsCleanupInterval
	t.deleteCleanupInterval = cfg.DeleteCleanupInterval
	t.disableODirect = cfg.DisableODirect
	t.gzipObjects = cfg.GzipObjects
}
|
|
|
|
|
|
|
|
func (t *apiConfig) isDisableODirect() bool {
|
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
|
|
|
return t.disableODirect
|
2020-11-02 20:21:56 -05:00
|
|
|
}
|
|
|
|
|
2022-02-14 12:19:01 -05:00
|
|
|
func (t *apiConfig) shouldGzipObjects() bool {
|
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
|
|
|
return t.gzipObjects
|
|
|
|
}
|
|
|
|
|
2022-03-25 19:29:45 -04:00
|
|
|
func (t *apiConfig) getListQuorum() string {
|
2020-11-02 20:21:56 -05:00
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
|
|
|
return t.listQuorum
|
2020-06-04 17:58:34 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
func (t *apiConfig) getCorsAllowOrigins() []string {
|
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
2020-09-12 02:03:08 -04:00
|
|
|
corsAllowOrigins := make([]string, len(t.corsAllowOrigins))
|
|
|
|
copy(corsAllowOrigins, t.corsAllowOrigins)
|
|
|
|
return corsAllowOrigins
|
2020-06-04 17:58:34 -04:00
|
|
|
}
|
|
|
|
|
2021-10-04 13:52:28 -04:00
|
|
|
func (t *apiConfig) getStaleUploadsCleanupInterval() time.Duration {
|
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
|
|
|
if t.staleUploadsCleanupInterval == 0 {
|
|
|
|
return 6 * time.Hour // default 6 hours
|
|
|
|
}
|
|
|
|
|
|
|
|
return t.staleUploadsCleanupInterval
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *apiConfig) getStaleUploadsExpiry() time.Duration {
|
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
|
|
|
if t.staleUploadsExpiry == 0 {
|
|
|
|
return 24 * time.Hour // default 24 hours
|
|
|
|
}
|
|
|
|
|
|
|
|
return t.staleUploadsExpiry
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *apiConfig) getDeleteCleanupInterval() time.Duration {
|
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
|
|
|
if t.deleteCleanupInterval == 0 {
|
|
|
|
return 5 * time.Minute // every 5 minutes
|
|
|
|
}
|
|
|
|
|
|
|
|
return t.deleteCleanupInterval
|
|
|
|
}
|
|
|
|
|
2020-09-23 12:14:33 -04:00
|
|
|
func (t *apiConfig) getClusterDeadline() time.Duration {
|
2020-06-04 17:58:34 -04:00
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
2020-09-23 12:14:33 -04:00
|
|
|
if t.clusterDeadline == 0 {
|
2020-06-04 17:58:34 -04:00
|
|
|
return 10 * time.Second
|
|
|
|
}
|
2020-04-14 15:46:37 -04:00
|
|
|
|
2020-09-23 12:14:33 -04:00
|
|
|
return t.clusterDeadline
|
2020-04-14 15:46:37 -04:00
|
|
|
}
|
|
|
|
|
2020-12-03 22:23:19 -05:00
|
|
|
func (t *apiConfig) getRequestsPool() (chan struct{}, time.Duration) {
|
2020-04-14 15:46:37 -04:00
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
2020-06-04 17:58:34 -04:00
|
|
|
if t.requestsPool == nil {
|
2020-12-03 22:23:19 -05:00
|
|
|
return nil, time.Duration(0)
|
2020-11-04 11:25:42 -05:00
|
|
|
}
|
2020-04-14 15:46:37 -04:00
|
|
|
|
2020-12-03 22:23:19 -05:00
|
|
|
return t.requestsPool, t.requestsDeadline
|
2020-04-14 15:46:37 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// maxClients throttles the S3 API calls
// by admitting at most cap(requestsPool) concurrent handlers; excess
// requests wait in a queue up to the configured deadline, then receive
// ErrOperationMaxedOut.
func maxClients(f http.HandlerFunc) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		globalHTTPStats.incS3RequestsIncoming()

		// Requests carrying the perf-test metadata header skip the
		// service-freeze gate below.
		if r.Header.Get(globalObjectPerfUserMetadata) == "" {
			if val := globalServiceFreeze.Load(); val != nil {
				if unlock, ok := val.(chan struct{}); ok && unlock != nil {
					// Wait until unfrozen.
					select {
					case <-unlock:
					case <-r.Context().Done():
						// if client canceled we don't need to wait here forever.
						return
					}
				}
			}
		}

		pool, deadline := globalAPIConfig.getRequestsPool()
		if pool == nil {
			// Throttling disabled; serve directly.
			f.ServeHTTP(w, r)
			return
		}

		// Count this request as queued until a slot is acquired or it
		// gives up (every select arm below decrements the counter).
		globalHTTPStats.addRequestsInQueue(1)

		deadlineTimer := time.NewTimer(deadline)
		defer deadlineTimer.Stop()

		select {
		case pool <- struct{}{}:
			// Slot acquired; release it when the handler returns.
			defer func() { <-pool }()
			globalHTTPStats.addRequestsInQueue(-1)
			f.ServeHTTP(w, r)
		case <-deadlineTimer.C:
			// Send a http timeout message
			writeErrorResponse(r.Context(), w,
				errorCodes.ToAPIErr(ErrOperationMaxedOut),
				r.URL)
			globalHTTPStats.addRequestsInQueue(-1)
			return
		case <-r.Context().Done():
			// Client went away while queued.
			globalHTTPStats.addRequestsInQueue(-1)
			return
		}
	}
}
|
2021-02-02 06:15:06 -05:00
|
|
|
|
2022-09-24 19:20:28 -04:00
|
|
|
func (t *apiConfig) getReplicationPriority() string {
|
2021-02-02 06:15:06 -05:00
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
2022-09-24 19:20:28 -04:00
|
|
|
return t.replicationPriority
|
2021-02-02 06:15:06 -05:00
|
|
|
}
|
2021-08-12 01:23:56 -04:00
|
|
|
|
|
|
|
func (t *apiConfig) getTransitionWorkers() int {
|
|
|
|
t.mu.RLock()
|
|
|
|
defer t.mu.RUnlock()
|
|
|
|
|
|
|
|
return t.transitionWorkers
|
|
|
|
}
|