// Copyright (c) 2015-2021 MinIO, Inc. // // This file is part of MinIO Object Storage stack // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . package cmd import ( "math" "net/http" "os" "runtime" "slices" "strconv" "strings" "sync" "time" "github.com/shirou/gopsutil/v3/mem" "github.com/minio/minio/internal/config/api" xioutil "github.com/minio/minio/internal/ioutil" "github.com/minio/minio/internal/logger" "github.com/minio/minio/internal/mcontext" ) type apiConfig struct { mu sync.RWMutex requestsPool chan struct{} clusterDeadline time.Duration listQuorum string corsAllowOrigins []string replicationPriority string replicationMaxWorkers int replicationMaxLWorkers int transitionWorkers int staleUploadsExpiry time.Duration staleUploadsCleanupInterval time.Duration deleteCleanupInterval time.Duration enableODirect bool gzipObjects bool rootAccess bool syncEvents bool objectMaxVersions int64 } const ( cgroupV1MemLimitFile = "/sys/fs/cgroup/memory/memory.limit_in_bytes" cgroupV2MemLimitFile = "/sys/fs/cgroup/memory.max" cgroupMemNoLimit = 9223372036854771712 ) func cgroupMemLimit() (limit uint64) { buf, err := os.ReadFile(cgroupV2MemLimitFile) if err != nil { buf, err = os.ReadFile(cgroupV1MemLimitFile) } if err != nil { return 0 } limit, err = strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64) if err != nil { // The kernel can return valid but non integer values // but still, no need to interpret more return 0 } if limit == cgroupMemNoLimit { // No limit set, It's the highest positive signed 64-bit // integer (2^63-1), rounded down to multiples of 4096 (2^12), // the most common page size on x86 systems - for cgroup_limits. return 0 } return limit } func availableMemory() (available uint64) { available = 2048 * blockSizeV2 * 2 // Default to 4 GiB when we can't find the limits. if runtime.GOOS == "linux" { // Honor cgroup limits if set. limit := cgroupMemLimit() if limit > 0 { // A valid value is found, return its 90% available = (limit * 9) / 10 return } } // for all other platforms limits are based on virtual memory. memStats, err := mem.VirtualMemory() if err != nil { return } // A valid value is available return its 90% available = (memStats.Available * 9) / 10 return } func (t *apiConfig) init(cfg api.Config, setDriveCounts []int, legacy bool) { t.mu.Lock() defer t.mu.Unlock() clusterDeadline := cfg.ClusterDeadline if clusterDeadline == 0 { clusterDeadline = 10 * time.Second } t.clusterDeadline = clusterDeadline corsAllowOrigin := cfg.CorsAllowOrigin if len(corsAllowOrigin) == 0 { corsAllowOrigin = []string{"*"} } t.corsAllowOrigins = corsAllowOrigin var apiRequestsMaxPerNode int if cfg.RequestsMax <= 0 { maxSetDrives := slices.Max(setDriveCounts) // Returns 75% of max memory allowed maxMem := globalServerCtxt.MemLimit // max requests per node is calculated as // total_ram / ram_per_request blockSize := xioutil.LargeBlock + xioutil.SmallBlock if legacy { // ram_per_request is (1MiB+32KiB) * driveCount \ // + 2 * 10MiB (default erasure block size v1) + 2 * 1MiB (default erasure block size v2) apiRequestsMaxPerNode = int(maxMem / uint64(maxSetDrives*blockSize+int(blockSizeV1*2+blockSizeV2*2))) } else { // ram_per_request is (1MiB+32KiB) * driveCount \ // + 2 * 1MiB (default erasure block size v2) apiRequestsMaxPerNode = int(maxMem / uint64(maxSetDrives*blockSize+int(blockSizeV2*2))) } } else { apiRequestsMaxPerNode = cfg.RequestsMax if n := totalNodeCount(); n > 0 { apiRequestsMaxPerNode /= n } } if globalIsDistErasure { logger.Info("Configured max API requests per node based on available memory: %d", apiRequestsMaxPerNode) } if cap(t.requestsPool) != apiRequestsMaxPerNode { // Only replace if needed. // Existing requests will use the previous limit, // but new requests will use the new limit. // There will be a short overlap window, // but this shouldn't last long. t.requestsPool = make(chan struct{}, apiRequestsMaxPerNode) } listQuorum := cfg.ListQuorum if listQuorum == "" { listQuorum = "strict" } t.listQuorum = listQuorum if r := globalReplicationPool.GetNonBlocking(); r != nil && (cfg.ReplicationPriority != t.replicationPriority || cfg.ReplicationMaxWorkers != t.replicationMaxWorkers || cfg.ReplicationMaxLWorkers != t.replicationMaxLWorkers) { r.ResizeWorkerPriority(cfg.ReplicationPriority, cfg.ReplicationMaxWorkers, cfg.ReplicationMaxLWorkers) } t.replicationPriority = cfg.ReplicationPriority t.replicationMaxWorkers = cfg.ReplicationMaxWorkers t.replicationMaxLWorkers = cfg.ReplicationMaxLWorkers // N B api.transition_workers will be deprecated if globalTransitionState != nil { globalTransitionState.UpdateWorkers(cfg.TransitionWorkers) } t.transitionWorkers = cfg.TransitionWorkers t.staleUploadsExpiry = cfg.StaleUploadsExpiry t.deleteCleanupInterval = cfg.DeleteCleanupInterval t.enableODirect = cfg.EnableODirect t.gzipObjects = cfg.GzipObjects t.rootAccess = cfg.RootAccess t.syncEvents = cfg.SyncEvents t.objectMaxVersions = cfg.ObjectMaxVersions if t.staleUploadsCleanupInterval != cfg.StaleUploadsCleanupInterval { t.staleUploadsCleanupInterval = cfg.StaleUploadsCleanupInterval // signal that cleanup interval has changed select { case staleUploadsCleanupIntervalChangedCh <- struct{}{}: default: // in case the channel is blocked... } } } func (t *apiConfig) odirectEnabled() bool { t.mu.RLock() defer t.mu.RUnlock() return t.enableODirect } func (t *apiConfig) shouldGzipObjects() bool { t.mu.RLock() defer t.mu.RUnlock() return t.gzipObjects } func (t *apiConfig) permitRootAccess() bool { t.mu.RLock() defer t.mu.RUnlock() return t.rootAccess } func (t *apiConfig) getListQuorum() string { t.mu.RLock() defer t.mu.RUnlock() if t.listQuorum == "" { return "strict" } return t.listQuorum } func (t *apiConfig) getCorsAllowOrigins() []string { t.mu.RLock() defer t.mu.RUnlock() if len(t.corsAllowOrigins) == 0 { return []string{"*"} } corsAllowOrigins := make([]string, len(t.corsAllowOrigins)) copy(corsAllowOrigins, t.corsAllowOrigins) return corsAllowOrigins } func (t *apiConfig) getStaleUploadsCleanupInterval() time.Duration { t.mu.RLock() defer t.mu.RUnlock() if t.staleUploadsCleanupInterval == 0 { return 6 * time.Hour // default 6 hours } return t.staleUploadsCleanupInterval } func (t *apiConfig) getStaleUploadsExpiry() time.Duration { t.mu.RLock() defer t.mu.RUnlock() if t.staleUploadsExpiry == 0 { return 24 * time.Hour // default 24 hours } return t.staleUploadsExpiry } func (t *apiConfig) getDeleteCleanupInterval() time.Duration { t.mu.RLock() defer t.mu.RUnlock() if t.deleteCleanupInterval == 0 { return 5 * time.Minute // every 5 minutes } return t.deleteCleanupInterval } func (t *apiConfig) getClusterDeadline() time.Duration { t.mu.RLock() defer t.mu.RUnlock() if t.clusterDeadline == 0 { return 10 * time.Second } return t.clusterDeadline } func (t *apiConfig) getRequestsPoolCapacity() int { t.mu.RLock() defer t.mu.RUnlock() return cap(t.requestsPool) } func (t *apiConfig) getRequestsPool() chan struct{} { t.mu.RLock() defer t.mu.RUnlock() if t.requestsPool == nil { return nil } return t.requestsPool } // maxClients throttles the S3 API calls func maxClients(f http.HandlerFunc) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { globalHTTPStats.incS3RequestsIncoming() if r.Header.Get(globalObjectPerfUserMetadata) == "" { if val := globalServiceFreeze.Load(); val != nil { if unlock, ok := val.(chan struct{}); ok && unlock != nil { // Wait until unfrozen. select { case <-unlock: case <-r.Context().Done(): // if client canceled we don't need to wait here forever. return } } } } globalHTTPStats.addRequestsInQueue(1) pool := globalAPIConfig.getRequestsPool() if pool == nil { globalHTTPStats.addRequestsInQueue(-1) f.ServeHTTP(w, r) return } if tc, ok := r.Context().Value(mcontext.ContextTraceKey).(*mcontext.TraceCtxt); ok { tc.FuncName = "s3.MaxClients" } w.Header().Set("X-RateLimit-Limit", strconv.Itoa(cap(pool))) w.Header().Set("X-RateLimit-Remaining", strconv.Itoa(cap(pool)-len(pool))) ctx := r.Context() select { case pool <- struct{}{}: defer func() { <-pool }() globalHTTPStats.addRequestsInQueue(-1) if contextCanceled(ctx) { w.WriteHeader(499) return } f.ServeHTTP(w, r) case <-r.Context().Done(): globalHTTPStats.addRequestsInQueue(-1) // When the client disconnects before getting the S3 handler // status code response, set the status code to 499 so this request // will be properly audited and traced. w.WriteHeader(499) default: globalHTTPStats.addRequestsInQueue(-1) if contextCanceled(ctx) { w.WriteHeader(499) return } // Send a http timeout message writeErrorResponse(ctx, w, errorCodes.ToAPIErr(ErrTooManyRequests), r.URL) } } } func (t *apiConfig) getReplicationOpts() replicationPoolOpts { t.mu.RLock() defer t.mu.RUnlock() if t.replicationPriority == "" { return replicationPoolOpts{ Priority: "auto", MaxWorkers: WorkerMaxLimit, MaxLWorkers: LargeWorkerCount, } } return replicationPoolOpts{ Priority: t.replicationPriority, MaxWorkers: t.replicationMaxWorkers, MaxLWorkers: t.replicationMaxLWorkers, } } func (t *apiConfig) getTransitionWorkers() int { t.mu.RLock() defer t.mu.RUnlock() if t.transitionWorkers <= 0 { return runtime.GOMAXPROCS(0) / 2 } return t.transitionWorkers } func (t *apiConfig) isSyncEventsEnabled() bool { t.mu.RLock() defer t.mu.RUnlock() return t.syncEvents } func (t *apiConfig) getObjectMaxVersions() int64 { t.mu.RLock() defer t.mu.RUnlock() if t.objectMaxVersions <= 0 { // defaults to 'IntMax' when unset. return math.MaxInt64 } return t.objectMaxVersions }