Improve expiration of tiered objects (#18926)

- Use a shared worker pool for all ILM expiry tasks
- Free version cleanup executes in a separate goroutine
- Add a free version only if removing the remote object fails
- Add ILM expiry metrics to the node namespace
- Move tier journal tasks to expiryState
- Remove unused on-disk journal for tiered objects pending deletion
- Distribute expiry tasks across workers such that the expiry of versions of
  the same object is serialized
- Ability to resize worker pool without server restart
- Make scaling down of expiryState workers' concurrency safe; Thanks
  @klauspost
- Add error logs when expiryState and transition state are not
  initialized (yet)
* metrics: Add missed tier journal entry tasks
* Initialize the ILM worker pool after the object layer
This commit is contained in:
Krishnan Parthasarathi
2024-03-01 21:11:03 -08:00
committed by GitHub
parent 325fd80687
commit a7577da768
28 changed files with 611 additions and 1131 deletions

View File

@@ -41,6 +41,7 @@ const (
apiReplicationMaxWorkers = "replication_max_workers"
apiTransitionWorkers = "transition_workers"
apiExpiryWorkers = "expiry_workers"
apiStaleUploadsCleanupInterval = "stale_uploads_cleanup_interval"
apiStaleUploadsExpiry = "stale_uploads_expiry"
apiDeleteCleanupInterval = "delete_cleanup_interval"
@@ -56,6 +57,7 @@ const (
EnvAPICorsAllowOrigin = "MINIO_API_CORS_ALLOW_ORIGIN"
EnvAPIRemoteTransportDeadline = "MINIO_API_REMOTE_TRANSPORT_DEADLINE"
EnvAPITransitionWorkers = "MINIO_API_TRANSITION_WORKERS"
EnvAPIExpiryWorkers = "MINIO_API_EXPIRY_WORKERS"
EnvAPIListQuorum = "MINIO_API_LIST_QUORUM"
EnvAPISecureCiphers = "MINIO_API_SECURE_CIPHERS" // default config.EnableOn
EnvAPIReplicationPriority = "MINIO_API_REPLICATION_PRIORITY"
@@ -117,6 +119,10 @@ var (
Key: apiTransitionWorkers,
Value: "100",
},
config.KV{
Key: apiExpiryWorkers,
Value: "100",
},
config.KV{
Key: apiStaleUploadsCleanupInterval,
Value: "6h",
@@ -164,6 +170,7 @@ type Config struct {
ReplicationPriority string `json:"replication_priority"`
ReplicationMaxWorkers int `json:"replication_max_workers"`
TransitionWorkers int `json:"transition_workers"`
ExpiryWorkers int `json:"expiry_workers"`
StaleUploadsCleanupInterval time.Duration `json:"stale_uploads_cleanup_interval"`
StaleUploadsExpiry time.Duration `json:"stale_uploads_expiry"`
DeleteCleanupInterval time.Duration `json:"delete_cleanup_interval"`
@@ -281,6 +288,15 @@ func LookupConfig(kvs config.KVS) (cfg Config, err error) {
}
cfg.TransitionWorkers = transitionWorkers
expiryWorkers, err := strconv.Atoi(env.Get(EnvAPIExpiryWorkers, kvs.GetWithDefault(apiExpiryWorkers, DefaultKVS)))
if err != nil {
return cfg, err
}
if expiryWorkers <= 0 || expiryWorkers > 500 {
return cfg, config.ErrInvalidExpiryWorkersValue(nil).Msg("Number of expiry workers should be between 1 and 500")
}
cfg.ExpiryWorkers = expiryWorkers
v := env.Get(EnvAPIDeleteCleanupInterval, kvs.Get(apiDeleteCleanupInterval))
if v == "" {
v = env.Get(EnvDeleteCleanupInterval, kvs.GetWithDefault(apiDeleteCleanupInterval, DefaultKVS))

View File

@@ -19,12 +19,12 @@ package api
import "github.com/minio/minio/internal/config"
// Help template for the api subsystem.
var (
// defaultHelpPostfix returns the result of config.DefaultHelpPostfix for
// key, looked up against this package's DefaultKVS. The entries in Help
// append it to their Description strings.
defaultHelpPostfix = func(key string) string {
return config.DefaultHelpPostfix(DefaultKVS, key)
}
// Help holds configuration keys and their default values for api subsystem.
Help = config.HelpKVS{
config.HelpKV{
Key: apiRequestsMax,
@@ -80,6 +80,12 @@ var (
Optional: true,
Type: "number",
},
config.HelpKV{
Key: apiExpiryWorkers,
Description: `set the number of expiry workers` + defaultHelpPostfix(apiExpiryWorkers),
Optional: true,
Type: "number",
},
config.HelpKV{
Key: apiStaleUploadsExpiry,
Description: `set to expire stale multipart uploads older than this values` + defaultHelpPostfix(apiStaleUploadsExpiry),