avoid tying hot-tier SLA to warm-tier SLA (#18581)

It is okay if the warm tier cannot keep up; we should continue
to take I/O at the hot tier and only fail or block the hot tier
when the disk is full.

Bonus: add a metrics counter for these missed tasks, so we will
know for sure if one of the nodes is lagging behind or is
losing too many tasks during transitioning.
This commit is contained in:
Harshavardhana 2023-12-02 13:02:12 -08:00 committed by GitHub
parent f2d063e7b9
commit e98172d72d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 226 additions and 214 deletions

View File

@ -219,24 +219,23 @@ type transitionState struct {
numWorkers int numWorkers int
killCh chan struct{} killCh chan struct{}
activeTasks int32 activeTasks atomic.Int64
missedImmediateTasks atomic.Int64
lastDayMu sync.RWMutex lastDayMu sync.RWMutex
lastDayStats map[string]*lastDayTierStats lastDayStats map[string]*lastDayTierStats
} }
func (t *transitionState) queueTransitionTask(oi ObjectInfo, event lifecycle.Event, src lcEventSrc, blocking bool) { func (t *transitionState) queueTransitionTask(oi ObjectInfo, event lifecycle.Event, src lcEventSrc) {
task := transitionTask{objInfo: oi, event: event, src: src} task := transitionTask{objInfo: oi, event: event, src: src}
if blocking {
select {
case <-t.ctx.Done():
case t.transitionCh <- task:
}
} else {
select { select {
case <-t.ctx.Done(): case <-t.ctx.Done():
case t.transitionCh <- task: case t.transitionCh <- task:
default: default:
switch src {
case lcEventSrc_s3PutObject, lcEventSrc_s3CopyObject, lcEventSrc_s3CompleteMultipartUpload:
// Update missed immediate tasks only for incoming requests.
t.missedImmediateTasks.Add(1)
} }
} }
} }
@ -272,8 +271,14 @@ func (t *transitionState) PendingTasks() int {
} }
// ActiveTasks returns the number of active (ongoing) ILM transition tasks. // ActiveTasks returns the number of active (ongoing) ILM transition tasks.
func (t *transitionState) ActiveTasks() int { func (t *transitionState) ActiveTasks() int64 {
return int(atomic.LoadInt32(&t.activeTasks)) return t.activeTasks.Load()
}
// MissedImmediateTasks returns the number of immediate transition tasks that
// were deferred to the scanner because the tasks channel was backlogged.
func (t *transitionState) MissedImmediateTasks() int64 {
return t.missedImmediateTasks.Load()
} }
// worker waits for transition tasks // worker waits for transition tasks
@ -288,7 +293,7 @@ func (t *transitionState) worker(objectAPI ObjectLayer) {
if !ok { if !ok {
return return
} }
atomic.AddInt32(&t.activeTasks, 1) t.activeTasks.Add(1)
if err := transitionObject(t.ctx, objectAPI, task.objInfo, newLifecycleAuditEvent(task.src, task.event)); err != nil { if err := transitionObject(t.ctx, objectAPI, task.objInfo, newLifecycleAuditEvent(task.src, task.event)); err != nil {
if !isErrVersionNotFound(err) && !isErrObjectNotFound(err) && !xnet.IsNetworkOrHostDown(err, false) { if !isErrVersionNotFound(err) && !isErrObjectNotFound(err) && !xnet.IsNetworkOrHostDown(err, false) {
if !strings.Contains(err.Error(), "use of closed network connection") { if !strings.Contains(err.Error(), "use of closed network connection") {
@ -306,7 +311,7 @@ func (t *transitionState) worker(objectAPI ObjectLayer) {
} }
t.addLastDayStats(task.event.StorageClass, ts) t.addLastDayStats(task.event.StorageClass, ts)
} }
atomic.AddInt32(&t.activeTasks, -1) t.activeTasks.Add(-1)
} }
} }
} }
@ -379,7 +384,7 @@ func enqueueTransitionImmediate(obj ObjectInfo, src lcEventSrc) {
if lc, err := globalLifecycleSys.Get(obj.Bucket); err == nil { if lc, err := globalLifecycleSys.Get(obj.Bucket); err == nil {
switch event := lc.Eval(obj.ToLifecycleOpts()); event.Action { switch event := lc.Eval(obj.ToLifecycleOpts()); event.Action {
case lifecycle.TransitionAction, lifecycle.TransitionVersionAction: case lifecycle.TransitionAction, lifecycle.TransitionVersionAction:
globalTransitionState.queueTransitionTask(obj, event, src, true) globalTransitionState.queueTransitionTask(obj, event, src)
} }
} }
} }

View File

@ -1190,7 +1190,7 @@ func applyTransitionRule(event lifecycle.Event, src lcEventSrc, obj ObjectInfo)
if obj.DeleteMarker { if obj.DeleteMarker {
return false return false
} }
globalTransitionState.queueTransitionTask(obj, event, src, false) globalTransitionState.queueTransitionTask(obj, event, src)
return true return true
} }

View File

@ -256,6 +256,7 @@ const (
expiryPendingTasks MetricName = "expiry_pending_tasks" expiryPendingTasks MetricName = "expiry_pending_tasks"
transitionPendingTasks MetricName = "transition_pending_tasks" transitionPendingTasks MetricName = "transition_pending_tasks"
transitionActiveTasks MetricName = "transition_active_tasks" transitionActiveTasks MetricName = "transition_active_tasks"
transitionMissedTasks MetricName = "transition_missed_immediate_tasks"
transitionedBytes MetricName = "transitioned_bytes" transitionedBytes MetricName = "transitioned_bytes"
transitionedObjects MetricName = "transitioned_objects" transitionedObjects MetricName = "transitioned_objects"
@ -1707,6 +1708,16 @@ func getTransitionActiveTasksMD() MetricDescription {
} }
} }
// getTransitionMissedTasksMD returns the description of the node-level ILM
// metric named by transitionMissedTasks ("transition_missed_immediate_tasks"),
// which reports how many immediate ILM transition tasks were missed.
//
// NOTE(review): the value reported for this metric appears to only ever
// increase (it is fed from a counter that is incremented by one per missed
// task) — confirm that a gauge is the intended metric type.
func getTransitionMissedTasksMD() MetricDescription {
	md := MetricDescription{
		Namespace: nodeMetricNamespace,
		Subsystem: ilmSubsystem,
		Name:      transitionMissedTasks,
		Help:      "Number of missed immediate ILM transition tasks",
		Type:      gaugeMetric,
	}
	return md
}
func getExpiryPendingTasksMD() MetricDescription { func getExpiryPendingTasksMD() MetricDescription {
return MetricDescription{ return MetricDescription{
Namespace: nodeMetricNamespace, Namespace: nodeMetricNamespace,
@ -1781,17 +1792,22 @@ func getILMNodeMetrics() *MetricsGroup {
trActiveTasks := Metric{ trActiveTasks := Metric{
Description: getTransitionActiveTasksMD(), Description: getTransitionActiveTasksMD(),
} }
trMissedTasks := Metric{
Description: getTransitionMissedTasksMD(),
}
if globalExpiryState != nil { if globalExpiryState != nil {
expPendingTasks.Value = float64(globalExpiryState.PendingTasks()) expPendingTasks.Value = float64(globalExpiryState.PendingTasks())
} }
if globalTransitionState != nil { if globalTransitionState != nil {
trPendingTasks.Value = float64(globalTransitionState.PendingTasks()) trPendingTasks.Value = float64(globalTransitionState.PendingTasks())
trActiveTasks.Value = float64(globalTransitionState.ActiveTasks()) trActiveTasks.Value = float64(globalTransitionState.ActiveTasks())
trMissedTasks.Value = float64(globalTransitionState.MissedImmediateTasks())
} }
return []Metric{ return []Metric{
expPendingTasks, expPendingTasks,
trPendingTasks, trPendingTasks,
trActiveTasks, trActiveTasks,
trMissedTasks,
} }
}) })
return mg return mg

View File

@ -15,26 +15,15 @@ For deployments behind a load balancer, use the load balancer hostname instead o
## Audit Metrics ## Audit Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:----------------------------------|:----------------------------------------------------------|
| `minio_audit_failed_messages` | Total number of messages that failed to send since start. | | `minio_audit_failed_messages` | Total number of messages that failed to send since start. |
| `minio_audit_target_queue_length` | Number of unsent messages in queue for target. | | `minio_audit_target_queue_length` | Number of unsent messages in queue for target. |
| `minio_audit_total_messages` | Total number of messages sent since start. | | `minio_audit_total_messages` | Total number of messages sent since start. |
## Cache Metrics
| Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------|
| `minio_cache_hits_total` | Total number of drive cache hits. |
| `minio_cache_missed_total` | Total number of drive cache misses. |
| `minio_cache_sent_bytes` | Total number of bytes served from cache. |
| `minio_cache_total_bytes` | Total size of cache drive in bytes. |
| `minio_cache_usage_info` | Total percentage cache usage, value of 1 indicates high and 0 low, label level is set as well. |
| `minio_cache_used_bytes` | Current cache usage in bytes. |
## Cluster Capacity Metrics ## Cluster Capacity Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:---------------------------------------------|:---------------------------------------------------------------|
| `minio_cluster_capacity_raw_free_bytes` | Total free capacity online in the cluster. | | `minio_cluster_capacity_raw_free_bytes` | Total free capacity online in the cluster. |
| `minio_cluster_capacity_raw_total_bytes` | Total capacity online in the cluster. | | `minio_cluster_capacity_raw_total_bytes` | Total capacity online in the cluster. |
| `minio_cluster_capacity_usable_free_bytes` | Total free usable capacity online in the cluster. | | `minio_cluster_capacity_usable_free_bytes` | Total free usable capacity online in the cluster. |
@ -51,7 +40,7 @@ For deployments behind a load balancer, use the load balancer hostname instead o
## Cluster Drive Metrics ## Cluster Drive Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:------------------------------------|:--------------------------------------|
| `minio_cluster_drive_offline_total` | Total drives offline in this cluster. | | `minio_cluster_drive_offline_total` | Total drives offline in this cluster. |
| `minio_cluster_drive_online_total` | Total drives online in this cluster. | | `minio_cluster_drive_online_total` | Total drives online in this cluster. |
| `minio_cluster_drive_total` | Total drives in this cluster. | | `minio_cluster_drive_total` | Total drives in this cluster. |
@ -59,7 +48,7 @@ For deployments behind a load balancer, use the load balancer hostname instead o
## Cluster ILM Metrics ## Cluster ILM Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:------------------------------------------|:-------------------------------------------------|
| `minio_cluster_ilm_transitioned_bytes` | Total bytes transitioned to a tier. | | `minio_cluster_ilm_transitioned_bytes` | Total bytes transitioned to a tier. |
| `minio_cluster_ilm_transitioned_objects` | Total number of objects transitioned to a tier. | | `minio_cluster_ilm_transitioned_objects` | Total number of objects transitioned to a tier. |
| `minio_cluster_ilm_transitioned_versions` | Total number of versions transitioned to a tier. | | `minio_cluster_ilm_transitioned_versions` | Total number of versions transitioned to a tier. |
@ -67,7 +56,7 @@ For deployments behind a load balancer, use the load balancer hostname instead o
## Cluster KMS Metrics ## Cluster KMS Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:------------------------------------|:-----------------------------------------------------------------------------------------|
| `minio_cluster_kms_online` | Reports whether the KMS is online (1) or offline (0). | | `minio_cluster_kms_online` | Reports whether the KMS is online (1) or offline (0). |
| `minio_cluster_kms_request_error` | Number of KMS requests that failed due to some error. (HTTP 4xx status code). | | `minio_cluster_kms_request_error` | Number of KMS requests that failed due to some error. (HTTP 4xx status code). |
| `minio_cluster_kms_request_failure` | Number of KMS requests that failed due to some internal failure. (HTTP 5xx status code). | | `minio_cluster_kms_request_failure` | Number of KMS requests that failed due to some internal failure. (HTTP 5xx status code). |
@ -77,7 +66,7 @@ For deployments behind a load balancer, use the load balancer hostname instead o
## Cluster Health Metrics ## Cluster Health Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:------------------------------------|:-----------------------------------------------|
| `minio_cluster_nodes_offline_total` | Total number of MinIO nodes offline. | | `minio_cluster_nodes_offline_total` | Total number of MinIO nodes offline. |
| `minio_cluster_nodes_online_total` | Total number of MinIO nodes online. | | `minio_cluster_nodes_online_total` | Total number of MinIO nodes online. |
| `minio_cluster_write_quorum` | Maximum write quorum across all pools and sets | | `minio_cluster_write_quorum` | Maximum write quorum across all pools and sets |
@ -88,8 +77,8 @@ For deployments behind a load balancer, use the load balancer hostname instead o
Metrics marked as ``Site Replication Only`` only populate on deployments with [Site Replication](https://min.io/docs/minio/linux/operations/install-deploy-manage/multi-site-replication.html) configurations. Metrics marked as ``Site Replication Only`` only populate on deployments with [Site Replication](https://min.io/docs/minio/linux/operations/install-deploy-manage/multi-site-replication.html) configurations.
For deployments with [bucket](https://min.io/docs/minio/linux/administration/bucket-replication.html) or [batch](https://min.io/docs/minio/linux/administration/batch-framework.html#replicate) configurations, these metrics populate instead under the [Bucket Metrics](#bucket-metrics) endpoint. For deployments with [bucket](https://min.io/docs/minio/linux/administration/bucket-replication.html) or [batch](https://min.io/docs/minio/linux/administration/batch-framework.html#replicate) configurations, these metrics populate instead under the [Bucket Metrics](#bucket-metrics) endpoint.
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:-----------------------------------------------------------|:---------------------------------------------------------------------------------------------------------|
| `minio_cluster_replication_current_active_workers` | Total number of active replication workers | | `minio_cluster_replication_current_active_workers` | Total number of active replication workers |
| `minio_cluster_replication_average_active_workers` | Average number of active replication workers | | `minio_cluster_replication_average_active_workers` | Average number of active replication workers |
| `minio_cluster_replication_max_active_workers` | Maximum number of active replication workers seen since server start | | `minio_cluster_replication_max_active_workers` | Maximum number of active replication workers seen since server start |
@ -117,14 +106,14 @@ For deployments with [bucket](https://min.io/docs/minio/linux/administration/buc
| `minio_cluster_replication_total_failed_count` | (_Site Replication Only_) Total number of objects which failed replication since server start. | | `minio_cluster_replication_total_failed_count` | (_Site Replication Only_) Total number of objects which failed replication since server start. |
| `minio_cluster_replication_received_bytes` | (_Site Replication Only_) Total number of bytes replicated to this cluster from another source cluster. | | `minio_cluster_replication_received_bytes` | (_Site Replication Only_) Total number of bytes replicated to this cluster from another source cluster. |
| `minio_cluster_replication_received_count` | (_Site Replication Only_) Total number of objects received by this cluster from another source cluster. | | `minio_cluster_replication_received_count` | (_Site Replication Only_) Total number of objects received by this cluster from another source cluster. |
| `minio_cluster_replication_sent_bytes` | (_Site Replication Only_) Total number of bytes replicated to the target cluster. | | | `minio_cluster_replication_sent_bytes` | (_Site Replication Only_) Total number of bytes replicated to the target cluster. |
| `minio_cluster_replication_sent_count` | (_Site Replication Only_) Total number of objects replicated to the target cluster. | | | `minio_cluster_replication_sent_count` | (_Site Replication Only_) Total number of objects replicated to the target cluster. |
| `minio_cluster_replication_credential_errors` | (_Site Replication Only_) Total number of replication credential errors since server start | | `minio_cluster_replication_credential_errors` | (_Site Replication Only_) Total number of replication credential errors since server start |
## Healing Metrics ## Healing Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:---------------------------------------------|:-----------------------------------------------------------------|
| `minio_heal_objects_errors_total` | Objects for which healing failed in current self healing run. | | `minio_heal_objects_errors_total` | Objects for which healing failed in current self healing run. |
| `minio_heal_objects_heal_total` | Objects healed in current self healing run. | | `minio_heal_objects_heal_total` | Objects healed in current self healing run. |
| `minio_heal_objects_total` | Objects scanned in current self healing run. | | `minio_heal_objects_total` | Objects scanned in current self healing run. |
@ -133,7 +122,7 @@ For deployments with [bucket](https://min.io/docs/minio/linux/administration/buc
## Inter Node Metrics ## Inter Node Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:------------------------------------------|:--------------------------------------------------------|
| `minio_inter_node_traffic_dial_avg_time` | Average time of internodes TCP dial calls. | | `minio_inter_node_traffic_dial_avg_time` | Average time of internodes TCP dial calls. |
| `minio_inter_node_traffic_dial_errors` | Total number of internode TCP dial timeouts and errors. | | `minio_inter_node_traffic_dial_errors` | Total number of internode TCP dial timeouts and errors. |
| `minio_inter_node_traffic_errors_total` | Total number of failed internode calls. | | `minio_inter_node_traffic_errors_total` | Total number of failed internode calls. |
@ -143,14 +132,15 @@ For deployments with [bucket](https://min.io/docs/minio/linux/administration/buc
## Bucket Notification Metrics ## Bucket Notification Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:----------------------------------------|:-------------------------------------------------------------|
| `minio_notify_current_send_in_progress` | Number of concurrent async Send calls active to all targets. | | `minio_notify_current_send_in_progress` | Number of concurrent async Send calls active to all targets. |
| `minio_notify_target_queue_length` | Number of unsent notifications in queue for target. | | `minio_notify_target_queue_length` | Number of unsent notifications in queue for target. |
| | |
## S3 API Request Metrics ## S3 API Request Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:----------------------------------------------|:---------------------------------------------------------|
| `minio_s3_requests_4xx_errors_total` | Total number S3 requests with (4xx) errors. | | `minio_s3_requests_4xx_errors_total` | Total number S3 requests with (4xx) errors. |
| `minio_s3_requests_5xx_errors_total` | Total number S3 requests with (5xx) errors. | | `minio_s3_requests_5xx_errors_total` | Total number S3 requests with (5xx) errors. |
| `minio_s3_requests_canceled_total` | Total number S3 requests canceled by the client. | | `minio_s3_requests_canceled_total` | Total number S3 requests canceled by the client. |
@ -170,14 +160,14 @@ For deployments with [bucket](https://min.io/docs/minio/linux/administration/buc
## Software Metrics ## Software Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:------------------------------|:---------------------------------------|
| `minio_software_commit_info` | Git commit hash for the MinIO release. | | `minio_software_commit_info` | Git commit hash for the MinIO release. |
| `minio_software_version_info` | MinIO Release tag for the server. | | `minio_software_version_info` | MinIO Release tag for the server. |
## Drive Metrics ## Drive Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:---------------------------------|:--------------------------------------------------------------------|
| `minio_node_drive_free_bytes` | Total storage available on a drive. | | `minio_node_drive_free_bytes` | Total storage available on a drive. |
| `minio_node_drive_free_inodes` | Total free inodes. | | `minio_node_drive_free_inodes` | Total free inodes. |
| `minio_node_drive_latency_us` | Average last minute latency in µs for drive API storage operations. | | `minio_node_drive_latency_us` | Average last minute latency in µs for drive API storage operations. |
@ -190,7 +180,7 @@ For deployments with [bucket](https://min.io/docs/minio/linux/administration/buc
## Identity and Access Management (IAM) Metrics ## Identity and Access Management (IAM) Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:-------------------------------------------|:------------------------------------------------------------|
| `minio_node_iam_last_sync_duration_millis` | Last successful IAM data sync duration in milliseconds. | | `minio_node_iam_last_sync_duration_millis` | Last successful IAM data sync duration in milliseconds. |
| `minio_node_iam_since_last_sync_millis` | Time (in milliseconds) since last successful IAM data sync. | | `minio_node_iam_since_last_sync_millis` | Time (in milliseconds) since last successful IAM data sync. |
| `minio_node_iam_sync_failures` | Number of failed IAM data syncs since server start. | | `minio_node_iam_sync_failures` | Number of failed IAM data syncs since server start. |
@ -199,16 +189,17 @@ For deployments with [bucket](https://min.io/docs/minio/linux/administration/buc
## Information Lifecycle Management (ILM) Metrics ## Information Lifecycle Management (ILM) Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:---------------------------------------------------|:----------------------------------------------------------------------------|
| `minio_node_ilm_expiry_pending_tasks` | Number of pending ILM expiry tasks in the queue. | | `minio_node_ilm_expiry_pending_tasks` | Number of pending ILM expiry tasks in the queue. |
| `minio_node_ilm_transition_active_tasks` | Number of active ILM transition tasks. | | `minio_node_ilm_transition_active_tasks` | Number of active ILM transition tasks. |
| `minio_node_ilm_transition_pending_tasks` | Number of pending ILM transition tasks in the queue. | | `minio_node_ilm_transition_pending_tasks` | Number of pending ILM transition tasks in the queue. |
| `minio_node_ilm_transition_missed_immediate_tasks` | Number of missed immediate ILM transition tasks. |
| `minio_node_ilm_versions_scanned` | Total number of object versions checked for ilm actions since server start. | | `minio_node_ilm_versions_scanned` | Total number of object versions checked for ilm actions since server start. |
## System Metrics ## System Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:-------------------------------------------|:----------------------------------------------------------------------------------------------------------------|
| `minio_node_file_descriptor_limit_total` | Limit on total number of open file descriptors for the MinIO Server process. | | `minio_node_file_descriptor_limit_total` | Limit on total number of open file descriptors for the MinIO Server process. |
| `minio_node_file_descriptor_open_total` | Total number of open file descriptors by the MinIO Server process. | | `minio_node_file_descriptor_open_total` | Total number of open file descriptors by the MinIO Server process. |
| `minio_node_go_routine_total` | Total number of go routines running. | | `minio_node_go_routine_total` | Total number of go routines running. |
@ -224,7 +215,7 @@ For deployments with [bucket](https://min.io/docs/minio/linux/administration/buc
## Scanner Metrics ## Scanner Metrics
| Name | Description | | Name | Description |
|:----------------------------------------------|:----------------------------------------------------------------------------------------------------------------| |:-------------------------------------------|:------------------------------------------------------------|
| `minio_node_scanner_bucket_scans_finished` | Total number of bucket scans finished since server start. | | `minio_node_scanner_bucket_scans_finished` | Total number of bucket scans finished since server start. |
| `minio_node_scanner_bucket_scans_started` | Total number of bucket scans started since server start. | | `minio_node_scanner_bucket_scans_started` | Total number of bucket scans started since server start. |
| `minio_node_scanner_directories_scanned` | Total number of directories scanned since server start. | | `minio_node_scanner_directories_scanned` | Total number of directories scanned since server start. |
@ -252,7 +243,7 @@ For deployments behind a load balancer, use the load balancer hostname instead o
## Distribution Metrics ## Distribution Metrics
| Name | Description | | Name | Description |
|:--------------------------------------------------|:--------------------------------------------------------------------------------| |:--------------------------------------------|:--------------------------------------------------------------------------------|
| `minio_bucket_objects_size_distribution` | Distribution of object sizes in the bucket, includes label for the bucket name. | | `minio_bucket_objects_size_distribution` | Distribution of object sizes in the bucket, includes label for the bucket name. |
| `minio_bucket_objects_version_distribution` | Distribution of object sizes in a bucket, by number of versions | | `minio_bucket_objects_version_distribution` | Distribution of object sizes in a bucket, by number of versions |
@ -279,14 +270,14 @@ For deployments with [Site Replication](https://min.io/docs/minio/linux/operatio
## Traffic Metrics ## Traffic Metrics
| Name | Description | | Name | Description |
|:--------------------------------------------------|:--------------------------------------------------------------------------------| |:--------------------------------------|:---------------------------------------------------|
| `minio_bucket_traffic_received_bytes` | Total number of S3 bytes received for this bucket. | | `minio_bucket_traffic_received_bytes` | Total number of S3 bytes received for this bucket. |
| `minio_bucket_traffic_sent_bytes` | Total number of S3 bytes sent for this bucket. | | `minio_bucket_traffic_sent_bytes` | Total number of S3 bytes sent for this bucket. |
## Usage Metrics ## Usage Metrics
| Name | Description | | Name | Description |
|:--------------------------------------------------|:--------------------------------------------------------------------------------| |:----------------------------------------|:--------------------------------------------------|
| `minio_bucket_usage_object_total` | Total number of objects. | | `minio_bucket_usage_object_total` | Total number of objects. |
| `minio_bucket_usage_version_total` | Total number of versions (includes delete marker) | | `minio_bucket_usage_version_total` | Total number of versions (includes delete marker) |
| `minio_bucket_usage_deletemarker_total` | Total number of delete markers. | | `minio_bucket_usage_deletemarker_total` | Total number of delete markers. |
@ -296,7 +287,7 @@ For deployments with [Site Replication](https://min.io/docs/minio/linux/operatio
## Requests Metrics ## Requests Metrics
| Name | Description | | Name | Description |
|:--------------------------------------------------|:--------------------------------------------------------------------------------| |:--------------------------------------------------|:----------------------------------------------------------------|
| `minio_bucket_requests_4xx_errors_total` | Total number of S3 requests with (4xx) errors on a bucket. | | `minio_bucket_requests_4xx_errors_total` | Total number of S3 requests with (4xx) errors on a bucket. |
| `minio_bucket_requests_5xx_errors_total` | Total number of S3 requests with (5xx) errors on a bucket. | | `minio_bucket_requests_5xx_errors_total` | Total number of S3 requests with (5xx) errors on a bucket. |
| `minio_bucket_requests_inflight_total` | Total number of S3 requests currently in flight on a bucket. | | `minio_bucket_requests_inflight_total` | Total number of S3 requests currently in flight on a bucket. |
@ -322,7 +313,7 @@ For deployments behind a load balancer, use the load balancer hostname instead o
## Drive Resource Metrics ## Drive Resource Metrics
| Name | Description | | Name | Description |
| :----------------------------------- | :------------------------------------------------------- | |:-------------------------------------|:---------------------------------------------------------|
| `minio_node_drive_total_bytes` | Total bytes on a drive. | | `minio_node_drive_total_bytes` | Total bytes on a drive. |
| `minio_node_drive_used_bytes` | Used bytes on a drive. | | `minio_node_drive_used_bytes` | Used bytes on a drive. |
| `minio_node_drive_total_inodes` | Total inodes on a drive. | | `minio_node_drive_total_inodes` | Total inodes on a drive. |
@ -338,7 +329,7 @@ For deployments behind a load balancer, use the load balancer hostname instead o
## Network Interface Metrics ## Network Interface Metrics
| Name | Description | | Name | Description |
| :---------------------------- | :-------------------------------------------- | |:------------------------------|:----------------------------------------------|
| `minio_node_if_rx_bytes` | Bytes received on the interface in 60s. | | `minio_node_if_rx_bytes` | Bytes received on the interface in 60s. |
| `minio_node_if_rx_bytes_avg` | Bytes received on the interface in 60s (avg). | | `minio_node_if_rx_bytes_avg` | Bytes received on the interface in 60s (avg). |
| `minio_node_if_rx_bytes_max` | Bytes received on the interface in 60s (max). | | `minio_node_if_rx_bytes_max` | Bytes received on the interface in 60s (max). |
@ -355,7 +346,7 @@ For deployments behind a load balancer, use the load balancer hostname instead o
## CPU Metrics ## CPU Metrics
| Name | Description | | Name | Description |
| :------------------------------ | :---------------------------- | |:--------------------------------|:------------------------------|
| `minio_node_cpu_avg_user` | CPU user time. | | `minio_node_cpu_avg_user` | CPU user time. |
| `minio_node_cpu_avg_user_avg` | CPU user time (avg). | | `minio_node_cpu_avg_user_avg` | CPU user time (avg). |
| `minio_node_cpu_avg_user_max` | CPU user time (max). | | `minio_node_cpu_avg_user_max` | CPU user time (max). |
@ -387,7 +378,7 @@ For deployments behind a load balancer, use the load balancer hostname instead o
## Memory Metrics ## Memory Metrics
| Name | Description | | Name | Description |
| :----------------------------- | :---------------------------------- | |:-------------------------------|:------------------------------------|
| `minio_node_mem_available` | Available memory on the node. | | `minio_node_mem_available` | Available memory on the node. |
| `minio_node_mem_available_avg` | Available memory on the node (avg). | | `minio_node_mem_available_avg` | Available memory on the node (avg). |
| `minio_node_mem_available_max` | Available memory on the node (max). | | `minio_node_mem_available_max` | Available memory on the node (max). |