add support for bucket level request count per API (#17468)

New metrics added to calculate API request count
per bucket, per API.  Captures errors, including
4xx, 5xx HTTP status codes separately.
This commit is contained in:
Harshavardhana 2023-06-21 09:41:59 -07:00 committed by GitHub
parent ccc5801112
commit 7605d07bb2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 578 additions and 340 deletions

View File

@ -184,6 +184,7 @@ const (
ErrBucketAlreadyExists
ErrMetadataTooLarge
ErrUnsupportedMetadata
ErrUnsupportedHostHeader
ErrMaximumExpires
ErrSlowDownRead
ErrSlowDownWrite
@ -1442,6 +1443,11 @@ var errorCodes = errorCodeMap{
Description: "Your metadata headers are not supported.",
HTTPStatusCode: http.StatusBadRequest,
},
ErrUnsupportedHostHeader: {
Code: "InvalidArgument",
Description: "Your Host header is malformed.",
HTTPStatusCode: http.StatusBadRequest,
},
ErrObjectTampered: {
Code: "XMinioObjectTampered",
Description: errObjectTampered.Error(),

File diff suppressed because one or more lines are too long

View File

@ -245,8 +245,9 @@ var (
// Global HTTP request statistics
globalHTTPStats = newHTTPStats()
// Global bucket network statistics
// Global bucket network and API statistics
globalBucketConnStats = newBucketConnStats()
globalBucketHTTPStats = newBucketHTTPStats()
// Time when the server is started
globalBootTime = UTCNow()

View File

@ -269,9 +269,26 @@ func trimAwsChunkedContentEncoding(contentEnc string) (trimmedContentEnc string)
func collectAPIStats(api string, f http.HandlerFunc) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
resource, err := getResource(r.URL.Path, r.Host, globalDomainNames)
if err != nil {
defer logger.AuditLog(r.Context(), w, r, mustGetClaimsFromToken(r))
apiErr := errorCodes.ToAPIErr(ErrUnsupportedHostHeader)
apiErr.Description = fmt.Sprintf("%s: %v", apiErr.Description, err)
writeErrorResponse(r.Context(), w, apiErr, r.URL)
return
}
bucket, _ := path2BucketObject(resource)
globalHTTPStats.currentS3Requests.Inc(api)
defer globalHTTPStats.currentS3Requests.Dec(api)
if bucket != "" && bucket != minioReservedBucket {
globalBucketHTTPStats.updateHTTPStats(bucket, api, nil)
}
f.ServeHTTP(w, r)
tc, ok := r.Context().Value(mcontext.ContextTraceKey).(*mcontext.TraceCtxt)
@ -299,16 +316,10 @@ func collectAPIStats(api string, f http.HandlerFunc) http.HandlerFunc {
globalConnStats.incS3InputBytes(int64(tc.RequestRecorder.Size()))
globalConnStats.incS3OutputBytes(int64(tc.ResponseRecorder.Size()))
resource, err := getResource(r.URL.Path, r.Host, globalDomainNames)
if err != nil {
logger.LogIf(r.Context(), fmt.Errorf("Unable to get the actual resource in the incoming request: %v", err))
return
}
bucket, _ := path2BucketObject(resource)
if bucket != "" && bucket != minioReservedBucket {
globalBucketConnStats.incS3InputBytes(bucket, int64(tc.RequestRecorder.Size()))
globalBucketConnStats.incS3OutputBytes(bucket, int64(tc.ResponseRecorder.Size()))
globalBucketHTTPStats.updateHTTPStats(bucket, api, tc.ResponseRecorder)
}
}
}

View File

@ -120,6 +120,105 @@ type bucketS3RXTX struct {
s3OutputBytes uint64
}
// bucketHTTPAPIStats groups the per-API request counters kept for one bucket.
// Every field is a pointer, so copies of this struct share the same counters.
type bucketHTTPAPIStats struct {
// Requests that have started but whose response has not been recorded yet.
currentS3Requests *HTTPAPIStats
// Requests for which a response has been recorded, whatever the status.
totalS3Requests *HTTPAPIStats
// Responses with a status of 400 or above and below 500, excluding 499.
totalS34xxErrors *HTTPAPIStats
// Responses with a status of 500 or above.
totalS35xxErrors *HTTPAPIStats
// Responses with status 499, which are counted as canceled requests.
totalS3Canceled *HTTPAPIStats
}
// bucketHTTPStats holds the HTTP API counters of every bucket seen so far.
// The embedded RWMutex guards the httpStats map.
type bucketHTTPStats struct {
sync.RWMutex
// httpStats maps a bucket name to that bucket's per-API counters.
httpStats map[string]bucketHTTPAPIStats
}
// newBucketHTTPStats allocates an empty per-bucket HTTP statistics registry
// whose bucket map is ready to be written to.
func newBucketHTTPStats() *bucketHTTPStats {
	registry := new(bucketHTTPStats)
	registry.httpStats = map[string]bucketHTTPAPIStats{}
	return registry
}
// delete drops every counter recorded for bucket. It is a no-op when the
// receiver is nil, matching the nil handling of updateHTTPStats and load, and
// when the bucket has no recorded counters.
func (bh *bucketHTTPStats) delete(bucket string) {
	if bh == nil {
		return
	}

	bh.Lock()
	defer bh.Unlock()

	delete(bh.httpStats, bucket)
}
// updateHTTPStats records one request event for api on bucket.
//
// It is called twice per request: first with a nil w, which marks the request
// as in flight, and later with the response recorder, which moves the request
// from "in flight" to "completed" and classifies it by status code.
// A nil receiver makes the call a no-op.
func (bh *bucketHTTPStats) updateHTTPStats(bucket, api string, w *xhttp.ResponseRecorder) {
	if bh == nil {
		return
	}

	bh.Lock()
	defer bh.Unlock()

	entry, found := bh.httpStats[bucket]
	if !found {
		// First event seen for this bucket: create its counters and register
		// them right away. The struct only carries pointers, so every update
		// below is visible through the stored copy.
		entry = bucketHTTPAPIStats{
			currentS3Requests: &HTTPAPIStats{},
			totalS3Requests:   &HTTPAPIStats{},
			totalS3Canceled:   &HTTPAPIStats{},
			totalS34xxErrors:  &HTTPAPIStats{},
			totalS35xxErrors:  &HTTPAPIStats{},
		}
		bh.httpStats[bucket] = entry
	}

	// No response recorder yet means the request has just become active.
	if w == nil {
		entry.currentS3Requests.Inc(api)
		return
	}

	// The response is available: the request is no longer in flight.
	entry.currentS3Requests.Dec(api)
	entry.totalS3Requests.Inc(api)

	switch status := w.StatusCode; {
	case status == 499:
		// 499 is a good error, it is counted as canceled rather than as 4xx.
		entry.totalS3Canceled.Inc(api)
	case status >= http.StatusInternalServerError:
		entry.totalS35xxErrors.Inc(api)
	case status >= http.StatusBadRequest:
		entry.totalS34xxErrors.Inc(api)
	}
}
// load returns the counters recorded for bucket. When the receiver is nil or
// the bucket has no entry, it returns a fresh set of empty counters that is
// not stored in the registry.
func (bh *bucketHTTPStats) load(bucket string) bucketHTTPAPIStats {
	if bh != nil {
		bh.RLock()
		recorded, found := bh.httpStats[bucket]
		bh.RUnlock()
		if found {
			return recorded
		}
	}

	// Nothing recorded: hand back non-nil, empty counters.
	return bucketHTTPAPIStats{
		currentS3Requests: &HTTPAPIStats{},
		totalS3Requests:   &HTTPAPIStats{},
		totalS3Canceled:   &HTTPAPIStats{},
		totalS34xxErrors:  &HTTPAPIStats{},
		totalS35xxErrors:  &HTTPAPIStats{},
	}
}
type bucketConnStats struct {
sync.RWMutex
stats map[string]*bucketS3RXTX
@ -225,10 +324,32 @@ func (stats *HTTPAPIStats) Dec(api string) {
}
}
// Get reports the counter currently recorded for the given API name.
// A nil receiver, or an API that has never been counted, yields 0.
func (stats *HTTPAPIStats) Get(api string) int {
	if stats == nil {
		return 0
	}

	stats.RLock()
	defer stats.RUnlock()

	// Indexing the map with an absent key yields the zero value, which is the
	// same 0 reported for an unknown API.
	return stats.apiStats[api]
}
// Load returns the recorded stats.
func (stats *HTTPAPIStats) Load() map[string]int {
stats.Lock()
defer stats.Unlock()
if stats == nil {
return map[string]int{}
}
stats.RLock()
defer stats.RUnlock()
apiStats := make(map[string]int, len(stats.apiStats))
for k, v := range stats.apiStats {
apiStats[k] = v

View File

@ -483,7 +483,7 @@ func getUsageLastScanActivityMD() MetricDescription {
Namespace: minioMetricNamespace,
Subsystem: usageSubsystem,
Name: lastActivityTime,
Help: "Time elapsed (in nano seconds) since last scan activity. This is set to 0 until first scan cycle",
Help: "Time elapsed (in nano seconds) since last scan activity.",
Type: gaugeMetric,
}
}
@ -703,7 +703,7 @@ func getS3RequestsInQueueMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsSubsystem,
Name: waitingTotal,
Help: "Number of S3 requests in the waiting queue",
Help: "Total number of S3 requests in the waiting queue",
Type: gaugeMetric,
}
}
@ -713,7 +713,7 @@ func getIncomingS3RequestsMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsSubsystem,
Name: incomingTotal,
Help: "Volatile number of total incoming S3 requests",
Help: "Total number of incoming S3 requests",
Type: gaugeMetric,
}
}
@ -723,7 +723,7 @@ func getS3RequestsTotalMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsSubsystem,
Name: total,
Help: "Total number S3 requests",
Help: "Total number of S3 requests",
Type: counterMetric,
}
}
@ -733,7 +733,7 @@ func getS3RequestsErrorsMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsSubsystem,
Name: errorsTotal,
Help: "Total number S3 requests with (4xx and 5xx) errors",
Help: "Total number of S3 requests with (4xx and 5xx) errors",
Type: counterMetric,
}
}
@ -743,7 +743,7 @@ func getS3Requests4xxErrorsMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsSubsystem,
Name: "4xx_" + errorsTotal,
Help: "Total number S3 requests with (4xx) errors",
Help: "Total number of S3 requests with (4xx) errors",
Type: counterMetric,
}
}
@ -753,7 +753,7 @@ func getS3Requests5xxErrorsMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsSubsystem,
Name: "5xx_" + errorsTotal,
Help: "Total number S3 requests with (5xx) errors",
Help: "Total number of S3 requests with (5xx) errors",
Type: counterMetric,
}
}
@ -763,7 +763,7 @@ func getS3RequestsCanceledMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsSubsystem,
Name: canceledTotal,
Help: "Total number S3 requests that were canceled from the client while processing",
Help: "Total number of S3 requests that were canceled by the client",
Type: counterMetric,
}
}
@ -773,7 +773,7 @@ func getS3RejectedAuthRequestsTotalMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsRejectedSubsystem,
Name: authTotal,
Help: "Total number S3 requests rejected for auth failure",
Help: "Total number of S3 requests rejected for auth failure",
Type: counterMetric,
}
}
@ -783,7 +783,7 @@ func getS3RejectedHeaderRequestsTotalMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsRejectedSubsystem,
Name: headerTotal,
Help: "Total number S3 requests rejected for invalid header",
Help: "Total number of S3 requests rejected for invalid header",
Type: counterMetric,
}
}
@ -793,7 +793,7 @@ func getS3RejectedTimestampRequestsTotalMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsRejectedSubsystem,
Name: timestampTotal,
Help: "Total number S3 requests rejected for invalid timestamp",
Help: "Total number of S3 requests rejected for invalid timestamp",
Type: counterMetric,
}
}
@ -803,7 +803,7 @@ func getS3RejectedInvalidRequestsTotalMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsRejectedSubsystem,
Name: invalidTotal,
Help: "Total number S3 invalid requests",
Help: "Total number of invalid S3 requests",
Type: counterMetric,
}
}
@ -913,7 +913,7 @@ func getHealLastActivityTimeMD() MetricDescription {
Namespace: healMetricNamespace,
Subsystem: timeSubsystem,
Name: lastActivityTime,
Help: "Time elapsed (in nano seconds) since last self healing activity. This is set to -1 until initial self heal activity",
Help: "Time elapsed (in nano seconds) since last self healing activity.",
Type: gaugeMetric,
}
}
@ -1309,6 +1309,56 @@ func getExpiryPendingTasksMD() MetricDescription {
}
}
// getBucketS3RequestsInFlightMD returns the description of the per-bucket
// gauge that reports S3 requests currently in flight.
func getBucketS3RequestsInFlightMD() MetricDescription {
	var md MetricDescription
	md.Namespace = bucketMetricNamespace
	md.Subsystem = requestsSubsystem
	md.Name = inflightTotal
	md.Help = "Total number of S3 requests currently in flight on a bucket"
	md.Type = gaugeMetric
	return md
}
// getBucketS3RequestsTotalMD returns the description of the per-bucket
// counter that reports the total number of S3 requests.
func getBucketS3RequestsTotalMD() MetricDescription {
	var md MetricDescription
	md.Namespace = bucketMetricNamespace
	md.Subsystem = requestsSubsystem
	md.Name = total
	md.Help = "Total number of S3 requests on a bucket"
	md.Type = counterMetric
	return md
}
// getBucketS3Requests4xxErrorsMD returns the description of the per-bucket
// counter that reports S3 requests answered with a 4xx error.
func getBucketS3Requests4xxErrorsMD() MetricDescription {
	var md MetricDescription
	md.Namespace = bucketMetricNamespace
	md.Subsystem = requestsSubsystem
	md.Name = "4xx_" + errorsTotal
	md.Help = "Total number of S3 requests with (4xx) errors on a bucket"
	md.Type = counterMetric
	return md
}
// getBucketS3Requests5xxErrorsMD returns the description of the per-bucket
// counter that reports S3 requests answered with a 5xx error.
func getBucketS3Requests5xxErrorsMD() MetricDescription {
	var md MetricDescription
	md.Namespace = bucketMetricNamespace
	md.Subsystem = requestsSubsystem
	md.Name = "5xx_" + errorsTotal
	md.Help = "Total number of S3 requests with (5xx) errors on a bucket"
	md.Type = counterMetric
	return md
}
// getBucketS3RequestsCanceledMD returns the description of the per-bucket
// counter that reports S3 requests canceled by the client.
func getBucketS3RequestsCanceledMD() MetricDescription {
	var md MetricDescription
	md.Namespace = bucketMetricNamespace
	md.Subsystem = requestsSubsystem
	md.Name = canceledTotal
	md.Help = "Total number of S3 requests that were canceled from the client while processing on a bucket"
	md.Type = counterMetric
	return md
}
func getILMNodeMetrics() *MetricsGroup {
mg := &MetricsGroup{
cacheInterval: 10 * time.Second,
@ -1456,7 +1506,7 @@ func getIAMNodeMetrics() *MetricsGroup {
Namespace: nodeMetricNamespace,
Subsystem: iamSubsystem,
Name: "since_last_sync_millis",
Help: "Time (in milliseconds) since last successful IAM data sync. This is set to 0 until the first sync after server start.",
Help: "Time (in milliseconds) since last successful IAM data sync.",
Type: gaugeMetric,
},
Value: float64(sinceLastSyncMillis),
@ -2072,6 +2122,47 @@ func getBucketUsageMetrics() *MetricsGroup {
})
}
httpStats := globalBucketHTTPStats.load(bucket)
for k, v := range httpStats.currentS3Requests.Load() {
metrics = append(metrics, Metric{
Description: getBucketS3RequestsInFlightMD(),
Value: float64(v),
VariableLabels: map[string]string{"bucket": bucket, "api": k},
})
}
for k, v := range httpStats.totalS3Requests.Load() {
metrics = append(metrics, Metric{
Description: getBucketS3RequestsTotalMD(),
Value: float64(v),
VariableLabels: map[string]string{"bucket": bucket, "api": k},
})
}
for k, v := range httpStats.totalS3Canceled.Load() {
metrics = append(metrics, Metric{
Description: getBucketS3RequestsCanceledMD(),
Value: float64(v),
VariableLabels: map[string]string{"bucket": bucket, "api": k},
})
}
for k, v := range httpStats.totalS34xxErrors.Load() {
metrics = append(metrics, Metric{
Description: getBucketS3Requests4xxErrorsMD(),
Value: float64(v),
VariableLabels: map[string]string{"bucket": bucket, "api": k},
})
}
for k, v := range httpStats.totalS35xxErrors.Load() {
metrics = append(metrics, Metric{
Description: getBucketS3Requests5xxErrorsMD(),
Value: float64(v),
VariableLabels: map[string]string{"bucket": bucket, "api": k},
})
}
if stats.hasReplicationUsage() {
for arn, stat := range stats.Stats {
metrics = append(metrics, Metric{

View File

@ -453,6 +453,7 @@ func (sys *NotificationSys) DeleteBucketMetadata(ctx context.Context, bucketName
globalBucketTargetSys.Delete(bucketName)
globalEventNotifier.RemoveNotification(bucketName)
globalBucketConnStats.delete(bucketName)
globalBucketHTTPStats.delete(bucketName)
if localMetacacheMgr != nil {
localMetacacheMgr.deleteBucketCache(bucketName)
}

View File

@ -533,6 +533,7 @@ func (s *peerRESTServer) DeleteBucketMetadataHandler(w http.ResponseWriter, r *h
globalBucketTargetSys.Delete(bucketName)
globalEventNotifier.RemoveNotification(bucketName)
globalBucketConnStats.delete(bucketName)
globalBucketHTTPStats.delete(bucketName)
if localMetacacheMgr != nil {
localMetacacheMgr.deleteBucketCache(bucketName)
}

View File

@ -1,111 +1,116 @@
# List of metrics reported cluster wide
Each metric includes a label for the server that calculated the metric.
Each metric has a label for the server that generated the metric.
Each metric includes a label for the server that calculated the metric. Each metric has a label for the server that generated the metric.
These metrics can be from any MinIO server once per collection.
These metrics can be obtained from any MinIO server once per collection.
| Name | Description |
|:---------------------------------------------|:----------------------------------------------------------------------------------------------------------------|
| `minio_audit_failed_messages` | Total number of messages that failed to send since start. |
| `minio_audit_target_queue_length` | Number of unsent messages in queue for target. |
| `minio_audit_total_messages` | Total number of messages sent since start. |
| `minio_bucket_objects_size_distribution` | Distribution of object sizes in the bucket, includes label for the bucket name. |
| `minio_bucket_quota_total_bytes` | Total bucket quota size in bytes. |
| `minio_bucket_replication_failed_bytes` | Total number of bytes failed at least once to replicate. |
| `minio_bucket_replication_failed_count` | Total number of objects which failed replication. |
| `minio_bucket_replication_latency_ms` | Replication latency in milliseconds. |
| `minio_bucket_replication_received_bytes` | Total number of bytes replicated to this bucket from another source bucket. |
| `minio_bucket_replication_sent_bytes` | Total number of bytes replicated to the target bucket. |
| `minio_bucket_traffic_received_bytes` | Total number of S3 bytes received for this bucket. |
| `minio_bucket_traffic_sent_bytes` | Total number of S3 bytes sent for this bucket. |
| `minio_bucket_usage_object_total` | Total number of objects. |
| `minio_bucket_usage_total_bytes` | Total bucket size in bytes. |
| `minio_bucket_requests_4xx_errors_total` | Total number of S3 requests with (4xx) errors on a bucket. |
| `minio_bucket_requests_5xx_errors_total` | Total number of S3 requests with (5xx) errors on a bucket. |
| `minio_bucket_requests_inflight_total` | Total number of S3 requests currently in flight on a bucket. |
| `minio_bucket_requests_total` | Total number of S3 requests on a bucket. |
| `minio_bucket_requests_canceled_total`       | Total number of S3 requests canceled by the client on a bucket.                                                 |
| `minio_cache_hits_total` | Total number of drive cache hits. |
| `minio_cache_missed_total` | Total number of drive cache misses. |
| `minio_cache_sent_bytes` | Total number of bytes served from cache. |
| `minio_cache_total_bytes` | Total size of cache drive in bytes. |
| `minio_cache_usage_info` | Total percentage cache usage, value of 1 indicates high and 0 low, label level is set as well. |
| `minio_cache_used_bytes` | Current cache usage in bytes. |
| `minio_cluster_capacity_raw_free_bytes` | Total free capacity online in the cluster. |
| `minio_cluster_capacity_raw_total_bytes` | Total capacity online in the cluster. |
| `minio_cluster_capacity_usable_free_bytes` | Total free usable capacity online in the cluster. |
| `minio_cluster_capacity_usable_total_bytes` | Total usable capacity online in the cluster. |
| `minio_cluster_disk_offline_total` | Total drives offline. |
| `minio_cluster_disk_online_total` | Total drives online. |
| `minio_cluster_disk_total` | Total drives. |
| `minio_cluster_ilm_transitioned_bytes` | Total bytes transitioned to a tier. |
| `minio_cluster_ilm_transitioned_objects` | Total number of objects transitioned to a tier. |
| `minio_cluster_ilm_transitioned_versions` | Total number of versions transitioned to a tier. |
| `minio_cluster_kms_online` | Reports whether the KMS is online (1) or offline (0). |
| `minio_cluster_kms_request_error` | Number of KMS requests that failed due to some error. (HTTP 4xx status code). |
| `minio_cluster_kms_request_failure` | Number of KMS requests that failed due to some internal failure. (HTTP 5xx status code). |
| `minio_cluster_kms_request_success` | Number of KMS requests that succeeded. |
| `minio_cluster_kms_uptime` | The time the KMS has been up and running in seconds. |
| `minio_cluster_nodes_offline_total` | Total number of MinIO nodes offline. |
| `minio_cluster_nodes_online_total` | Total number of MinIO nodes online. |
| `minio_heal_objects_errors_total` | Objects for which healing failed in current self healing run. |
| `minio_heal_objects_heal_total` | Objects healed in current self healing run. |
| `minio_heal_objects_total` | Objects scanned in current self healing run. |
| `minio_heal_time_last_activity_nano_seconds` | Time elapsed (in nano seconds) since last self healing activity. |
| `minio_inter_node_traffic_dial_avg_time` | Average time of internodes TCP dial calls. |
| `minio_inter_node_traffic_dial_errors` | Total number of internode TCP dial timeouts and errors. |
| `minio_inter_node_traffic_errors_total` | Total number of failed internode calls. |
| `minio_inter_node_traffic_received_bytes` | Total number of bytes received from other peer nodes. |
| `minio_inter_node_traffic_sent_bytes` | Total number of bytes sent to the other peer nodes. |
| `minio_minio_update_percent` | Total percentage cache usage. |
| `minio_node_disk_free_bytes` | Total storage available on a drive. |
| `minio_node_disk_free_inodes` | Total free inodes. |
| `minio_node_disk_latency_us` | Average last minute latency in µs for drive API storage operations. |
| `minio_node_disk_offline_total` | Total drives offline. |
| `minio_node_disk_online_total` | Total drives online. |
| `minio_node_disk_total` | Total drives. |
| `minio_node_disk_total_bytes` | Total storage on a drive. |
| `minio_node_disk_used_bytes` | Total storage used on a drive. |
| `minio_node_file_descriptor_limit_total` | Limit on total number of open file descriptors for the MinIO Server process. |
| `minio_node_file_descriptor_open_total` | Total number of open file descriptors by the MinIO Server process. |
| `minio_node_go_routine_total` | Total number of go routines running. |
| `minio_node_iam_last_sync_duration_millis` | Last successful IAM data sync duration in milliseconds. |
| `minio_node_iam_since_last_sync_millis` | Time (in milliseconds) since last successful IAM data sync. |
| `minio_node_iam_sync_failures` | Number of failed IAM data syncs since server start. |
| `minio_node_iam_sync_successes` | Number of successful IAM data syncs since server start. |
| `minio_node_ilm_expiry_pending_tasks` | Number of pending ILM expiry tasks in the queue. |
| `minio_node_ilm_transition_active_tasks` | Number of active ILM transition tasks. |
| `minio_node_ilm_transition_pending_tasks` | Number of pending ILM transition tasks in the queue. |
| `minio_node_ilm_versions_scanned` | Total number of object versions checked for ilm actions since server start. |
| `minio_node_io_rchar_bytes` | Total bytes read by the process from the underlying storage system including cache, /proc/[pid]/io rchar. |
| `minio_node_io_read_bytes` | Total bytes read by the process from the underlying storage system, /proc/[pid]/io read_bytes. |
| `minio_node_io_wchar_bytes` | Total bytes written by the process to the underlying storage system including page cache, /proc/[pid]/io wchar. |
| `minio_node_io_write_bytes` | Total bytes written by the process to the underlying storage system, /proc/[pid]/io write_bytes. |
| `minio_node_process_cpu_total_seconds` | Total user and system CPU time spent in seconds. |
| `minio_node_process_resident_memory_bytes` | Resident memory size in bytes. |
| `minio_node_process_starttime_seconds`       | Start time for MinIO process per node, time in seconds since Unix epoch.                                        |
| `minio_node_process_uptime_seconds` | Uptime for MinIO process per node in seconds. |
| `minio_node_scanner_bucket_scans_finished` | Total number of bucket scans finished since server start. |
| `minio_node_scanner_bucket_scans_started` | Total number of bucket scans started since server start. |
| `minio_node_scanner_directories_scanned` | Total number of directories scanned since server start. |
| `minio_node_scanner_objects_scanned` | Total number of unique objects scanned since server start. |
| `minio_node_scanner_versions_scanned` | Total number of object versions scanned since server start. |
| `minio_node_syscall_read_total` | Total read SysCalls to the kernel. /proc/[pid]/io syscr. |
| `minio_node_syscall_write_total` | Total write SysCalls to the kernel. /proc/[pid]/io syscw. |
| `minio_notify_current_send_in_progress` | Number of concurrent async Send calls active to all targets. |
| `minio_notify_target_queue_length` | Number of unsent notifications in queue for target. |
| `minio_s3_requests_4xx_errors_total`         | Total number of S3 requests with (4xx) errors.                                                                  |
| `minio_s3_requests_5xx_errors_total`         | Total number of S3 requests with (5xx) errors.                                                                  |
| `minio_s3_requests_canceled_total`           | Total number of S3 requests canceled by the client.                                                             |
| `minio_s3_requests_errors_total`             | Total number of S3 requests with (4xx and 5xx) errors.                                                          |
| `minio_s3_requests_incoming_total` | Volatile number of total incoming S3 requests. |
| `minio_s3_requests_inflight_total` | Total number of S3 requests currently in flight. |
| `minio_s3_requests_rejected_auth_total`      | Total number of S3 requests rejected for auth failure.                                                          |
| `minio_s3_requests_rejected_header_total`    | Total number of S3 requests rejected for invalid header.                                                        |
| `minio_s3_requests_rejected_invalid_total`   | Total number of invalid S3 requests.                                                                            |
| `minio_s3_requests_rejected_timestamp_total` | Total number of S3 requests rejected for invalid timestamp.                                                     |
| `minio_s3_requests_total`                    | Total number of S3 requests.                                                                                    |
| `minio_s3_requests_waiting_total` | Number of S3 requests in the waiting queue. |
| `minio_s3_time_ttfb_seconds_distribution` | Distribution of the time to first byte across API calls. |
| `minio_s3_traffic_received_bytes` | Total number of s3 bytes received. |
| `minio_s3_traffic_sent_bytes` | Total number of s3 bytes sent. |
| `minio_software_commit_info` | Git commit hash for the MinIO release. |
| `minio_software_version_info` | MinIO Release tag for the server. |
| `minio_usage_last_activity_nano_seconds` | Time elapsed (in nano seconds) since last scan activity. |
| Name | Description |
|:------|:------------|
| `minio_audit_failed_messages` | Total number of messages that failed to send since start. |
| `minio_audit_target_queue_length` | Number of unsent messages in queue for target. |
| `minio_audit_total_messages` | Total number of messages sent since start. |
| `minio_bucket_objects_size_distribution` | Distribution of object sizes in the bucket, includes label for the bucket name. |
| `minio_bucket_quota_total_bytes` | Total bucket quota size in bytes. |
| `minio_bucket_replication_failed_bytes` | Total number of bytes failed at least once to replicate. |
| `minio_bucket_replication_failed_count` | Total number of objects which failed replication. |
| `minio_bucket_replication_latency_ms` | Replication latency in milliseconds. |
| `minio_bucket_replication_received_bytes` | Total number of bytes replicated to this bucket from another source bucket. |
| `minio_bucket_replication_sent_bytes` | Total number of bytes replicated to the target bucket. |
| `minio_bucket_traffic_received_bytes` | Total number of S3 bytes received for this bucket. |
| `minio_bucket_traffic_sent_bytes` | Total number of S3 bytes sent for this bucket. |
| `minio_bucket_usage_object_total` | Total number of objects. |
| `minio_bucket_usage_total_bytes` | Total bucket size in bytes. |
| `minio_cache_hits_total` | Total number of drive cache hits. |
| `minio_cache_missed_total` | Total number of drive cache misses. |
| `minio_cache_sent_bytes` | Total number of bytes served from cache. |
| `minio_cache_total_bytes` | Total size of cache drive in bytes. |
| `minio_cache_usage_info` | Total percentage cache usage, value of 1 indicates high and 0 low, label level is set as well. |
| `minio_cache_used_bytes` | Current cache usage in bytes. |
| `minio_cluster_capacity_raw_free_bytes` | Total free capacity online in the cluster. |
| `minio_cluster_capacity_raw_total_bytes` | Total capacity online in the cluster. |
| `minio_cluster_capacity_usable_free_bytes` | Total free usable capacity online in the cluster. |
| `minio_cluster_capacity_usable_total_bytes` | Total usable capacity online in the cluster. |
| `minio_cluster_disk_offline_total` | Total drives offline. |
| `minio_cluster_disk_online_total` | Total drives online. |
| `minio_cluster_disk_total` | Total drives. |
| `minio_cluster_ilm_transitioned_bytes` | Total bytes transitioned to a tier. |
| `minio_cluster_ilm_transitioned_objects` | Total number of objects transitioned to a tier. |
| `minio_cluster_ilm_transitioned_versions` | Total number of versions transitioned to a tier. |
| `minio_cluster_kms_online` | Reports whether the KMS is online (1) or offline (0). |
| `minio_cluster_kms_request_error` | Number of KMS requests that failed due to some error. (HTTP 4xx status code). |
| `minio_cluster_kms_request_failure` | Number of KMS requests that failed due to some internal failure. (HTTP 5xx status code). |
| `minio_cluster_kms_request_success` | Number of KMS requests that succeeded. |
| `minio_cluster_kms_uptime` | The time the KMS has been up and running in seconds. |
| `minio_cluster_nodes_offline_total` | Total number of MinIO nodes offline. |
| `minio_cluster_nodes_online_total` | Total number of MinIO nodes online. |
| `minio_heal_objects_errors_total` | Objects for which healing failed in current self healing run. |
| `minio_heal_objects_heal_total` | Objects healed in current self healing run. |
| `minio_heal_objects_total` | Objects scanned in current self healing run. |
| `minio_heal_time_last_activity_nano_seconds` | Time elapsed (in nano seconds) since last self healing activity. This is set to -1 until initial self heal activity. |
| `minio_inter_node_traffic_dial_avg_time` | Average time of internodes TCP dial calls. |
| `minio_inter_node_traffic_dial_errors` | Total number of internode TCP dial timeouts and errors. |
| `minio_inter_node_traffic_errors_total` | Total number of failed internode calls. |
| `minio_inter_node_traffic_received_bytes` | Total number of bytes received from other peer nodes. |
| `minio_inter_node_traffic_sent_bytes` | Total number of bytes sent to the other peer nodes. |
| `minio_minio_update_percent` | Total percentage cache usage. |
| `minio_node_disk_free_bytes` | Total storage available on a drive. |
| `minio_node_disk_free_inodes` | Total free inodes. |
| `minio_node_disk_latency_us` | Average last minute latency in µs for drive API storage operations. |
| `minio_node_disk_offline_total` | Total drives offline. |
| `minio_node_disk_online_total` | Total drives online. |
| `minio_node_disk_total` | Total drives. |
| `minio_node_disk_total_bytes` | Total storage on a drive. |
| `minio_node_disk_used_bytes` | Total storage used on a drive. |
| `minio_node_file_descriptor_limit_total` | Limit on total number of open file descriptors for the MinIO Server process. |
| `minio_node_file_descriptor_open_total` | Total number of open file descriptors by the MinIO Server process. |
| `minio_node_go_routine_total` | Total number of go routines running. |
| `minio_node_iam_last_sync_duration_millis` | Last successful IAM data sync duration in milliseconds. |
| `minio_node_iam_since_last_sync_millis` | Time (in milliseconds) since last successful IAM data sync. This is set to 0 until the first sync after server start. |
| `minio_node_iam_sync_failures` | Number of failed IAM data syncs since server start. |
| `minio_node_iam_sync_successes` | Number of successful IAM data syncs since server start. |
| `minio_node_ilm_expiry_pending_tasks` | Number of pending ILM expiry tasks in the queue. |
| `minio_node_ilm_transition_active_tasks` | Number of active ILM transition tasks. |
| `minio_node_ilm_transition_pending_tasks` | Number of pending ILM transition tasks in the queue. |
| `minio_node_ilm_versions_scanned` | Total number of object versions checked for ilm actions since server start. |
| `minio_node_io_rchar_bytes` | Total bytes read by the process from the underlying storage system including cache, /proc/[pid]/io rchar. |
| `minio_node_io_read_bytes` | Total bytes read by the process from the underlying storage system, /proc/[pid]/io read_bytes. |
| `minio_node_io_wchar_bytes` | Total bytes written by the process to the underlying storage system including page cache, /proc/[pid]/io wchar. |
| `minio_node_io_write_bytes` | Total bytes written by the process to the underlying storage system, /proc/[pid]/io write_bytes. |
| `minio_node_process_cpu_total_seconds` | Total user and system CPU time spent in seconds. |
| `minio_node_process_resident_memory_bytes` | Resident memory size in bytes. |
| `minio_node_process_starttime_seconds` | Start time for MinIO process per node, time in seconds since Unix epoch. |
| `minio_node_process_uptime_seconds` | Uptime for MinIO process per node in seconds. |
| `minio_node_scanner_bucket_scans_finished` | Total number of bucket scans finished since server start. |
| `minio_node_scanner_bucket_scans_started` | Total number of bucket scans started since server start. |
| `minio_node_scanner_directories_scanned` | Total number of directories scanned since server start. |
| `minio_node_scanner_objects_scanned` | Total number of unique objects scanned since server start. |
| `minio_node_scanner_versions_scanned` | Total number of object versions scanned since server start. |
| `minio_node_syscall_read_total` | Total read SysCalls to the kernel. /proc/[pid]/io syscr. |
| `minio_node_syscall_write_total` | Total write SysCalls to the kernel. /proc/[pid]/io syscw. |
| `minio_notify_current_send_in_progress` | Number of concurrent async Send calls active to all targets. |
| `minio_notify_target_queue_length` | Number of unsent notifications in queue for target. |
| `minio_s3_requests_4xx_errors_total` | Total number of S3 requests with (4xx) errors. |
| `minio_s3_requests_5xx_errors_total` | Total number of S3 requests with (5xx) errors. |
| `minio_s3_requests_canceled_total` | Total number of S3 requests that were canceled by the client while processing. |
| `minio_s3_requests_errors_total` | Total number of S3 requests with (4xx and 5xx) errors. |
| `minio_s3_requests_incoming_total` | Volatile number of total incoming S3 requests. |
| `minio_s3_requests_inflight_total` | Total number of S3 requests currently in flight. |
| `minio_s3_requests_rejected_auth_total` | Total number of S3 requests rejected for auth failure. |
| `minio_s3_requests_rejected_header_total` | Total number of S3 requests rejected for invalid header. |
| `minio_s3_requests_rejected_invalid_total` | Total number of invalid S3 requests. |
| `minio_s3_requests_rejected_timestamp_total` | Total number of S3 requests rejected for invalid timestamp. |
| `minio_s3_requests_total` | Total number of S3 requests. |
| `minio_s3_requests_waiting_total` | Number of S3 requests in the waiting queue. |
| `minio_s3_time_ttfb_seconds_distribution` | Distribution of the time to first byte across API calls. |
| `minio_s3_traffic_received_bytes` | Total number of s3 bytes received. |
| `minio_s3_traffic_sent_bytes` | Total number of s3 bytes sent. |
| `minio_software_commit_info` | Git commit hash for the MinIO release. |
| `minio_software_version_info` | MinIO Release tag for the server. |
| `minio_usage_last_activity_nano_seconds` | Time elapsed (in nano seconds) since last scan activity. This is set to 0 until first scan cycle. |