mirror of
https://github.com/minio/minio.git
synced 2025-01-25 21:53:16 -05:00
Add open FD and FD limit to cluster metrics (#11328)
This commit is contained in:
parent
43f973c4cf
commit
7575c24037
@ -20,6 +20,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@ -28,6 +29,7 @@ import (
|
|||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
dto "github.com/prometheus/client_model/go"
|
dto "github.com/prometheus/client_model/go"
|
||||||
|
"github.com/prometheus/procfs"
|
||||||
)
|
)
|
||||||
|
|
||||||
// MetricNamespace is top level grouping of metrics to create the metric name.
|
// MetricNamespace is top level grouping of metrics to create the metric name.
|
||||||
@ -51,43 +53,55 @@ const (
|
|||||||
capacityRawSubsystem MetricSubsystem = "capacity_raw"
|
capacityRawSubsystem MetricSubsystem = "capacity_raw"
|
||||||
capacityUsableSubsystem MetricSubsystem = "capacity_usable"
|
capacityUsableSubsystem MetricSubsystem = "capacity_usable"
|
||||||
diskSubsystem MetricSubsystem = "disk"
|
diskSubsystem MetricSubsystem = "disk"
|
||||||
|
goRoutines MetricSubsystem = "go_routine"
|
||||||
nodesSubsystem MetricSubsystem = "nodes"
|
nodesSubsystem MetricSubsystem = "nodes"
|
||||||
objectsSubsystem MetricSubsystem = "objects"
|
objectsSubsystem MetricSubsystem = "objects"
|
||||||
|
fileDescriptorSubsystem MetricSubsystem = "file_descriptor"
|
||||||
|
ioSubsystem MetricSubsystem = "io"
|
||||||
replicationSubsystem MetricSubsystem = "replication"
|
replicationSubsystem MetricSubsystem = "replication"
|
||||||
requestsSubsystem MetricSubsystem = "requests"
|
requestsSubsystem MetricSubsystem = "requests"
|
||||||
timeSubsystem MetricSubsystem = "time"
|
timeSubsystem MetricSubsystem = "time"
|
||||||
trafficSubsystem MetricSubsystem = "traffic"
|
trafficSubsystem MetricSubsystem = "traffic"
|
||||||
|
sysCallSubsystem MetricSubsystem = "syscall"
|
||||||
usageSubsystem MetricSubsystem = "usage"
|
usageSubsystem MetricSubsystem = "usage"
|
||||||
softwareSubsystem MetricSubsystem = "software"
|
softwareSubsystem MetricSubsystem = "software"
|
||||||
)
|
)
|
||||||
|
|
||||||
// MetricNames are the individual names for the metric.
|
// MetricName is the individual name of a metric.
|
||||||
type MetricNames string
|
type MetricName string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
errorsTotal MetricNames = "error_total"
|
errorsTotal MetricName = "error_total"
|
||||||
healTotal MetricNames = "heal_total"
|
healTotal MetricName = "heal_total"
|
||||||
hitsTotal MetricNames = "hits_total"
|
hitsTotal MetricName = "hits_total"
|
||||||
inflightTotal MetricNames = "inflight_total"
|
inflightTotal MetricName = "inflight_total"
|
||||||
missedTotal MetricNames = "missed_total"
|
limitTotal MetricName = "limit_total"
|
||||||
objectTotal MetricNames = "object_total"
|
missedTotal MetricName = "missed_total"
|
||||||
offlineTotal MetricNames = "offline_total"
|
objectTotal MetricName = "object_total"
|
||||||
onlineTotal MetricNames = "online_total"
|
offlineTotal MetricName = "offline_total"
|
||||||
total MetricNames = "total"
|
onlineTotal MetricName = "online_total"
|
||||||
|
openTotal MetricName = "open_total"
|
||||||
|
readTotal MetricName = "read_total"
|
||||||
|
writeTotal MetricName = "write_total"
|
||||||
|
total MetricName = "total"
|
||||||
|
|
||||||
failedBytes MetricNames = "failed_bytes"
|
failedBytes MetricName = "failed_bytes"
|
||||||
freeBytes MetricNames = "free_bytes"
|
freeBytes MetricName = "free_bytes"
|
||||||
pendingBytes MetricNames = "pending_bytes"
|
pendingBytes MetricName = "pending_bytes"
|
||||||
receivedBytes MetricNames = "received_bytes"
|
readBytes MetricName = "read_bytes"
|
||||||
sentBytes MetricNames = "sent_bytes"
|
rcharBytes MetricName = "rchar_bytes"
|
||||||
totalBytes MetricNames = "total_bytes"
|
receivedBytes MetricName = "received_bytes"
|
||||||
usedBytes MetricNames = "used_bytes"
|
sentBytes MetricName = "sent_bytes"
|
||||||
|
totalBytes MetricName = "total_bytes"
|
||||||
|
usedBytes MetricName = "used_bytes"
|
||||||
|
writeBytes MetricName = "write_bytes"
|
||||||
|
wcharBytes MetricName = "wchar_bytes"
|
||||||
|
|
||||||
usagePercent MetricNames = "update_percent"
|
usagePercent MetricName = "update_percent"
|
||||||
|
|
||||||
commitInfo MetricNames = "commit_info"
|
commitInfo MetricName = "commit_info"
|
||||||
usageInfo MetricNames = "usage_info"
|
usageInfo MetricName = "usage_info"
|
||||||
versionInfo MetricNames = "version_info"
|
versionInfo MetricName = "version_info"
|
||||||
|
|
||||||
sizeDistribution = "size_distribution"
|
sizeDistribution = "size_distribution"
|
||||||
ttfbDistribution = "ttbf_seconds_distribution"
|
ttfbDistribution = "ttbf_seconds_distribution"
|
||||||
@ -112,7 +126,7 @@ const (
|
|||||||
type MetricDescription struct {
|
type MetricDescription struct {
|
||||||
Namespace MetricNamespace `json:"MetricNamespace"`
|
Namespace MetricNamespace `json:"MetricNamespace"`
|
||||||
Subsystem MetricSubsystem `json:"Subsystem"`
|
Subsystem MetricSubsystem `json:"Subsystem"`
|
||||||
Name MetricNames `json:"MetricNames"`
|
Name MetricName `json:"MetricName"`
|
||||||
Help string `json:"Help"`
|
Help string `json:"Help"`
|
||||||
Type GaugeMetricType `json:"Type"`
|
Type GaugeMetricType `json:"Type"`
|
||||||
}
|
}
|
||||||
@ -157,12 +171,14 @@ func GetAllGenerators() []MetricsGenerator {
|
|||||||
// GetGeneratorsForPeer - gets the generators to report to peer.
|
// GetGeneratorsForPeer - gets the generators to report to peer.
|
||||||
func GetGeneratorsForPeer() []MetricsGenerator {
|
func GetGeneratorsForPeer() []MetricsGenerator {
|
||||||
g := []MetricsGenerator{
|
g := []MetricsGenerator{
|
||||||
getLocalStorageMetrics,
|
getCacheMetrics,
|
||||||
getMinioVersionMetrics,
|
getGoMetrics,
|
||||||
getHTTPMetrics,
|
getHTTPMetrics,
|
||||||
|
getLocalStorageMetrics,
|
||||||
|
getMinioProcMetrics,
|
||||||
|
getMinioVersionMetrics,
|
||||||
getNetworkMetrics,
|
getNetworkMetrics,
|
||||||
getS3TTFBMetric,
|
getS3TTFBMetric,
|
||||||
getCacheMetrics,
|
|
||||||
}
|
}
|
||||||
return g
|
return g
|
||||||
}
|
}
|
||||||
@ -534,7 +550,168 @@ func getS3TTFBDistributionMD() MetricDescription {
|
|||||||
Type: gaugeMetric,
|
Type: gaugeMetric,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
func getMinioFDOpenMD() MetricDescription {
|
||||||
|
return MetricDescription{
|
||||||
|
Namespace: nodeMetricNamespace,
|
||||||
|
Subsystem: fileDescriptorSubsystem,
|
||||||
|
Name: openTotal,
|
||||||
|
Help: "Total number of open file descriptors by the MinIO Server process.",
|
||||||
|
Type: gaugeMetric,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func getMinioFDLimitMD() MetricDescription {
|
||||||
|
return MetricDescription{
|
||||||
|
Namespace: nodeMetricNamespace,
|
||||||
|
Subsystem: fileDescriptorSubsystem,
|
||||||
|
Name: limitTotal,
|
||||||
|
Help: "Limit on total number of open file descriptors for the MinIO Server process.",
|
||||||
|
Type: gaugeMetric,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func getMinioProcessIOWriteBytesMD() MetricDescription {
|
||||||
|
return MetricDescription{
|
||||||
|
Namespace: nodeMetricNamespace,
|
||||||
|
Subsystem: ioSubsystem,
|
||||||
|
Name: writeBytes,
|
||||||
|
Help: "Total bytes written by the process to the underlying storage system, /proc/[pid]/io write_bytes",
|
||||||
|
Type: counterMetric,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func getMinioProcessIOReadBytesMD() MetricDescription {
|
||||||
|
return MetricDescription{
|
||||||
|
Namespace: nodeMetricNamespace,
|
||||||
|
Subsystem: ioSubsystem,
|
||||||
|
Name: readBytes,
|
||||||
|
Help: "Total bytes read by the process from the underlying storage system, /proc/[pid]/io read_bytes",
|
||||||
|
Type: counterMetric,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func getMinioProcessIOWriteCachedBytesMD() MetricDescription {
|
||||||
|
return MetricDescription{
|
||||||
|
Namespace: nodeMetricNamespace,
|
||||||
|
Subsystem: ioSubsystem,
|
||||||
|
Name: wcharBytes,
|
||||||
|
Help: "Total bytes written by the process to the underlying storage system including page cache, /proc/[pid]/io wchar",
|
||||||
|
Type: counterMetric,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func getMinioProcessIOReadCachedBytesMD() MetricDescription {
|
||||||
|
return MetricDescription{
|
||||||
|
Namespace: nodeMetricNamespace,
|
||||||
|
Subsystem: ioSubsystem,
|
||||||
|
Name: rcharBytes,
|
||||||
|
Help: "Total bytes read by the process from the underlying storage system including cache, /proc/[pid]/io rchar",
|
||||||
|
Type: counterMetric,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func getMinIOProcessSysCallRMD() MetricDescription {
|
||||||
|
return MetricDescription{
|
||||||
|
Namespace: nodeMetricNamespace,
|
||||||
|
Subsystem: sysCallSubsystem,
|
||||||
|
Name: readTotal,
|
||||||
|
Help: "Total read SysCalls to the kernel. /proc/[pid]/io syscr",
|
||||||
|
Type: counterMetric,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func getMinIOProcessSysCallWMD() MetricDescription {
|
||||||
|
return MetricDescription{
|
||||||
|
Namespace: nodeMetricNamespace,
|
||||||
|
Subsystem: sysCallSubsystem,
|
||||||
|
Name: writeTotal,
|
||||||
|
Help: "Total write SysCalls to the kernel. /proc/[pid]/io syscw",
|
||||||
|
Type: counterMetric,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func getMinIOGORoutineCountMD() MetricDescription {
|
||||||
|
return MetricDescription{
|
||||||
|
Namespace: nodeMetricNamespace,
|
||||||
|
Subsystem: goRoutines,
|
||||||
|
Name: total,
|
||||||
|
Help: "Total number of go routines running.",
|
||||||
|
Type: gaugeMetric,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func getMinioProcMetrics() MetricsGroup {
|
||||||
|
return MetricsGroup{
|
||||||
|
Metrics: []Metric{},
|
||||||
|
initialize: func(ctx context.Context, metrics *MetricsGroup) {
|
||||||
|
p, err := procfs.Self()
|
||||||
|
if err != nil {
|
||||||
|
logger.LogOnceIf(ctx, err, nodeMetricNamespace)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var openFDs int
|
||||||
|
openFDs, err = p.FileDescriptorsLen()
|
||||||
|
if err != nil {
|
||||||
|
logger.LogOnceIf(ctx, err, getMinioFDOpenMD())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
l, err := p.Limits()
|
||||||
|
if err != nil {
|
||||||
|
logger.LogOnceIf(ctx, err, getMinioFDLimitMD())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
io, err := p.IO()
|
||||||
|
if err != nil {
|
||||||
|
logger.LogOnceIf(ctx, err, ioSubsystem)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
metrics.Metrics = append(metrics.Metrics,
|
||||||
|
Metric{
|
||||||
|
Description: getMinioFDOpenMD(),
|
||||||
|
Value: float64(openFDs),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
metrics.Metrics = append(metrics.Metrics,
|
||||||
|
Metric{
|
||||||
|
Description: getMinioFDLimitMD(),
|
||||||
|
Value: float64(l.OpenFiles),
|
||||||
|
})
|
||||||
|
metrics.Metrics = append(metrics.Metrics,
|
||||||
|
Metric{
|
||||||
|
Description: getMinIOProcessSysCallRMD(),
|
||||||
|
Value: float64(io.SyscR),
|
||||||
|
})
|
||||||
|
metrics.Metrics = append(metrics.Metrics,
|
||||||
|
Metric{
|
||||||
|
Description: getMinIOProcessSysCallWMD(),
|
||||||
|
Value: float64(io.SyscW),
|
||||||
|
})
|
||||||
|
metrics.Metrics = append(metrics.Metrics,
|
||||||
|
Metric{
|
||||||
|
Description: getMinioProcessIOReadBytesMD(),
|
||||||
|
Value: float64(io.ReadBytes),
|
||||||
|
})
|
||||||
|
metrics.Metrics = append(metrics.Metrics,
|
||||||
|
Metric{
|
||||||
|
Description: getMinioProcessIOWriteBytesMD(),
|
||||||
|
Value: float64(io.WriteBytes),
|
||||||
|
})
|
||||||
|
metrics.Metrics = append(metrics.Metrics,
|
||||||
|
Metric{
|
||||||
|
Description: getMinioProcessIOReadCachedBytesMD(),
|
||||||
|
Value: float64(io.RChar),
|
||||||
|
})
|
||||||
|
metrics.Metrics = append(metrics.Metrics,
|
||||||
|
Metric{
|
||||||
|
Description: getMinioProcessIOWriteCachedBytesMD(),
|
||||||
|
Value: float64(io.WChar),
|
||||||
|
})
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func getGoMetrics() MetricsGroup {
|
||||||
|
return MetricsGroup{
|
||||||
|
Metrics: []Metric{},
|
||||||
|
initialize: func(ctx context.Context, metrics *MetricsGroup) {
|
||||||
|
metrics.Metrics = append(metrics.Metrics, Metric{
|
||||||
|
Description: getMinIOGORoutineCountMD(),
|
||||||
|
Value: float64(runtime.NumGoroutine()),
|
||||||
|
})
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
func getS3TTFBMetric() MetricsGroup {
|
func getS3TTFBMetric() MetricsGroup {
|
||||||
return MetricsGroup{
|
return MetricsGroup{
|
||||||
Metrics: []Metric{},
|
Metrics: []Metric{},
|
||||||
@ -1171,9 +1348,18 @@ func metricsNodeHandler() http.Handler {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
logger.CriticalIf(GlobalContext, err)
|
logger.CriticalIf(GlobalContext, err)
|
||||||
}
|
}
|
||||||
|
err = registry.Register(prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{
|
||||||
|
Namespace: minioNamespace,
|
||||||
|
ReportErrors: true,
|
||||||
|
}))
|
||||||
|
if err != nil {
|
||||||
|
logger.CriticalIf(GlobalContext, err)
|
||||||
|
}
|
||||||
|
err = registry.Register(prometheus.NewGoCollector())
|
||||||
|
if err != nil {
|
||||||
|
logger.CriticalIf(GlobalContext, err)
|
||||||
|
}
|
||||||
gatherers := prometheus.Gatherers{
|
gatherers := prometheus.Gatherers{
|
||||||
prometheus.DefaultGatherer,
|
|
||||||
registry,
|
registry,
|
||||||
}
|
}
|
||||||
// Delegate http serving to Prometheus client library, which will call collector.Collect.
|
// Delegate http serving to Prometheus client library, which will call collector.Collect.
|
||||||
|
@ -31,6 +31,14 @@ These metrics can be from any MinIO server once per collection.
|
|||||||
|`minio_node_disk_free_bytes` |Total storage available on a disk. |
|
|`minio_node_disk_free_bytes` |Total storage available on a disk. |
|
||||||
|`minio_node_disk_total_bytes` |Total storage on a disk. |
|
|`minio_node_disk_total_bytes` |Total storage on a disk. |
|
||||||
|`minio_node_disk_used_bytes` |Total storage used on a disk. |
|
|`minio_node_disk_used_bytes` |Total storage used on a disk. |
|
||||||
|
|`minio_node_file_descriptor_limit_total` |Limit on total number of open file descriptors for the MinIO Server process. |
|
||||||
|
|`minio_node_file_descriptor_open_total` |Total number of open file descriptors by the MinIO Server process. |
|
||||||
|
|`minio_node_io_rchar_bytes` |Total bytes read by the process from the underlying storage system including cache, /proc/[pid]/io rchar |
|
||||||
|
|`minio_node_io_read_bytes` |Total bytes read by the process from the underlying storage system, /proc/[pid]/io read_bytes |
|
||||||
|
|`minio_node_io_wchar_bytes` |Total bytes written by the process to the underlying storage system including page cache, /proc/[pid]/io wchar |
|
||||||
|
|`minio_node_io_write_bytes` |Total bytes written by the process to the underlying storage system, /proc/[pid]/io write_bytes |
|
||||||
|
|`minio_node_syscall_read_total` |Total read SysCalls to the kernel. /proc/[pid]/io syscr |
|
||||||
|
|`minio_node_syscall_write_total` |Total write SysCalls to the kernel. /proc/[pid]/io syscw |
|
||||||
|`minio_s3_requests_error_total` |Total number of S3 requests with errors |
|
|`minio_s3_requests_error_total` |Total number of S3 requests with errors |
|
||||||
|`minio_s3_requests_inflight_total` |Total number of S3 requests currently in flight. |
|
|`minio_s3_requests_inflight_total` |Total number of S3 requests currently in flight. |
|
||||||
|`minio_s3_requests_total` |Total number of S3 requests |
|
|`minio_s3_requests_total` |Total number of S3 requests |
|
||||||
|
1
go.mod
1
go.mod
@ -66,6 +66,7 @@ require (
|
|||||||
github.com/pkg/errors v0.9.1
|
github.com/pkg/errors v0.9.1
|
||||||
github.com/prometheus/client_golang v1.8.0
|
github.com/prometheus/client_golang v1.8.0
|
||||||
github.com/prometheus/client_model v0.2.0
|
github.com/prometheus/client_model v0.2.0
|
||||||
|
github.com/prometheus/procfs v0.2.0
|
||||||
github.com/rjeczalik/notify v0.9.2
|
github.com/rjeczalik/notify v0.9.2
|
||||||
github.com/rs/cors v1.7.0
|
github.com/rs/cors v1.7.0
|
||||||
github.com/secure-io/sio-go v0.3.0
|
github.com/secure-io/sio-go v0.3.0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user