mirror of
https://github.com/minio/minio.git
synced 2025-01-11 23:13:23 -05:00
Export tier metrics (#18678)
minio_node_tier_ttlb_seconds - Distribution of time to last byte for streaming objects from warm tier minio_node_tier_requests_success - Number of requests to download object from warm tier that were successful minio_node_tier_requests_failure - Number of requests to download object from warm tier that failed
This commit is contained in:
parent
b1a109a611
commit
56b7045c20
@ -507,9 +507,13 @@ func auditTierActions(ctx context.Context, tier string, bytes int64) func(err er
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
op.TimeToResponseNS = time.Since(startTime).Nanoseconds()
|
||||
since := time.Since(startTime)
|
||||
op.TimeToResponseNS = since.Nanoseconds()
|
||||
globalTierMetrics.Observe(tier, since)
|
||||
globalTierMetrics.logSuccess(tier)
|
||||
} else {
|
||||
op.Error = err.Error()
|
||||
globalTierMetrics.logFailure(tier)
|
||||
}
|
||||
|
||||
logger.GetReqInfo(ctx).AppendTags("tierStats", op)
|
||||
|
@ -90,6 +90,7 @@ func init() {
|
||||
getNetworkMetrics(),
|
||||
getMinioVersionMetrics(),
|
||||
getS3TTFBMetric(),
|
||||
getTierMetrics(),
|
||||
getNotificationMetrics(),
|
||||
getDistLockMetrics(),
|
||||
getIAMNodeMetrics(),
|
||||
@ -155,6 +156,7 @@ const (
|
||||
usageSubsystem MetricSubsystem = "usage"
|
||||
quotaSubsystem MetricSubsystem = "quota"
|
||||
ilmSubsystem MetricSubsystem = "ilm"
|
||||
tierSubsystem MetricSubsystem = "tier"
|
||||
scannerSubsystem MetricSubsystem = "scanner"
|
||||
iamSubsystem MetricSubsystem = "iam"
|
||||
kmsSubsystem MetricSubsystem = "kms"
|
||||
@ -246,6 +248,7 @@ const (
|
||||
sizeDistribution = "size_distribution"
|
||||
versionDistribution = "version_distribution"
|
||||
ttfbDistribution = "seconds_distribution"
|
||||
ttlbDistribution = "ttlb_seconds_distribution"
|
||||
|
||||
lastActivityTime = "last_activity_nano_seconds"
|
||||
startTime = "starttime_seconds"
|
||||
@ -262,6 +265,9 @@ const (
|
||||
transitionedObjects MetricName = "transitioned_objects"
|
||||
transitionedVersions MetricName = "transitioned_versions"
|
||||
|
||||
tierRequestsSuccess MetricName = "requests_success"
|
||||
tierRequestsFailure MetricName = "requests_failure"
|
||||
|
||||
kmsOnline = "online"
|
||||
kmsRequestsSuccess = "request_success"
|
||||
kmsRequestsError = "request_error"
|
||||
@ -1658,6 +1664,16 @@ func getS3TTFBMetric() *MetricsGroup {
|
||||
return mg
|
||||
}
|
||||
|
||||
func getTierMetrics() *MetricsGroup {
|
||||
mg := &MetricsGroup{
|
||||
cacheInterval: 10 * time.Second,
|
||||
}
|
||||
mg.RegisterRead(func(ctx context.Context) []Metric {
|
||||
return globalTierMetrics.Report()
|
||||
})
|
||||
return mg
|
||||
}
|
||||
|
||||
func getTransitionPendingTasksMD() MetricDescription {
|
||||
return MetricDescription{
|
||||
Namespace: nodeMetricNamespace,
|
||||
|
94
cmd/tier.go
94
cmd/tier.go
@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2015-2021 MinIO, Inc.
|
||||
// Copyright (c) 2015-2023 MinIO, Inc.
|
||||
//
|
||||
// This file is part of MinIO Object Storage stack
|
||||
//
|
||||
@ -27,11 +27,13 @@ import (
|
||||
"path"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/minio/madmin-go/v3"
|
||||
"github.com/minio/minio/internal/crypto"
|
||||
"github.com/minio/minio/internal/hash"
|
||||
"github.com/minio/minio/internal/kms"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
//go:generate msgp -file $GOFILE
|
||||
@ -80,6 +82,96 @@ type TierConfigMgr struct {
|
||||
Tiers map[string]madmin.TierConfig `json:"tiers"`
|
||||
}
|
||||
|
||||
type tierMetrics struct {
|
||||
sync.RWMutex // protects requestsCount only
|
||||
requestsCount map[string]struct {
|
||||
success int64
|
||||
failure int64
|
||||
}
|
||||
histogram *prometheus.HistogramVec
|
||||
}
|
||||
|
||||
var globalTierMetrics = tierMetrics{
|
||||
requestsCount: make(map[string]struct {
|
||||
success int64
|
||||
failure int64
|
||||
}),
|
||||
histogram: prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Name: "tier_ttlb_seconds",
|
||||
Help: "Time taken by requests served by warm tier",
|
||||
Buckets: []float64{0.01, 0.1, 1, 2, 5, 10, 60, 5 * 60, 15 * 60, 30 * 60},
|
||||
}, []string{"tier"}),
|
||||
}
|
||||
|
||||
func (t *tierMetrics) Observe(tier string, dur time.Duration) {
|
||||
t.histogram.With(prometheus.Labels{"tier": tier}).Observe(dur.Seconds())
|
||||
}
|
||||
|
||||
func (t *tierMetrics) logSuccess(tier string) {
|
||||
t.Lock()
|
||||
defer t.Unlock()
|
||||
|
||||
stat := t.requestsCount[tier]
|
||||
stat.success++
|
||||
t.requestsCount[tier] = stat
|
||||
}
|
||||
|
||||
func (t *tierMetrics) logFailure(tier string) {
|
||||
t.Lock()
|
||||
defer t.Unlock()
|
||||
|
||||
stat := t.requestsCount[tier]
|
||||
stat.failure++
|
||||
t.requestsCount[tier] = stat
|
||||
}
|
||||
|
||||
var (
|
||||
// {minio_node}_{tier}_{ttlb_seconds_distribution}
|
||||
tierTTLBMD = MetricDescription{
|
||||
Namespace: nodeMetricNamespace,
|
||||
Subsystem: tierSubsystem,
|
||||
Name: ttlbDistribution,
|
||||
Help: "Distribution of time to last byte for objects downloaded from warm tier",
|
||||
Type: gaugeMetric,
|
||||
}
|
||||
|
||||
// {minio_node}_{tier}_{requests_success}
|
||||
tierRequestsSuccessMD = MetricDescription{
|
||||
Namespace: nodeMetricNamespace,
|
||||
Subsystem: tierSubsystem,
|
||||
Name: tierRequestsSuccess,
|
||||
Help: "Number of requests to download object from warm tier that were successful",
|
||||
Type: counterMetric,
|
||||
}
|
||||
// {minio_node}_{tier}_{requests_failure}
|
||||
tierRequestsFailureMD = MetricDescription{
|
||||
Namespace: nodeMetricNamespace,
|
||||
Subsystem: tierSubsystem,
|
||||
Name: tierRequestsFailure,
|
||||
Help: "Number of requests to download object from warm tier that failed",
|
||||
Type: counterMetric,
|
||||
}
|
||||
)
|
||||
|
||||
func (t *tierMetrics) Report() []Metric {
|
||||
metrics := getHistogramMetrics(t.histogram, tierTTLBMD)
|
||||
t.RLock()
|
||||
defer t.RUnlock()
|
||||
for tier, stat := range t.requestsCount {
|
||||
metrics = append(metrics, Metric{
|
||||
Description: tierRequestsSuccessMD,
|
||||
Value: float64(stat.success),
|
||||
VariableLabels: map[string]string{"tier": tier},
|
||||
})
|
||||
metrics = append(metrics, Metric{
|
||||
Description: tierRequestsFailureMD,
|
||||
Value: float64(stat.failure),
|
||||
VariableLabels: map[string]string{"tier": tier},
|
||||
})
|
||||
}
|
||||
return metrics
|
||||
}
|
||||
|
||||
// IsTierValid returns true if there exists a remote tier by name tierName,
|
||||
// otherwise returns false.
|
||||
func (config *TierConfigMgr) IsTierValid(tierName string) bool {
|
||||
|
52
cmd/tier_test.go
Normal file
52
cmd/tier_test.go
Normal file
@ -0,0 +1,52 @@
|
||||
// Copyright (c) 2015-2023 MinIO, Inc.
|
||||
//
|
||||
// This file is part of MinIO Object Storage stack
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestTierMetrics(t *testing.T) {
|
||||
tier := "WARM-1"
|
||||
globalTierMetrics.Observe(tier, 200*time.Millisecond)
|
||||
expSuccess := 10
|
||||
expFailure := 5
|
||||
for i := 0; i < expSuccess; i++ {
|
||||
globalTierMetrics.logSuccess(tier)
|
||||
}
|
||||
for i := 0; i < expFailure; i++ {
|
||||
globalTierMetrics.logFailure(tier)
|
||||
}
|
||||
metrics := globalTierMetrics.Report()
|
||||
var succ, fail float64
|
||||
for _, metric := range metrics {
|
||||
switch metric.Description.Name {
|
||||
case tierRequestsSuccess:
|
||||
succ += metric.Value
|
||||
case tierRequestsFailure:
|
||||
fail += metric.Value
|
||||
}
|
||||
}
|
||||
if int(succ) != expSuccess {
|
||||
t.Fatalf("Expected %d successes but got %f", expSuccess, succ)
|
||||
}
|
||||
if int(fail) != expFailure {
|
||||
t.Fatalf("Expected %d failures but got %f", expFailure, fail)
|
||||
}
|
||||
}
|
@ -200,6 +200,14 @@ For deployments with [bucket](https://min.io/docs/minio/linux/administration/buc
|
||||
| `minio_node_ilm_transition_missed_immediate_tasks` | Number of missed immediate ILM transition tasks. |
|
||||
| `minio_node_ilm_versions_scanned` | Total number of object versions checked for ilm actions since server start. |
|
||||
|
||||
## Tier Metrics
|
||||
|
||||
| Name | Description |
|
||||
|:---------------------------------------------------|:----------------------------------------------------------------------------|
|
||||
| `minio_node_tier_ttlb_seconds_distribution` | Distribution of time to last byte for objects downloaded from warm tier |
|
||||
| `minio_node_tier_requests_success` | Number of requests to download object from warm tier that were successful |
|
||||
| `minio_node_tier_requests_failure` | Number of requests to download object from warm tier that failed |
|
||||
|
||||
## System Metrics
|
||||
|
||||
| Name | Description |
|
||||
|
Loading…
Reference in New Issue
Block a user