mirror of
https://github.com/minio/minio.git
synced 2025-01-24 05:03:16 -05:00
Label the notification target metrics by their target IDs (#18633)
This patch adds the targetID to the existing notification target metrics and deprecates the current target metrics which points to the overall event notification subsystem
This commit is contained in:
parent
b3314e97a6
commit
10ca0a6936
@ -2460,7 +2460,7 @@ func getNotificationMetrics() *MetricsGroup {
|
||||
Namespace: minioNamespace,
|
||||
Subsystem: notifySubsystem,
|
||||
Name: "current_send_in_progress",
|
||||
Help: "Number of concurrent async Send calls active to all targets",
|
||||
Help: "Number of concurrent async Send calls active to all targets (deprecated, please use 'minio_notify_target_current_send_in_progress' instead)",
|
||||
Type: gaugeMetric,
|
||||
},
|
||||
Value: float64(nstats.CurrentSendCalls),
|
||||
@ -2470,7 +2470,7 @@ func getNotificationMetrics() *MetricsGroup {
|
||||
Namespace: minioNamespace,
|
||||
Subsystem: notifySubsystem,
|
||||
Name: "events_skipped_total",
|
||||
Help: "Events that were skipped due to full queue",
|
||||
Help: "Events that were skipped to be sent to the targets due to the in-memory queue being full",
|
||||
Type: counterMetric,
|
||||
},
|
||||
Value: float64(nstats.EventsSkipped),
|
||||
@ -2480,7 +2480,7 @@ func getNotificationMetrics() *MetricsGroup {
|
||||
Namespace: minioNamespace,
|
||||
Subsystem: notifySubsystem,
|
||||
Name: "events_errors_total",
|
||||
Help: "Events that were failed while sending to target",
|
||||
Help: "Events that were failed to be sent to the targets (deprecated, please use 'minio_notify_target_failed_events' instead)",
|
||||
Type: counterMetric,
|
||||
},
|
||||
Value: float64(nstats.EventsErrorsTotal),
|
||||
@ -2490,21 +2490,54 @@ func getNotificationMetrics() *MetricsGroup {
|
||||
Namespace: minioNamespace,
|
||||
Subsystem: notifySubsystem,
|
||||
Name: "events_sent_total",
|
||||
Help: "Total number of events sent since start",
|
||||
Help: "Total number of events sent to the targets (deprecated, please use 'minio_notify_target_total_events' instead)",
|
||||
Type: counterMetric,
|
||||
},
|
||||
Value: float64(nstats.TotalEvents),
|
||||
})
|
||||
for _, st := range nstats.TargetStats {
|
||||
for id, st := range nstats.TargetStats {
|
||||
metrics = append(metrics, Metric{
|
||||
Description: MetricDescription{
|
||||
Namespace: minioNamespace,
|
||||
Subsystem: notifySubsystem,
|
||||
Name: "target_total_events",
|
||||
Help: "Total number of events sent (or) queued to the target",
|
||||
Type: counterMetric,
|
||||
},
|
||||
VariableLabels: map[string]string{"target_id": id.ID, "target_name": id.Name},
|
||||
Value: float64(st.TotalEvents),
|
||||
})
|
||||
metrics = append(metrics, Metric{
|
||||
Description: MetricDescription{
|
||||
Namespace: minioNamespace,
|
||||
Subsystem: notifySubsystem,
|
||||
Name: "target_failed_events",
|
||||
Help: "Number of events failed to be sent (or) queued to the target",
|
||||
Type: counterMetric,
|
||||
},
|
||||
VariableLabels: map[string]string{"target_id": id.ID, "target_name": id.Name},
|
||||
Value: float64(st.FailedEvents),
|
||||
})
|
||||
metrics = append(metrics, Metric{
|
||||
Description: MetricDescription{
|
||||
Namespace: minioNamespace,
|
||||
Subsystem: notifySubsystem,
|
||||
Name: "target_current_send_in_progress",
|
||||
Help: "Number of concurrent async Send calls active to the target",
|
||||
Type: gaugeMetric,
|
||||
},
|
||||
VariableLabels: map[string]string{"target_id": id.ID, "target_name": id.Name},
|
||||
Value: float64(st.CurrentSendCalls),
|
||||
})
|
||||
metrics = append(metrics, Metric{
|
||||
Description: MetricDescription{
|
||||
Namespace: minioNamespace,
|
||||
Subsystem: notifySubsystem,
|
||||
Name: "target_queue_length",
|
||||
Help: "Number of unsent notifications in queue for target",
|
||||
Help: "Number of events currently staged in the queue_dir configured for the target",
|
||||
Type: gaugeMetric,
|
||||
},
|
||||
VariableLabels: map[string]string{"target_id": st.ID.ID, "target_name": st.ID.Name},
|
||||
VariableLabels: map[string]string{"target_id": id.ID, "target_name": id.Name},
|
||||
Value: float64(st.CurrentQueue),
|
||||
})
|
||||
}
|
||||
|
@ -132,10 +132,14 @@ For deployments with [bucket](https://min.io/docs/minio/linux/administration/buc
|
||||
## Bucket Notification Metrics
|
||||
|
||||
| Name | Description |
|
||||
|:----------------------------------------|:-------------------------------------------------------------|
|
||||
| `minio_notify_current_send_in_progress` | Number of concurrent async Send calls active to all targets. |
|
||||
| `minio_notify_target_queue_length` | Number of unsent notifications in queue for target. |
|
||||
| | |
|
||||
|:-----------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `minio_notify_current_send_in_progress` | Number of concurrent async Send calls active to all targets (deprecated, please use 'minio_notify_target_current_send_in_progress' instead) |
|
||||
| `minio_notify_events_errors_total` | Events that were failed to be sent to the targets (deprecated, please use 'minio_notify_target_failed_events' instead) |
|
||||
| `minio_notify_events_sent_total` | Total number of events sent to the targets (deprecated, please use 'minio_notify_target_total_events' instead) |
|
||||
| `minio_notify_events_skipped_total` | Events that were skipped to be sent to the targets due to the in-memory queue being full |
|
||||
| `minio_notify_target_current_send_in_progress` | Number of concurrent async Send calls active to the target |
|
||||
| `minio_notify_target_queue_length` | Number of events currently staged in the queue_dir configured for the target. |
|
||||
| `minio_notify_target_total_events` | Total number of events sent (or) queued to the target |
|
||||
|
||||
## S3 API Request Metrics
|
||||
|
||||
|
@ -21,7 +21,6 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
@ -50,22 +49,23 @@ type TargetStore interface {
|
||||
Len() int
|
||||
}
|
||||
|
||||
// TargetStats is a collection of stats for multiple targets.
|
||||
type TargetStats struct {
|
||||
// CurrentSendCalls is the number of concurrent async Send calls to all targets
|
||||
CurrentSendCalls int64
|
||||
TotalEvents int64
|
||||
// Stats is a collection of stats for multiple targets.
|
||||
type Stats struct {
|
||||
TotalEvents int64 // Deprecated
|
||||
EventsSkipped int64
|
||||
CurrentQueuedCalls int64
|
||||
EventsErrorsTotal int64
|
||||
CurrentQueuedCalls int64 // Deprecated
|
||||
EventsErrorsTotal int64 // Deprecated
|
||||
CurrentSendCalls int64 // Deprecated
|
||||
|
||||
TargetStats map[string]TargetStat
|
||||
TargetStats map[TargetID]TargetStat
|
||||
}
|
||||
|
||||
// TargetStat is the stats of a single target.
|
||||
type TargetStat struct {
|
||||
ID TargetID
|
||||
CurrentSendCalls int64 // CurrentSendCalls is the number of concurrent async Send calls to all targets
|
||||
CurrentQueue int // Populated if target has a store.
|
||||
TotalEvents int64
|
||||
FailedEvents int64 // Number of failed events per target
|
||||
}
|
||||
|
||||
// TargetList - holds list of targets indexed by target ID.
|
||||
@ -80,6 +80,82 @@ type TargetList struct {
|
||||
targets map[TargetID]Target
|
||||
queue chan asyncEvent
|
||||
ctx context.Context
|
||||
|
||||
statLock sync.RWMutex
|
||||
targetStats map[TargetID]targetStat
|
||||
}
|
||||
|
||||
type targetStat struct {
|
||||
// The number of concurrent async Send calls per targets
|
||||
currentSendCalls int64
|
||||
// The number of total events per target
|
||||
totalEvents int64
|
||||
// The number of failed events per target
|
||||
failedEvents int64
|
||||
}
|
||||
|
||||
func (list *TargetList) getStatsByTargetID(id TargetID) (stat targetStat) {
|
||||
list.statLock.RLock()
|
||||
defer list.statLock.RUnlock()
|
||||
|
||||
return list.targetStats[id]
|
||||
}
|
||||
|
||||
func (list *TargetList) incCurrentSendCalls(id TargetID) {
|
||||
list.statLock.Lock()
|
||||
defer list.statLock.Unlock()
|
||||
|
||||
stats, ok := list.targetStats[id]
|
||||
if !ok {
|
||||
stats = targetStat{}
|
||||
}
|
||||
|
||||
stats.currentSendCalls++
|
||||
list.targetStats[id] = stats
|
||||
return
|
||||
}
|
||||
|
||||
func (list *TargetList) decCurrentSendCalls(id TargetID) {
|
||||
list.statLock.Lock()
|
||||
defer list.statLock.Unlock()
|
||||
|
||||
stats, ok := list.targetStats[id]
|
||||
if !ok {
|
||||
// should not happen
|
||||
return
|
||||
}
|
||||
|
||||
stats.currentSendCalls--
|
||||
list.targetStats[id] = stats
|
||||
return
|
||||
}
|
||||
|
||||
func (list *TargetList) incFailedEvents(id TargetID) {
|
||||
list.statLock.Lock()
|
||||
defer list.statLock.Unlock()
|
||||
|
||||
stats, ok := list.targetStats[id]
|
||||
if !ok {
|
||||
stats = targetStat{}
|
||||
}
|
||||
|
||||
stats.failedEvents++
|
||||
list.targetStats[id] = stats
|
||||
return
|
||||
}
|
||||
|
||||
func (list *TargetList) incTotalEvents(id TargetID) {
|
||||
list.statLock.Lock()
|
||||
defer list.statLock.Unlock()
|
||||
|
||||
stats, ok := list.targetStats[id]
|
||||
if !ok {
|
||||
stats = targetStat{}
|
||||
}
|
||||
|
||||
stats.totalEvents++
|
||||
list.targetStats[id] = stats
|
||||
return
|
||||
}
|
||||
|
||||
type asyncEvent struct {
|
||||
@ -203,11 +279,15 @@ func (list *TargetList) sendSync(event Event, targetIDset TargetIDSet) {
|
||||
wg.Add(1)
|
||||
go func(id TargetID, target Target) {
|
||||
list.currentSendCalls.Add(1)
|
||||
list.incCurrentSendCalls(id)
|
||||
list.incTotalEvents(id)
|
||||
defer list.decCurrentSendCalls(id)
|
||||
defer list.currentSendCalls.Add(-1)
|
||||
defer wg.Done()
|
||||
|
||||
if err := target.Save(event); err != nil {
|
||||
list.eventsErrorsTotal.Add(1)
|
||||
list.incFailedEvents(id)
|
||||
reqInfo := &logger.ReqInfo{}
|
||||
reqInfo.AppendTags("targetID", id.String())
|
||||
logger.LogOnceIf(logger.SetReqInfo(context.Background(), reqInfo), err, id.String())
|
||||
@ -240,8 +320,8 @@ func (list *TargetList) sendAsync(event Event, targetIDset TargetIDSet) {
|
||||
}
|
||||
|
||||
// Stats returns stats for targets.
|
||||
func (list *TargetList) Stats() TargetStats {
|
||||
t := TargetStats{}
|
||||
func (list *TargetList) Stats() Stats {
|
||||
t := Stats{}
|
||||
if list == nil {
|
||||
return t
|
||||
}
|
||||
@ -253,14 +333,21 @@ func (list *TargetList) Stats() TargetStats {
|
||||
|
||||
list.RLock()
|
||||
defer list.RUnlock()
|
||||
t.TargetStats = make(map[string]TargetStat, len(list.targets))
|
||||
t.TargetStats = make(map[TargetID]TargetStat, len(list.targets))
|
||||
for id, target := range list.targets {
|
||||
ts := TargetStat{ID: id}
|
||||
var currentQueue int
|
||||
if st := target.Store(); st != nil {
|
||||
ts.CurrentQueue = st.Len()
|
||||
currentQueue = st.Len()
|
||||
}
|
||||
t.TargetStats[strings.ReplaceAll(id.String(), ":", "_")] = ts
|
||||
stats := list.getStatsByTargetID(id)
|
||||
t.TargetStats[id] = TargetStat{
|
||||
CurrentSendCalls: stats.currentSendCalls,
|
||||
CurrentQueue: currentQueue,
|
||||
FailedEvents: stats.failedEvents,
|
||||
TotalEvents: stats.totalEvents,
|
||||
}
|
||||
}
|
||||
|
||||
return t
|
||||
}
|
||||
|
||||
@ -305,6 +392,7 @@ func NewTargetList(ctx context.Context) *TargetList {
|
||||
list := &TargetList{
|
||||
targets: make(map[TargetID]Target),
|
||||
queue: make(chan asyncEvent, maxConcurrentAsyncSend),
|
||||
targetStats: make(map[TargetID]targetStat),
|
||||
ctx: ctx,
|
||||
}
|
||||
return list
|
||||
|
Loading…
x
Reference in New Issue
Block a user