diff --git a/cmd/metrics-v2.go b/cmd/metrics-v2.go index 4fcd33b3f..9b9996ce9 100644 --- a/cmd/metrics-v2.go +++ b/cmd/metrics-v2.go @@ -2460,7 +2460,7 @@ func getNotificationMetrics() *MetricsGroup { Namespace: minioNamespace, Subsystem: notifySubsystem, Name: "current_send_in_progress", - Help: "Number of concurrent async Send calls active to all targets", + Help: "Number of concurrent async Send calls active to all targets (deprecated, please use 'minio_notify_target_current_send_in_progress' instead)", Type: gaugeMetric, }, Value: float64(nstats.CurrentSendCalls), @@ -2470,7 +2470,7 @@ func getNotificationMetrics() *MetricsGroup { Namespace: minioNamespace, Subsystem: notifySubsystem, Name: "events_skipped_total", - Help: "Events that were skipped due to full queue", + Help: "Events that were skipped to be sent to the targets due to the in-memory queue being full", Type: counterMetric, }, Value: float64(nstats.EventsSkipped), @@ -2480,7 +2480,7 @@ func getNotificationMetrics() *MetricsGroup { Namespace: minioNamespace, Subsystem: notifySubsystem, Name: "events_errors_total", - Help: "Events that were failed while sending to target", + Help: "Events that were failed to be sent to the targets (deprecated, please use 'minio_notify_target_failed_events' instead)", Type: counterMetric, }, Value: float64(nstats.EventsErrorsTotal), @@ -2490,21 +2490,54 @@ func getNotificationMetrics() *MetricsGroup { Namespace: minioNamespace, Subsystem: notifySubsystem, Name: "events_sent_total", - Help: "Total number of events sent since start", + Help: "Total number of events sent to the targets (deprecated, please use 'minio_notify_target_total_events' instead)", Type: counterMetric, }, Value: float64(nstats.TotalEvents), }) - for _, st := range nstats.TargetStats { + for id, st := range nstats.TargetStats { + metrics = append(metrics, Metric{ + Description: MetricDescription{ + Namespace: minioNamespace, + Subsystem: notifySubsystem, + Name: "target_total_events", + Help: "Total number of events sent (or) queued to the target", + Type: counterMetric, + }, + VariableLabels: map[string]string{"target_id": id.ID, "target_name": id.Name}, + Value: float64(st.TotalEvents), + }) + metrics = append(metrics, Metric{ + Description: MetricDescription{ + Namespace: minioNamespace, + Subsystem: notifySubsystem, + Name: "target_failed_events", + Help: "Number of events failed to be sent (or) queued to the target", + Type: counterMetric, + }, + VariableLabels: map[string]string{"target_id": id.ID, "target_name": id.Name}, + Value: float64(st.FailedEvents), + }) + metrics = append(metrics, Metric{ + Description: MetricDescription{ + Namespace: minioNamespace, + Subsystem: notifySubsystem, + Name: "target_current_send_in_progress", + Help: "Number of concurrent async Send calls active to the target", + Type: gaugeMetric, + }, + VariableLabels: map[string]string{"target_id": id.ID, "target_name": id.Name}, + Value: float64(st.CurrentSendCalls), + }) metrics = append(metrics, Metric{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: notifySubsystem, Name: "target_queue_length", - Help: "Number of unsent notifications in queue for target", + Help: "Number of events currently staged in the queue_dir configured for the target", Type: gaugeMetric, }, - VariableLabels: map[string]string{"target_id": st.ID.ID, "target_name": st.ID.Name}, + VariableLabels: map[string]string{"target_id": id.ID, "target_name": id.Name}, Value: float64(st.CurrentQueue), }) } diff --git a/docs/metrics/prometheus/list.md b/docs/metrics/prometheus/list.md index 3cb7edfe5..f9872e169 100644 --- a/docs/metrics/prometheus/list.md +++ b/docs/metrics/prometheus/list.md @@ -131,11 +131,15 @@ For deployments with [bucket](https://min.io/docs/minio/linux/administration/buc ## Bucket Notification Metrics -| Name | Description | -|:----------------------------------------|:-------------------------------------------------------------| -| `minio_notify_current_send_in_progress` | Number of concurrent async Send calls active to all targets. | -| `minio_notify_target_queue_length` | Number of unsent notifications in queue for target. | -| | | +| Name | Description | +|:-----------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------| +| `minio_notify_current_send_in_progress` | Number of concurrent async Send calls active to all targets (deprecated, please use 'minio_notify_target_current_send_in_progress' instead) | +| `minio_notify_events_errors_total` | Events that were failed to be sent to the targets (deprecated, please use 'minio_notify_target_failed_events' instead) | +| `minio_notify_events_sent_total` | Total number of events sent to the targets (deprecated, please use 'minio_notify_target_total_events' instead) | +| `minio_notify_events_skipped_total` | Events that were skipped to be sent to the targets due to the in-memory queue being full | +| `minio_notify_target_current_send_in_progress` | Number of concurrent async Send calls active to the target | +| `minio_notify_target_queue_length` | Number of events currently staged in the queue_dir configured for the target. | +| `minio_notify_target_total_events` | Total number of events sent (or) queued to the target | ## S3 API Request Metrics diff --git a/internal/event/targetlist.go b/internal/event/targetlist.go index 9c3c62ab5..5d261cd6c 100644 --- a/internal/event/targetlist.go +++ b/internal/event/targetlist.go @@ -21,7 +21,6 @@ import ( "context" "fmt" "runtime" - "strings" "sync" "sync/atomic" @@ -50,22 +49,23 @@ type TargetStore interface { Len() int } -// TargetStats is a collection of stats for multiple targets. -type TargetStats struct { - // CurrentSendCalls is the number of concurrent async Send calls to all targets - CurrentSendCalls int64 - TotalEvents int64 +// Stats is a collection of stats for multiple targets. +type Stats struct { + TotalEvents int64 // Deprecated EventsSkipped int64 - CurrentQueuedCalls int64 - EventsErrorsTotal int64 + CurrentQueuedCalls int64 // Deprecated + EventsErrorsTotal int64 // Deprecated + CurrentSendCalls int64 // Deprecated - TargetStats map[string]TargetStat + TargetStats map[TargetID]TargetStat } // TargetStat is the stats of a single target. type TargetStat struct { - ID TargetID - CurrentQueue int // Populated if target has a store. + CurrentSendCalls int64 // CurrentSendCalls is the number of concurrent async Send calls to all targets + CurrentQueue int // Populated if target has a store. + TotalEvents int64 + FailedEvents int64 // Number of failed events per target } // TargetList - holds list of targets indexed by target ID. @@ -80,6 +80,82 @@ type TargetList struct { targets map[TargetID]Target queue chan asyncEvent ctx context.Context + + statLock sync.RWMutex + targetStats map[TargetID]targetStat +} + +type targetStat struct { + // The number of concurrent async Send calls per targets + currentSendCalls int64 + // The number of total events per target + totalEvents int64 + // The number of failed events per target + failedEvents int64 +} + +func (list *TargetList) getStatsByTargetID(id TargetID) (stat targetStat) { + list.statLock.RLock() + defer list.statLock.RUnlock() + + return list.targetStats[id] +} + +func (list *TargetList) incCurrentSendCalls(id TargetID) { + list.statLock.Lock() + defer list.statLock.Unlock() + + stats, ok := list.targetStats[id] + if !ok { + stats = targetStat{} + } + + stats.currentSendCalls++ + list.targetStats[id] = stats + return +} + +func (list *TargetList) decCurrentSendCalls(id TargetID) { + list.statLock.Lock() + defer list.statLock.Unlock() + + stats, ok := list.targetStats[id] + if !ok { + // should not happen + return + } + + stats.currentSendCalls-- + list.targetStats[id] = stats + return +} + +func (list *TargetList) incFailedEvents(id TargetID) { + list.statLock.Lock() + defer list.statLock.Unlock() + + stats, ok := list.targetStats[id] + if !ok { + stats = targetStat{} + } + + stats.failedEvents++ + list.targetStats[id] = stats + return +} + +func (list *TargetList) incTotalEvents(id TargetID) { + list.statLock.Lock() + defer list.statLock.Unlock() + + stats, ok := list.targetStats[id] + if !ok { + stats = targetStat{} + } + + stats.totalEvents++ + list.targetStats[id] = stats + return } type asyncEvent struct { @@ -203,11 +279,15 @@ func (list *TargetList) sendSync(event Event, targetIDset TargetIDSet) { wg.Add(1) go func(id TargetID, target Target) { list.currentSendCalls.Add(1) + list.incCurrentSendCalls(id) + list.incTotalEvents(id) + defer list.decCurrentSendCalls(id) defer list.currentSendCalls.Add(-1) defer wg.Done() if err := target.Save(event); err != nil { list.eventsErrorsTotal.Add(1) + list.incFailedEvents(id) reqInfo := &logger.ReqInfo{} reqInfo.AppendTags("targetID", id.String()) logger.LogOnceIf(logger.SetReqInfo(context.Background(), reqInfo), err, id.String()) @@ -240,8 +320,8 @@ func (list *TargetList) sendAsync(event Event, targetIDset TargetIDSet) { } // Stats returns stats for targets. -func (list *TargetList) Stats() TargetStats { - t := TargetStats{} +func (list *TargetList) Stats() Stats { + t := Stats{} if list == nil { return t } @@ -253,14 +333,21 @@ func (list *TargetList) Stats() TargetStats { list.RLock() defer list.RUnlock() - t.TargetStats = make(map[string]TargetStat, len(list.targets)) + t.TargetStats = make(map[TargetID]TargetStat, len(list.targets)) for id, target := range list.targets { - ts := TargetStat{ID: id} + var currentQueue int if st := target.Store(); st != nil { - ts.CurrentQueue = st.Len() + currentQueue = st.Len() + } + stats := list.getStatsByTargetID(id) + t.TargetStats[id] = TargetStat{ + CurrentSendCalls: stats.currentSendCalls, + CurrentQueue: currentQueue, + FailedEvents: stats.failedEvents, + TotalEvents: stats.totalEvents, } - t.TargetStats[strings.ReplaceAll(id.String(), ":", "_")] = ts } + return t } @@ -303,9 +390,10 @@ func (list *TargetList) Init(workers int) *TargetList { // NewTargetList - creates TargetList. func NewTargetList(ctx context.Context) *TargetList { list := &TargetList{ - targets: make(map[TargetID]Target), - queue: make(chan asyncEvent, maxConcurrentAsyncSend), - ctx: ctx, + targets: make(map[TargetID]Target), + queue: make(chan asyncEvent, maxConcurrentAsyncSend), + targetStats: make(map[TargetID]targetStat), + ctx: ctx, } return list }