mirror of
https://github.com/minio/minio.git
synced 2025-03-30 17:23:42 -04:00
fix: failed messages counting in audit_http metrics (#18075)
Retries must not each be counted as a failed message: a failed message is counted once, not once per retry. This PR fixes that. Also, we do not need to retry 10 times; instead we should retry at most 3 times, with some jitter, to deliver the messages.
This commit is contained in:
parent
74cfb207c1
commit
1472875670
@ -24,6 +24,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
|
"math/rand"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
@ -153,7 +154,7 @@ func (h *Target) Init(ctx context.Context) (err error) {
|
|||||||
if h.config.QueueDir != "" {
|
if h.config.QueueDir != "" {
|
||||||
return h.initQueueStoreOnce.DoWithContext(ctx, h.initQueueStore)
|
return h.initQueueStoreOnce.DoWithContext(ctx, h.initQueueStore)
|
||||||
}
|
}
|
||||||
return h.initLogChannel(ctx)
|
return h.init(ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *Target) initQueueStore(ctx context.Context) (err error) {
|
func (h *Target) initQueueStore(ctx context.Context) (err error) {
|
||||||
@ -170,7 +171,7 @@ func (h *Target) initQueueStore(ctx context.Context) (err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *Target) initLogChannel(ctx context.Context) (err error) {
|
func (h *Target) init(ctx context.Context) (err error) {
|
||||||
switch atomic.LoadInt32(&h.status) {
|
switch atomic.LoadInt32(&h.status) {
|
||||||
case statusOnline:
|
case statusOnline:
|
||||||
return nil
|
return nil
|
||||||
@ -182,8 +183,10 @@ func (h *Target) initLogChannel(ctx context.Context) (err error) {
|
|||||||
// Start a goroutine that will continue to check if we can reach
|
// Start a goroutine that will continue to check if we can reach
|
||||||
h.revive.Do(func() {
|
h.revive.Do(func() {
|
||||||
go func() {
|
go func() {
|
||||||
t := time.NewTicker(time.Second)
|
// Avoid thundering herd, add jitter.
|
||||||
|
t := time.NewTicker(time.Second + time.Duration(rand.Int63n(int64(5*time.Second))))
|
||||||
defer t.Stop()
|
defer t.Stop()
|
||||||
|
|
||||||
for range t.C {
|
for range t.C {
|
||||||
if atomic.LoadInt32(&h.status) != statusOffline {
|
if atomic.LoadInt32(&h.status) != statusOffline {
|
||||||
return
|
return
|
||||||
@ -261,27 +264,29 @@ func (h *Target) logEntry(ctx context.Context, entry interface{}) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const maxTries = 3
|
||||||
tries := 0
|
tries := 0
|
||||||
for {
|
for tries < maxTries {
|
||||||
if tries > 0 {
|
if atomic.LoadInt32(&h.status) == statusClosed {
|
||||||
if tries >= 10 || atomic.LoadInt32(&h.status) == statusClosed {
|
// Don't retry when closing...
|
||||||
// Don't retry when closing...
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// sleep = (tries+2) ^ 2 milliseconds.
|
|
||||||
sleep := time.Duration(math.Pow(float64(tries+2), 2)) * time.Millisecond
|
|
||||||
if sleep > time.Second {
|
|
||||||
sleep = time.Second
|
|
||||||
}
|
|
||||||
time.Sleep(sleep)
|
|
||||||
}
|
|
||||||
tries++
|
|
||||||
if err := h.send(ctx, logJSON, webhookCallTimeout); err != nil {
|
|
||||||
h.config.LogOnce(ctx, err, h.Endpoint())
|
|
||||||
atomic.AddInt64(&h.failedMessages, 1)
|
|
||||||
} else {
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
// sleep = (tries+2) ^ 2 milliseconds.
|
||||||
|
sleep := time.Duration(math.Pow(float64(tries+2), 2)) * time.Millisecond
|
||||||
|
if sleep > time.Second {
|
||||||
|
sleep = time.Second
|
||||||
|
}
|
||||||
|
time.Sleep(sleep)
|
||||||
|
tries++
|
||||||
|
err := h.send(ctx, logJSON, webhookCallTimeout)
|
||||||
|
if err == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
h.config.LogOnce(ctx, err, h.Endpoint())
|
||||||
|
}
|
||||||
|
if tries == maxTries {
|
||||||
|
// Even with multiple retries, count failed messages as only one.
|
||||||
|
atomic.AddInt64(&h.failedMessages, 1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user