mirror of
https://github.com/minio/minio.git
synced 2025-11-07 21:02:58 -05:00
Add support for audit/logger max retry and retry interval (#20402)
Current implementation retries forever until our log buffer is full, and we start dropping events. This PR allows you to set a value until we give up on existing audit/logger batches to proceed to process the new ones. Bonus: - do not blow up buffers beyond batchSize value - do not leak the ticker if the worker returns
This commit is contained in:
@@ -79,6 +79,8 @@ type Config struct {
|
||||
BatchSize int `json:"batchSize"`
|
||||
QueueSize int `json:"queueSize"`
|
||||
QueueDir string `json:"queueDir"`
|
||||
MaxRetry int `json:"maxRetry"`
|
||||
RetryIntvl time.Duration `json:"retryInterval"`
|
||||
Proxy string `json:"string"`
|
||||
Transport http.RoundTripper `json:"-"`
|
||||
|
||||
@@ -227,6 +229,7 @@ func (h *Target) send(ctx context.Context, payload []byte, payloadCount int, pay
|
||||
if xnet.IsNetworkOrHostDown(err, false) {
|
||||
h.status.Store(statusOffline)
|
||||
}
|
||||
h.failedMessages.Add(int64(payloadCount))
|
||||
} else {
|
||||
h.status.Store(statusOnline)
|
||||
}
|
||||
@@ -257,7 +260,6 @@ func (h *Target) send(ctx context.Context, payload []byte, payloadCount int, pay
|
||||
|
||||
resp, err := h.client.Do(req)
|
||||
if err != nil {
|
||||
h.failedMessages.Add(int64(payloadCount))
|
||||
return fmt.Errorf("%s returned '%w', please check your endpoint configuration", h.Endpoint(), err)
|
||||
}
|
||||
|
||||
@@ -268,10 +270,8 @@ func (h *Target) send(ctx context.Context, payload []byte, payloadCount int, pay
|
||||
// accepted HTTP status codes.
|
||||
return nil
|
||||
} else if resp.StatusCode == http.StatusForbidden {
|
||||
h.failedMessages.Add(int64(payloadCount))
|
||||
return fmt.Errorf("%s returned '%s', please check if your auth token is correctly set", h.Endpoint(), resp.Status)
|
||||
}
|
||||
h.failedMessages.Add(int64(payloadCount))
|
||||
return fmt.Errorf("%s returned '%s', please check your endpoint configuration", h.Endpoint(), resp.Status)
|
||||
}
|
||||
|
||||
@@ -326,9 +326,6 @@ func (h *Target) startQueueProcessor(ctx context.Context, mainWorker bool) {
|
||||
}
|
||||
}()
|
||||
|
||||
var entry interface{}
|
||||
var ok bool
|
||||
var err error
|
||||
lastBatchProcess := time.Now()
|
||||
|
||||
buf := bytebufferpool.Get()
|
||||
@@ -343,61 +340,76 @@ func (h *Target) startQueueProcessor(ctx context.Context, mainWorker bool) {
|
||||
globalBuffer := logChBuffers[name]
|
||||
logChLock.Unlock()
|
||||
|
||||
newTicker := time.NewTicker(time.Second)
|
||||
isTick := false
|
||||
var count int
|
||||
ticker := time.NewTicker(time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
var count int
|
||||
for {
|
||||
isTick = false
|
||||
select {
|
||||
case _ = <-newTicker.C:
|
||||
isTick = true
|
||||
case entry, _ = <-globalBuffer:
|
||||
case entry, ok = <-h.logCh:
|
||||
if !ok {
|
||||
var (
|
||||
ok bool
|
||||
entry any
|
||||
)
|
||||
|
||||
if count < h.batchSize {
|
||||
tickered := false
|
||||
select {
|
||||
case _ = <-ticker.C:
|
||||
tickered = true
|
||||
case entry, _ = <-globalBuffer:
|
||||
case entry, ok = <-h.logCh:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
|
||||
if !isTick {
|
||||
h.totalMessages.Add(1)
|
||||
|
||||
if !isDirQueue {
|
||||
if err := enc.Encode(&entry); err != nil {
|
||||
h.config.LogOnceIf(
|
||||
ctx,
|
||||
fmt.Errorf("unable to encode webhook log entry, err '%w' entry: %v\n", err, entry),
|
||||
h.Name(),
|
||||
)
|
||||
h.failedMessages.Add(1)
|
||||
continue
|
||||
if !tickered {
|
||||
h.totalMessages.Add(1)
|
||||
if !isDirQueue {
|
||||
if err := enc.Encode(&entry); err != nil {
|
||||
h.config.LogOnceIf(
|
||||
ctx,
|
||||
fmt.Errorf("unable to encode webhook log entry, err '%w' entry: %v\n", err, entry),
|
||||
h.Name(),
|
||||
)
|
||||
h.failedMessages.Add(1)
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
count++
|
||||
} else {
|
||||
entries = append(entries, entry)
|
||||
count++
|
||||
}
|
||||
}
|
||||
|
||||
if count != h.batchSize {
|
||||
if len(h.logCh) > 0 || len(globalBuffer) > 0 || count == 0 {
|
||||
// there is something in the log queue
|
||||
// process it first, even if we tickered
|
||||
// first, or we have not received any events
|
||||
// yet, still wait on it.
|
||||
continue
|
||||
}
|
||||
|
||||
if h.batchSize > 1 {
|
||||
// If we are doing batching, we should wait
|
||||
// at least one second before sending.
|
||||
// Even if there is nothing in the queue.
|
||||
if time.Since(lastBatchProcess).Seconds() < 1 {
|
||||
continue
|
||||
}
|
||||
// If we are doing batching, we should wait
|
||||
// at least for a second, before sending.
|
||||
// Even if there is nothing in the queue.
|
||||
if h.batchSize > 1 && time.Since(lastBatchProcess) < time.Second {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// if we have reached the count send at once
|
||||
// or we have crossed last second before batch was sent, send at once
|
||||
lastBatchProcess = time.Now()
|
||||
|
||||
var retries int
|
||||
retryIntvl := h.config.RetryIntvl
|
||||
if retryIntvl <= 0 {
|
||||
retryIntvl = 3 * time.Second
|
||||
}
|
||||
|
||||
maxRetries := h.config.MaxRetry
|
||||
|
||||
retry:
|
||||
// If the channel reaches above half capacity
|
||||
// we spawn more workers. The workers spawned
|
||||
@@ -415,6 +427,7 @@ func (h *Target) startQueueProcessor(ctx context.Context, mainWorker bool) {
|
||||
}
|
||||
}
|
||||
|
||||
var err error
|
||||
if !isDirQueue {
|
||||
err = h.send(ctx, buf.Bytes(), count, h.payloadType, webhookCallTimeout)
|
||||
} else {
|
||||
@@ -422,18 +435,24 @@ func (h *Target) startQueueProcessor(ctx context.Context, mainWorker bool) {
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
h.config.LogOnceIf(
|
||||
context.Background(),
|
||||
fmt.Errorf("unable to send webhook log entry(s) to '%s' err '%w': %d", name, err, count),
|
||||
name,
|
||||
)
|
||||
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return
|
||||
}
|
||||
|
||||
time.Sleep(3 * time.Second)
|
||||
goto retry
|
||||
h.config.LogOnceIf(
|
||||
context.Background(),
|
||||
fmt.Errorf("unable to send audit/log entry(s) to '%s' err '%w': %d", name, err, count),
|
||||
name,
|
||||
)
|
||||
|
||||
time.Sleep(retryIntvl)
|
||||
if maxRetries == 0 {
|
||||
goto retry
|
||||
}
|
||||
retries++
|
||||
if retries <= maxRetries {
|
||||
goto retry
|
||||
}
|
||||
}
|
||||
|
||||
entries = make([]interface{}, 0)
|
||||
|
||||
Reference in New Issue
Block a user