merge startHTTPLogger()'s many callers into a simpler pattern (#19211)

simplify audit webhook worker model

fixes a couple of bugs:

- ping(ctx) was starting a logger goroutine without updating
  the number of workers, leading to incorrect nWorkers
  scaling and an additional worker that was not tracked
  properly (see the sketch after the startHTTPLogger
  hunk below).

- h.logCh <- entry could hang when the queue is full
  on heavily loaded systems (see the sketch after the
  final hunk below).
Harshavardhana 2024-03-06 08:09:46 -08:00 committed by GitHub
parent 74ccee6619
commit e91a4a414c

@@ -138,6 +138,10 @@ func (h *Target) ping(ctx context.Context) bool {
 	if err := h.send(ctx, []byte(`{}`), webhookCallTimeout); err != nil {
 		return !xnet.IsNetworkOrHostDown(err, false) && !xnet.IsConnRefusedErr(err)
 	}
+	// We are online.
+	h.workerStartMu.Lock()
+	h.lastStarted = time.Now()
+	h.workerStartMu.Unlock()
 	go h.startHTTPLogger(ctx)
 	return true
 }
@@ -199,14 +203,6 @@ func (h *Target) init(ctx context.Context) (err error) {
 			return
 		}
 		if h.ping(ctx) {
-			// We are online.
-			if atomic.CompareAndSwapInt32(&h.status, statusOffline, statusOnline) {
-				h.workerStartMu.Lock()
-				h.lastStarted = time.Now()
-				h.workerStartMu.Unlock()
-				atomic.AddInt64(&h.workers, 1)
-				go h.startHTTPLogger(ctx)
-			}
 			return
 		}
 	}
@@ -214,14 +210,6 @@ func (h *Target) init(ctx context.Context) (err error) {
 		})
 		return err
 	}
-	if atomic.CompareAndSwapInt32(&h.status, statusOffline, statusOnline) {
-		h.workerStartMu.Lock()
-		h.lastStarted = time.Now()
-		h.workerStartMu.Unlock()
-		atomic.AddInt64(&h.workers, 1)
-		go h.startHTTPLogger(ctx)
-	}
 	return nil
 }
@@ -306,6 +294,9 @@ func (h *Target) logEntry(ctx context.Context, entry interface{}) {
 }
 
 func (h *Target) startHTTPLogger(ctx context.Context) {
+	atomic.AddInt64(&h.workers, 1)
+	defer atomic.AddInt64(&h.workers, -1)
+
 	h.logChMu.RLock()
 	logCh := h.logCh
 	if logCh != nil {
@@ -314,15 +305,12 @@ func (h *Target) startHTTPLogger(ctx context.Context) {
 		defer h.wg.Done()
 	}
 	h.logChMu.RUnlock()
-	defer atomic.AddInt64(&h.workers, -1)
-
 	if logCh == nil {
 		return
 	}
 
 	// Send messages until channel is closed.
 	for entry := range logCh {
-		atomic.AddInt64(&h.totalMessages, 1)
 		h.logEntry(ctx, entry)
 	}
 }
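
The hunk above is the heart of the change: the worker goroutine now does its own accounting, so every caller (ping, init, Send) just starts it and the count stays correct. A minimal standalone sketch of the pattern, with illustrative names (target, startLogger) and a toy string channel standing in for the real Target type:

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

type target struct {
	workers int64
	logCh   chan string
}

func (t *target) startLogger() {
	// The worker owns its accounting: increment on entry, decrement
	// on exit, so no caller can forget to update the count.
	atomic.AddInt64(&t.workers, 1)
	defer atomic.AddInt64(&t.workers, -1)

	// Send messages until the channel is closed.
	for entry := range t.logCh {
		fmt.Println("sent:", entry) // stand-in for the HTTP send
	}
}

func main() {
	t := &target{logCh: make(chan string, 4)}
	go t.startLogger() // as ping()/init() now do: just start it
	t.logCh <- "hello"
	time.Sleep(50 * time.Millisecond) // let the worker drain
	fmt.Println("workers:", atomic.LoadInt64(&t.workers))
	close(t.logCh)
}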
@@ -394,8 +382,10 @@ func (h *Target) Send(ctx context.Context, entry interface{}) error {
 		return nil
 	}
 
+retry:
 	select {
 	case h.logCh <- entry:
+		atomic.AddInt64(&h.totalMessages, 1)
 	case <-ctx.Done():
 		// return error only for context timedout.
 		if errors.Is(ctx.Err(), context.DeadlineExceeded) {
@@ -407,24 +397,13 @@ func (h *Target) Send(ctx context.Context, entry interface{}) error {
 		if nWorkers < maxWorkers {
 			// Only have one try to start at the same time.
 			h.workerStartMu.Lock()
-			defer h.workerStartMu.Unlock()
-			// Start one max every second.
 			if time.Since(h.lastStarted) > time.Second {
-				if atomic.CompareAndSwapInt64(&h.workers, nWorkers, nWorkers+1) {
-					// Start another logger.
-					h.lastStarted = time.Now()
-					go h.startHTTPLogger(ctx)
-				}
+				h.lastStarted = time.Now()
+				go h.startHTTPLogger(ctx)
 			}
-			select {
-			case h.logCh <- entry:
-			case <-ctx.Done():
-				// return error only for context timedout.
-				if errors.Is(ctx.Err(), context.DeadlineExceeded) {
-					return ctx.Err()
-				}
-			}
-			return nil
+			h.workerStartMu.Unlock()
+
+			goto retry
 		}
 		atomic.AddInt64(&h.totalMessages, 1)
 		atomic.AddInt64(&h.failedMessages, 1)
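
The last two hunks turn Send into a non-blocking enqueue with retry. The default: branch of the select is unchanged context that the hunks do not show, so the sketch below reconstructs the flow under that assumption; names (target, startLogger, send) and the maxWorkers value are illustrative, not the actual Target implementation:

package main

import (
	"context"
	"fmt"
	"sync"
	"sync/atomic"
	"time"
)

const maxWorkers = 4 // illustrative cap; the real constant lives elsewhere in the file

type target struct {
	workers       int64
	lastStarted   time.Time
	workerStartMu sync.Mutex
	logCh         chan string
}

func (t *target) startLogger() {
	atomic.AddInt64(&t.workers, 1)
	defer atomic.AddInt64(&t.workers, -1)
	for range t.logCh { // drain; stand-in for the HTTP send loop
	}
}

// send never parks on a full queue: the non-blocking enqueue falls
// through to default, which may start one extra worker (rate-limited
// to one per second) and then retries the enqueue from the top.
func (t *target) send(ctx context.Context, entry string) error {
retry:
	select {
	case t.logCh <- entry:
	case <-ctx.Done():
		return ctx.Err()
	default:
		if atomic.LoadInt64(&t.workers) < maxWorkers {
			// Only have one try to start at the same time.
			t.workerStartMu.Lock()
			if time.Since(t.lastStarted) > time.Second {
				t.lastStarted = time.Now()
				go t.startLogger()
			}
			t.workerStartMu.Unlock()
			goto retry
		}
		// at the worker cap and still full: drop and count as failed
	}
	return nil
}

func main() {
	t := &target{logCh: make(chan string, 2)}
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	for i := 0; i < 8; i++ {
		_ = t.send(ctx, fmt.Sprintf("entry-%d", i))
	}
	close(t.logCh)
}

The goto keeps the fast path (one select) untouched while bounding how long a caller can stall: either the entry is queued, a worker is added and the enqueue is retried, or the context deadline fires.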