Add lazy init of audit logger (#16842)

This commit is contained in:
Klaus Post 2023-03-21 10:50:40 -07:00 committed by GitHub
parent 0448728228
commit 11d04279c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -23,6 +23,7 @@ import (
"encoding/json"
"errors"
"fmt"
"math"
"net/http"
"net/url"
"strings"
@ -39,7 +40,13 @@ const (
webhookCallTimeout = 5 * time.Second
// maxWorkers is the maximum number of concurrent operations.
maxWorkers = 8
maxWorkers = 16
)
const (
statusOffline = iota
statusOnline
statusClosed
)
// Config http logger target
@ -67,6 +74,7 @@ type Config struct {
type Target struct {
totalMessages int64
failedMessages int64
status int32
// Worker control
workers int64
@ -74,13 +82,16 @@ type Target struct {
lastStarted time.Time
wg sync.WaitGroup
doneCh chan struct{}
// Channel of log entries
// Channel of log entries.
// Reading logCh must hold read lock on logChMu (to avoid read race)
// Sending a value on logCh must hold read lock on logChMu (to avoid closing)
logCh chan interface{}
logChMu sync.RWMutex
// is the target online?
online bool
// If the first init fails, this starts a goroutine that
// will attempt to establish the connection.
revive sync.Once
config Config
client *http.Client
@ -97,20 +108,34 @@ func (h *Target) String() string {
// IsOnline returns true if the initialization was successful
func (h *Target) IsOnline() bool {
return h.online
return atomic.LoadInt32(&h.status) == statusOnline
}
// Stats returns the target statistics.
func (h *Target) Stats() types.TargetStats {
return types.TargetStats{
h.logChMu.RLock()
logCh := h.logCh
h.logChMu.RUnlock()
stats := types.TargetStats{
TotalMessages: atomic.LoadInt64(&h.totalMessages),
FailedMessages: atomic.LoadInt64(&h.failedMessages),
QueueLength: len(h.logCh),
QueueLength: len(logCh),
}
return stats
}
// Init validate and initialize the http target
func (h *Target) Init() error {
func (h *Target) Init() (err error) {
switch atomic.LoadInt32(&h.status) {
case statusOnline:
return nil
case statusClosed:
return errors.New("target is closed")
}
// This will check if we can reach the remote.
checkAlive := func() error {
ctx, cancel := context.WithTimeout(context.Background(), 2*webhookCallTimeout)
defer cancel()
@ -129,22 +154,10 @@ func (h *Target) Init() error {
req.Header.Set("Authorization", h.config.AuthToken)
}
// If proxy available, set the same
if h.config.Proxy != "" {
proxyURL, _ := url.Parse(h.config.Proxy)
transport := h.config.Transport
ctransport := transport.(*http.Transport).Clone()
ctransport.Proxy = http.ProxyURL(proxyURL)
h.config.Transport = ctransport
}
client := http.Client{Transport: h.config.Transport}
resp, err := client.Do(req)
resp, err := h.client.Do(req)
if err != nil {
return err
}
h.client = &client
// Drain any response.
xhttp.DrainBody(resp.Body)
@ -156,11 +169,43 @@ func (h *Target) Init() error {
return fmt.Errorf("%s returned '%s', please check your endpoint configuration",
h.config.Endpoint, resp.Status)
}
return nil
}
err = checkAlive()
if err != nil {
// Start a goroutine that will continue to check if we can reach
h.revive.Do(func() {
go func() {
t := time.NewTicker(time.Second)
defer t.Stop()
for range t.C {
if atomic.LoadInt32(&h.status) != statusOffline {
return
}
if err := checkAlive(); err == nil {
// We are online.
if atomic.CompareAndSwapInt32(&h.status, statusOffline, statusOnline) {
h.workerStartMu.Lock()
h.lastStarted = time.Now()
h.online = true
h.workerStartMu.Unlock()
atomic.AddInt64(&h.workers, 1)
go h.startHTTPLogger()
}
return
}
}
}()
})
return err
}
if atomic.CompareAndSwapInt32(&h.status, statusOffline, statusOnline) {
h.workerStartMu.Lock()
h.lastStarted = time.Now()
h.workerStartMu.Unlock()
go h.startHTTPLogger()
}
return nil
}
@ -178,6 +223,21 @@ func (h *Target) logEntry(entry interface{}) {
return
}
tries := 0
for {
if tries > 0 {
if tries >= 10 || atomic.LoadInt32(&h.status) == statusClosed {
// Don't retry when closing...
return
}
// sleep = (tries+2) ^ 2 milliseconds.
sleep := time.Duration(math.Pow(float64(tries+2), 2)) * time.Millisecond
if sleep > time.Second {
sleep = time.Second
}
time.Sleep(sleep)
}
tries++
ctx, cancel := context.WithTimeout(context.Background(), webhookCallTimeout)
defer cancel()
req, err := http.NewRequestWithContext(ctx, http.MethodPost,
@ -185,7 +245,7 @@ func (h *Target) logEntry(entry interface{}) {
if err != nil {
h.config.LogOnce(ctx, fmt.Errorf("%s returned '%w', please check your endpoint configuration", h.config.Endpoint, err), h.config.Endpoint)
atomic.AddInt64(&h.failedMessages, 1)
return
continue
}
req.Header.Set(xhttp.ContentType, "application/json")
req.Header.Set(xhttp.MinIOVersion, xhttp.GlobalMinIOVersion)
@ -203,13 +263,16 @@ func (h *Target) logEntry(entry interface{}) {
if err != nil {
atomic.AddInt64(&h.failedMessages, 1)
h.config.LogOnce(ctx, fmt.Errorf("%s returned '%w', please check your endpoint configuration", h.config.Endpoint, err), h.config.Endpoint)
return
continue
}
// Drain any response.
xhttp.DrainBody(resp.Body)
if !acceptedResponseStatusCode(resp.StatusCode) {
if acceptedResponseStatusCode(resp.StatusCode) {
return
}
// Log failure, retry
atomic.AddInt64(&h.failedMessages, 1)
switch resp.StatusCode {
case http.StatusForbidden:
@ -221,25 +284,25 @@ func (h *Target) logEntry(entry interface{}) {
}
func (h *Target) startHTTPLogger() {
// Create a routine which sends json logs received
// from an internal channel.
h.logChMu.RLock()
logCh := h.logCh
if logCh != nil {
// We are not allowed to add when logCh is nil
h.wg.Add(1)
go func() {
defer func() {
h.wg.Done()
atomic.AddInt64(&h.workers, -1)
}()
defer h.wg.Done()
}
h.logChMu.RUnlock()
for {
select {
case entry := <-h.logCh:
atomic.AddInt64(&h.totalMessages, 1)
h.logEntry(entry)
case <-h.doneCh:
defer atomic.AddInt64(&h.workers, -1)
if logCh == nil {
return
}
// Send messages until channel is closed.
for entry := range logCh {
atomic.AddInt64(&h.totalMessages, 1)
h.logEntry(entry)
}
}()
}
// New initializes a new logger target which
@ -247,30 +310,43 @@ func (h *Target) startHTTPLogger() {
func New(config Config) *Target {
h := &Target{
logCh: make(chan interface{}, config.QueueSize),
doneCh: make(chan struct{}),
config: config,
online: false,
status: statusOffline,
}
// If proxy available, set the same
if h.config.Proxy != "" {
proxyURL, _ := url.Parse(h.config.Proxy)
transport := h.config.Transport
ctransport := transport.(*http.Transport).Clone()
ctransport.Proxy = http.ProxyURL(proxyURL)
h.config.Transport = ctransport
}
h.client = &http.Client{Transport: h.config.Transport}
return h
}
// Send log message 'e' to http target.
// If servers are offline messages are queued until queue is full.
// If Cancel has been called the message is ignored.
func (h *Target) Send(entry interface{}) error {
if !h.online {
if atomic.LoadInt32(&h.status) == statusClosed {
return nil
}
select {
case <-h.doneCh:
h.logChMu.RLock()
defer h.logChMu.RUnlock()
if h.logCh == nil {
// We are closing...
return nil
default:
}
select {
case <-h.doneCh:
case h.logCh <- entry:
default:
// Drop messages until we are online.
if !h.IsOnline() {
return errors.New("log buffer full and remote offline")
}
nWorkers := atomic.LoadInt64(&h.workers)
if nWorkers < maxWorkers {
// Only have one try to start at the same time.
@ -284,11 +360,7 @@ func (h *Target) Send(entry interface{}) error {
go h.startHTTPLogger()
}
}
// Block to send
select {
case <-h.doneCh:
case h.logCh <- entry:
}
h.logCh <- entry
return nil
}
// log channel is full, do not wait and return
@ -301,10 +373,22 @@ func (h *Target) Send(entry interface{}) error {
return nil
}
// Cancel - cancels the target
// Cancel - cancels the target.
// All queued messages are flushed and the function returns afterwards.
// All messages sent to the target after this function has been called will be dropped.
func (h *Target) Cancel() {
close(h.doneCh)
atomic.StoreInt32(&h.status, statusClosed)
// Set logch to nil and close it.
// This will block all Send operations,
// and finish the existing ones.
// All future ones will be discarded.
h.logChMu.Lock()
close(h.logCh)
h.logCh = nil
h.logChMu.Unlock()
// Wait for messages to be sent...
h.wg.Wait()
}