From 31fba6f434c0450f3590c7a6a73ecce3a6276db0 Mon Sep 17 00:00:00 2001
From: Krishnan Parthasarathi
Date: Fri, 17 Mar 2023 16:01:03 -0700
Subject: [PATCH] Save bootstrap trace events in a circular buffer (#16823)

---
 cmd/admin-handlers.go          |   5 ++
 cmd/bootstrap-messages.go      | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 cmd/bootstrap-messages_test.go |  60 +++++++++++++++++++++++++++++
 cmd/peer-rest-server.go        |   6 ++
 cmd/server-main.go             |   2 +
 5 files changed, 213 insertions(+)
 create mode 100644 cmd/bootstrap-messages.go
 create mode 100644 cmd/bootstrap-messages_test.go

diff --git a/cmd/admin-handlers.go b/cmd/admin-handlers.go
index 0e2b93c31..4f2f9f063 100644
--- a/cmd/admin-handlers.go
+++ b/cmd/admin-handlers.go
@@ -1526,6 +1526,11 @@ func (a adminAPIHandlers) TraceHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
+	// Publish bootstrap events that have already occurred before client could subscribe.
+	if traceOpts.TraceTypes().Contains(madmin.TraceBootstrap) {
+		go globalBootstrapTracer.Publish(ctx, globalTrace)
+	}
+
 	for _, peer := range peers {
 		if peer == nil {
 			continue
diff --git a/cmd/bootstrap-messages.go b/cmd/bootstrap-messages.go
new file mode 100644
index 000000000..55f7a0739
--- /dev/null
+++ b/cmd/bootstrap-messages.go
@@ -0,0 +1,140 @@
+// Copyright (c) 2015-2023 MinIO, Inc.
+//
+// This file is part of MinIO Object Storage stack
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package cmd
+
+import (
+	"context"
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/minio/madmin-go/v2"
+	"github.com/minio/minio/internal/pubsub"
+)
+
+// bootstrapMsgsLimit is the number of slots in the bootstrap trace ring buffer.
+const bootstrapMsgsLimit = 4 << 10
+
+// bootstrapInfo is one recorded bootstrap message.
+type bootstrapInfo struct {
+	msg    string    // text passed to Record
+	ts     time.Time // UTC time at which Record stored the message
+	source string    // result of getSource(2) at record time
+}
+
+// bootstrapTracer keeps the most recent bootstrapMsgsLimit bootstrap messages
+// in a fixed-size circular buffer so that they can be replayed later.
+// Its methods hold mu around every access to the other fields.
+type bootstrapTracer struct {
+	mu         sync.RWMutex
+	idx        int // slot the next Record call writes to
+	info       [bootstrapMsgsLimit]bootstrapInfo
+	lastUpdate time.Time // time of the most recent Record call
+}
+
+// globalBootstrapTracer is the process-wide tracer that bootstrapTrace records into.
+var globalBootstrapTracer = &bootstrapTracer{}
+
+// DropEvents clears the buffer if no message has been recorded in the last
+// 24 hours; otherwise it does nothing.
+func (bs *bootstrapTracer) DropEvents() {
+	bs.mu.Lock()
+	defer bs.mu.Unlock()
+
+	if time.Now().UTC().Sub(bs.lastUpdate) > 24*time.Hour {
+		// Size the cleared array from the constant so it always matches the field.
+		bs.info = [bootstrapMsgsLimit]bootstrapInfo{}
+		bs.idx = 0
+	}
+}
+
+// Empty reports whether the buffer currently holds no events.
+//
+// Record fills slots starting at index 0 and DropEvents only ever clears the
+// whole array, so slot 0 is set whenever any event is stored. The timestamp is
+// checked, as in Events, so a recorded message with empty text still counts.
+func (bs *bootstrapTracer) Empty() bool {
+	bs.mu.RLock()
+	defer bs.mu.RUnlock()
+
+	return bs.info[0].ts.IsZero()
+}
+
+// Record stores msg in the next slot of the circular buffer, overwriting the
+// oldest entry once the buffer is full. The entry is stamped with the current
+// UTC time and with getSource(2).
+func (bs *bootstrapTracer) Record(msg string) {
+	// NOTE(review): getSource(2) presumably identifies the caller of Record; confirm.
+	source := getSource(2)
+	bs.mu.Lock()
+	now := time.Now().UTC()
+	bs.info[bs.idx] = bootstrapInfo{
+		msg:    msg,
+		ts:     now,
+		source: source,
+	}
+	bs.lastUpdate = now
+	bs.idx = (bs.idx + 1) % bootstrapMsgsLimit
+	bs.mu.Unlock()
+}
+
+// Events returns the stored messages as bootstrap trace entries, oldest first.
+// Slots that have never been written are skipped.
+func (bs *bootstrapTracer) Events() []madmin.TraceInfo {
+	traceInfo := make([]madmin.TraceInfo, 0, bootstrapMsgsLimit)
+
+	// Add all messages in order
+	addAll := func(info []bootstrapInfo) {
+		for _, msg := range info {
+			if msg.ts.IsZero() {
+				continue // skip empty events
+			}
+			traceInfo = append(traceInfo, madmin.TraceInfo{
+				TraceType: madmin.TraceBootstrap,
+				Time:      msg.ts,
+				NodeName:  globalLocalNodeName,
+				FuncName:  "BOOTSTRAP",
+				Message:   fmt.Sprintf("%s %s", msg.source, msg.msg),
+			})
+		}
+	}
+
+	bs.mu.RLock()
+	// bs.idx is the oldest slot once the buffer has wrapped, so the tail comes first.
+	addAll(bs.info[bs.idx:])
+	addAll(bs.info[:bs.idx])
+	bs.mu.RUnlock()
+	return traceInfo
+}
+
+// Publish replays the stored events on trace, oldest first. It does nothing
+// when the buffer is empty and stops as soon as ctx is done.
+func (bs *bootstrapTracer) Publish(ctx context.Context, trace *pubsub.PubSub[madmin.TraceInfo, madmin.TraceType]) {
+	if bs.Empty() {
+		return
+	}
+	for _, bsEvent := range bs.Events() {
+		select {
+		case <-ctx.Done():
+			// ctx is done; stop instead of looping over the remaining events.
+			return
+		default:
+			trace.Publish(bsEvent)
+		}
+	}
+}
diff --git
a/cmd/bootstrap-messages_test.go b/cmd/bootstrap-messages_test.go
new file mode 100644
index 000000000..2aa47e756
--- /dev/null
+++ b/cmd/bootstrap-messages_test.go
@@ -0,0 +1,60 @@
+// Copyright (c) 2015-2023 MinIO, Inc.
+//
+// This file is part of MinIO Object Storage stack
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package cmd
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+	"time"
+)
+
+// TestBootstrap covers buffer wrap-around, stale-event dropping and ordering.
+func TestBootstrap(t *testing.T) {
+	tracer := &bootstrapTracer{}
+
+	// Overfill the buffer: only bootstrapMsgsLimit events may be retained.
+	for n := 0; n < bootstrapMsgsLimit+10; n++ {
+		tracer.Record(fmt.Sprintf("msg-%d", n))
+	}
+	if got := len(tracer.Events()); got != bootstrapMsgsLimit {
+		t.Fatalf("Expected length of events %d but got %d", bootstrapMsgsLimit, got)
+	}
+
+	// Pretend the last update happened more than a day ago, then drop.
+	tracer.lastUpdate = time.Now().UTC().Add(-25 * time.Hour)
+	if tracer.DropEvents(); !tracer.Empty() {
+		t.Fatalf("Expected all bootstrap events to have been dropped, but found %d events", len(tracer.Events()))
+	}
+
+	// A partially filled buffer returns exactly what was recorded, in order.
+	for n := 0; n < 10; n++ {
+		tracer.Record(fmt.Sprintf("msg-%d", n))
+	}
+	if got := len(tracer.Events()); got != 10 {
+		t.Fatalf("Expected length of events %d but got %d", 10, got)
+	}
+
+	for pos, ev := range tracer.Events() {
+		want := fmt.Sprintf("msg-%d", pos)
+		if strings.HasSuffix(ev.Message, want) {
+			continue
+		}
+		t.Fatalf("Expected %s but got %s", want, ev.Message)
+	}
+}
diff --git a/cmd/peer-rest-server.go b/cmd/peer-rest-server.go
index 3af2d399e..c9c7d1add 100644
--- a/cmd/peer-rest-server.go
+++ b/cmd/peer-rest-server.go
@@ -977,6 +977,12 @@ func (s *peerRESTServer) TraceHandler(w http.ResponseWriter, r *http.Request) {
 		s.writeErrorResponse(w, err)
 		return
 	}
+
+	// Publish bootstrap events that have already occurred before client could subscribe.
+	if traceOpts.TraceTypes().Contains(madmin.TraceBootstrap) {
+		go globalBootstrapTracer.Publish(r.Context(), globalTrace)
+	}
+
 	keepAliveTicker := time.NewTicker(500 * time.Millisecond)
 	defer keepAliveTicker.Stop()
 
diff --git a/cmd/server-main.go b/cmd/server-main.go
index cdc050d49..46f598f04 100644
--- a/cmd/server-main.go
+++ b/cmd/server-main.go
@@ -352,6 +352,8 @@ func configRetriableErrors(err error) bool {
 }
 
 func bootstrapTrace(msg string) {
+	globalBootstrapTracer.Record(msg)
+
 	if globalTrace.NumSubscribers(madmin.TraceBootstrap) == 0 {
 		return
 	}