avoid too much auditing during decom/rebalance make it more robust (#19174)

there can be a sudden spike in tiny allocations, due to too much auditing being done, also don't hang on the ``` h.logCh <- entry ``` after initializing workers if you do not have a way to dequeue for some reason.
2025-11-07 12:52:58 -05:00 · 2024-03-06 03:43:16 -08:00
parent c26b8d4eb8
commit 74ccee6619
10 changed files with 202 additions and 100 deletions
--- a/cmd/erasure-server-pool-rebalance.go
+++ b/cmd/erasure-server-pool-rebalance.go
@@ -26,17 +26,17 @@ import (
 	"math"
 	"math/rand"
 	"net/http"
-	"strconv"
 	"strings"
-	"sync"
 	"time"

+	"github.com/dustin/go-humanize"
 	"github.com/lithammer/shortuuid/v4"
 	"github.com/minio/madmin-go/v3"
 	"github.com/minio/minio/internal/hash"
 	xioutil "github.com/minio/minio/internal/ioutil"
 	"github.com/minio/minio/internal/logger"
 	"github.com/minio/pkg/v2/env"
+	"github.com/minio/pkg/v2/workers"
 )

 //go:generate msgp -file $GOFILE -unexported
@@ -485,6 +485,41 @@ func (z *erasureServerPools) checkIfRebalanceDone(poolIdx int) bool {
 	return false
 }

+func (set *erasureObjects) listObjectsToRebalance(ctx context.Context, bucketName string, fn func(entry metaCacheEntry)) error {
+	disks, _ := set.getOnlineDisksWithHealing(false)
+	if len(disks) == 0 {
+		return fmt.Errorf("no online drives found for set with endpoints %s", set.getEndpoints())
+	}
+
+	// However many we ask, versions must exist on ~50%
+	listingQuorum := (set.setDriveCount + 1) / 2
+
+	// How to resolve partial results.
+	resolver := metadataResolutionParams{
+		dirQuorum: listingQuorum, // make sure to capture all quorum ratios
+		objQuorum: listingQuorum, // make sure to capture all quorum ratios
+		bucket:    bucketName,
+	}
+
+	err := listPathRaw(ctx, listPathRawOptions{
+		disks:          disks,
+		bucket:         bucketName,
+		recursive:      true,
+		forwardTo:      "",
+		minDisks:       listingQuorum,
+		reportNotFound: false,
+		agreed:         fn,
+		partial: func(entries metaCacheEntries, _ []error) {
+			entry, ok := entries.resolve(&resolver)
+			if ok {
+				fn(*entry)
+			}
+		},
+		finished: nil,
+	})
+	return err
+}
+
 // rebalanceBucket rebalances objects under bucket in poolIdx pool
 func (z *erasureServerPools) rebalanceBucket(ctx context.Context, bucket string, poolIdx int) error {
 	ctx = logger.SetReqInfo(ctx, &logger.ReqInfo{})
@@ -496,23 +531,25 @@ func (z *erasureServerPools) rebalanceBucket(ctx context.Context, bucket string,
 	rcfg, _ := getReplicationConfig(ctx, bucket)

 	pool := z.serverPools[poolIdx]
+
 	const envRebalanceWorkers = "_MINIO_REBALANCE_WORKERS"
-	wStr := env.Get(envRebalanceWorkers, strconv.Itoa(len(pool.sets)))
-	workerSize, err := strconv.Atoi(wStr)
+	workerSize, err := env.GetInt(envRebalanceWorkers, len(pool.sets))
 	if err != nil {
-		logger.LogIf(ctx, fmt.Errorf("invalid %s value: %s err: %v, defaulting to %d", envRebalanceWorkers, wStr, err, len(pool.sets)))
+		logger.LogIf(ctx, fmt.Errorf("invalid workers value err: %v, defaulting to %d", err, len(pool.sets)))
 		workerSize = len(pool.sets)
 	}
-	workers := make(chan struct{}, workerSize)
-	var wg sync.WaitGroup
-	for _, set := range pool.sets {
+
+	// Each decom worker needs one List() goroutine/worker
+	// add that many extra workers.
+	workerSize += len(pool.sets)
+
+	wk, err := workers.New(workerSize)
+	if err != nil {
+		return err
+	}
+
+	for setIdx, set := range pool.sets {
 		set := set
-		disks := set.getOnlineDisks()
-		if len(disks) == 0 {
-			logger.LogIf(ctx, fmt.Errorf("no online disks found for set with endpoints %s",
-				set.getEndpoints()))
-			continue
-		}

 		filterLifecycle := func(bucket, object string, fi FileInfo) bool {
 			if lc == nil {
@@ -531,10 +568,7 @@ func (z *erasureServerPools) rebalanceBucket(ctx context.Context, bucket string,
 		}

 		rebalanceEntry := func(entry metaCacheEntry) {
-			defer func() {
-				<-workers
-				wg.Done()
-			}()
+			defer wk.Give()

 			if entry.isDir() {
 				return
@@ -592,6 +626,7 @@ func (z *erasureServerPools) rebalanceBucket(ctx context.Context, bucket string,
 							DeleteReplication: version.ReplicationState,
 							DeleteMarker:      true, // make sure we create a delete marker
 							SkipRebalancing:   true, // make sure we skip the decommissioned pool
+							NoAuditLog:        true,
 						})
 					var failure bool
 					if err != nil && !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
@@ -603,6 +638,7 @@ func (z *erasureServerPools) rebalanceBucket(ctx context.Context, bucket string,
 						z.updatePoolStats(poolIdx, bucket, version)
 						rebalanced++
 					}
+					auditLogRebalance(ctx, "Rebalance:DeleteMarker", bucket, version.Name, versionID, err)
 					continue
 				}

@@ -619,6 +655,7 @@ func (z *erasureServerPools) rebalanceBucket(ctx context.Context, bucket string,
 							VersionID:    versionID,
 							NoDecryption: true,
 							NoLock:       true,
+							NoAuditLog:   true,
 						})
 					if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
 						// object deleted by the application, nothing to do here we move on.
@@ -663,6 +700,7 @@ func (z *erasureServerPools) rebalanceBucket(ctx context.Context, bucket string,
 					ObjectOptions{
 						DeletePrefix:       true, // use prefix delete to delete all versions at once.
 						DeletePrefixObject: true, // use prefix delete on exact object (this is an optimization to avoid fan-out calls)
+						NoAuditLog:         true,
 					},
 				)
 				stopFn(err)
@@ -673,44 +711,24 @@ func (z *erasureServerPools) rebalanceBucket(ctx context.Context, bucket string,
 			}
 		}

-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-
-			listQuorum := (len(disks) + 1) / 2
-
-			// How to resolve partial results.
-			resolver := metadataResolutionParams{
-				dirQuorum: listQuorum, // make sure to capture all quorum ratios
-				objQuorum: listQuorum, // make sure to capture all quorum ratios
-				bucket:    bucket,
-			}
-			err := listPathRaw(ctx, listPathRawOptions{
-				disks:          disks,
-				bucket:         bucket,
-				recursive:      true,
-				forwardTo:      "",
-				minDisks:       listQuorum,
-				reportNotFound: false,
-				agreed: func(entry metaCacheEntry) {
-					workers <- struct{}{}
-					wg.Add(1)
+		wk.Take()
+		go func(setIdx int) {
+			defer wk.Give()
+			err := set.listObjectsToRebalance(ctx, bucket,
+				func(entry metaCacheEntry) {
+					wk.Take()
 					go rebalanceEntry(entry)
 				},
-				partial: func(entries metaCacheEntries, _ []error) {
-					entry, ok := entries.resolve(&resolver)
-					if ok {
-						workers <- struct{}{}
-						wg.Add(1)
-						go rebalanceEntry(*entry)
-					}
-				},
-				finished: nil,
-			})
-			logger.LogIf(ctx, err)
-		}()
+			)
+			if err == nil || errors.Is(err, context.Canceled) {
+				return
+			}
+			setN := humanize.Ordinal(setIdx + 1)
+			logger.LogOnceIf(ctx, fmt.Errorf("listing objects from %s set failed with %v", setN, err), "rebalance-listing-failed"+setN)
+		}(setIdx)
 	}
-	wg.Wait()
+
+	wk.Wait()
 	return nil
 }

@@ -783,11 +801,12 @@ func (z *erasureServerPools) rebalanceObject(ctx context.Context, bucket string,
 		res, err := z.NewMultipartUpload(ctx, bucket, oi.Name, ObjectOptions{
 			VersionID:   oi.VersionID,
 			UserDefined: oi.UserDefined,
+			NoAuditLog:  true,
 		})
 		if err != nil {
 			return fmt.Errorf("rebalanceObject: NewMultipartUpload() %w", err)
 		}
-		defer z.AbortMultipartUpload(ctx, bucket, oi.Name, res.UploadID, ObjectOptions{})
+		defer z.AbortMultipartUpload(ctx, bucket, oi.Name, res.UploadID, ObjectOptions{NoAuditLog: true})

 		parts := make([]CompletePart, len(oi.Parts))
 		for i, part := range oi.Parts {
@@ -803,6 +822,7 @@ func (z *erasureServerPools) rebalanceObject(ctx context.Context, bucket string,
 					IndexCB: func() []byte {
 						return part.Index // Preserve part Index to ensure decompression works.
 					},
+					NoAuditLog: true,
 				})
 			if err != nil {
 				return fmt.Errorf("rebalanceObject: PutObjectPart() %w", err)
@@ -815,6 +835,7 @@ func (z *erasureServerPools) rebalanceObject(ctx context.Context, bucket string,
 		_, err = z.CompleteMultipartUpload(ctx, bucket, oi.Name, res.UploadID, parts, ObjectOptions{
 			DataMovement: true,
 			MTime:        oi.ModTime,
+			NoAuditLog:   true,
 		})
 		if err != nil {
 			err = fmt.Errorf("rebalanceObject: CompleteMultipartUpload() %w", err)
@@ -840,6 +861,7 @@ func (z *erasureServerPools) rebalanceObject(ctx context.Context, bucket string,
 			IndexCB: func() []byte {
 				return oi.Parts[0].Index // Preserve part Index to ensure decompression works.
 			},
+			NoAuditLog: true,
 		})
 	if err != nil {
 		err = fmt.Errorf("rebalanceObject: PutObject() %w", err)