avoid too much auditing during decom/rebalance make it more robust (#19174)

there can be a sudden spike in tiny allocations,
due to too much auditing being done, also don't hang
on the

```
h.logCh <- entry
```

after initializing workers if you do not have a way to
dequeue for some reason.
This commit is contained in:
Harshavardhana
2024-03-06 03:43:16 -08:00
committed by GitHub
parent c26b8d4eb8
commit 74ccee6619
10 changed files with 202 additions and 100 deletions

View File

@@ -26,7 +26,6 @@ import (
"math/rand"
"net/http"
"sort"
"strconv"
"strings"
"time"
@@ -620,11 +619,12 @@ func (z *erasureServerPools) decommissionObject(ctx context.Context, bucket stri
res, err := z.NewMultipartUpload(ctx, bucket, objInfo.Name, ObjectOptions{
VersionID: objInfo.VersionID,
UserDefined: objInfo.UserDefined,
NoAuditLog: true,
})
if err != nil {
return fmt.Errorf("decommissionObject: NewMultipartUpload() %w", err)
}
defer z.AbortMultipartUpload(ctx, bucket, objInfo.Name, res.UploadID, ObjectOptions{})
defer z.AbortMultipartUpload(ctx, bucket, objInfo.Name, res.UploadID, ObjectOptions{NoAuditLog: true})
parts := make([]CompletePart, len(objInfo.Parts))
for i, part := range objInfo.Parts {
hr, err := hash.NewReader(ctx, io.LimitReader(gr, part.Size), part.Size, "", "", part.ActualSize)
@@ -639,6 +639,7 @@ func (z *erasureServerPools) decommissionObject(ctx context.Context, bucket stri
IndexCB: func() []byte {
return part.Index // Preserve part Index to ensure decompression works.
},
NoAuditLog: true,
})
if err != nil {
return fmt.Errorf("decommissionObject: PutObjectPart() %w", err)
@@ -655,6 +656,7 @@ func (z *erasureServerPools) decommissionObject(ctx context.Context, bucket stri
_, err = z.CompleteMultipartUpload(ctx, bucket, objInfo.Name, res.UploadID, parts, ObjectOptions{
DataMovement: true,
MTime: objInfo.ModTime,
NoAuditLog: true,
})
if err != nil {
err = fmt.Errorf("decommissionObject: CompleteMultipartUpload() %w", err)
@@ -680,6 +682,7 @@ func (z *erasureServerPools) decommissionObject(ctx context.Context, bucket stri
IndexCB: func() []byte {
return objInfo.Parts[0].Index // Preserve part Index to ensure decompression works.
},
NoAuditLog: true,
})
if err != nil {
err = fmt.Errorf("decommissionObject: PutObject() %w", err)
@@ -699,17 +702,18 @@ func (v versionsSorter) reverse() {
}
func (set *erasureObjects) listObjectsToDecommission(ctx context.Context, bi decomBucketInfo, fn func(entry metaCacheEntry)) error {
disks := set.getOnlineDisks()
disks, _ := set.getOnlineDisksWithHealing(false)
if len(disks) == 0 {
return fmt.Errorf("no online drives found for set with endpoints %s", set.getEndpoints())
}
listQuorum := (len(disks) + 1) / 2
// However many we ask, versions must exist on ~50%
listingQuorum := (set.setDriveCount + 1) / 2
// How to resolve partial results.
resolver := metadataResolutionParams{
dirQuorum: listQuorum, // make sure to capture all quorum ratios
objQuorum: listQuorum, // make sure to capture all quorum ratios
dirQuorum: listingQuorum, // make sure to capture all quorum ratios
objQuorum: listingQuorum, // make sure to capture all quorum ratios
bucket: bi.Name,
}
@@ -719,7 +723,7 @@ func (set *erasureObjects) listObjectsToDecommission(ctx context.Context, bi dec
path: bi.Prefix,
recursive: true,
forwardTo: "",
minDisks: listQuorum,
minDisks: listingQuorum,
reportNotFound: false,
agreed: fn,
partial: func(entries metaCacheEntries, _ []error) {
@@ -736,14 +740,15 @@ func (set *erasureObjects) listObjectsToDecommission(ctx context.Context, bi dec
func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool *erasureSets, bi decomBucketInfo) error {
ctx = logger.SetReqInfo(ctx, &logger.ReqInfo{})
wStr := env.Get("_MINIO_DECOMMISSION_WORKERS", strconv.Itoa(len(pool.sets)))
workerSize, err := strconv.Atoi(wStr)
const envDecomWorkers = "_MINIO_DECOMMISSION_WORKERS"
workerSize, err := env.GetInt(envDecomWorkers, len(pool.sets))
if err != nil {
return err
logger.LogIf(ctx, fmt.Errorf("invalid workers value err: %v, defaulting to %d", err, len(pool.sets)))
workerSize = len(pool.sets)
}
// each set get its own thread separate from the concurrent
// objects/versions being decommissioned.
// Each decom worker needs one List() goroutine/worker
// add that many extra workers.
workerSize += len(pool.sets)
wk, err := workers.New(workerSize)
@@ -839,6 +844,7 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool
DeleteReplication: version.ReplicationState,
DeleteMarker: true, // make sure we create a delete marker
SkipDecommissioned: true, // make sure we skip the decommissioned pool
NoAuditLog: true,
})
var failure bool
if err != nil {
@@ -858,6 +864,7 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool
// Success keep a count.
decommissioned++
}
auditLogDecom(ctx, "DecomCopyDeleteMarker", bi.Name, version.Name, versionID, err)
continue
}
@@ -888,6 +895,7 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool
VersionID: versionID,
NoDecryption: true,
NoLock: true,
NoAuditLog: true,
})
if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
// object deleted by the application, nothing to do here we move on.
@@ -941,6 +949,7 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool
ObjectOptions{
DeletePrefix: true, // use prefix delete to delete all versions at once.
DeletePrefixObject: true, // use prefix delete on exact object (this is an optimization to avoid fan-out calls)
NoAuditLog: true,
},
)
stopFn(err)
@@ -965,6 +974,10 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool
// We will perpetually retry listing if it fails, since we cannot
// possibly give up in this matter
for {
if contextCanceled(ctx) {
break
}
err := set.listObjectsToDecommission(ctx, bi,
func(entry metaCacheEntry) {
wk.Take()