do not panic on rebalance during server restarts (#19563)

This PR makes a feasible approach to handle all the scenarios
that we must face to avoid returning "panic."

Instead, we must return "errServerNotInitialized" when a
bucketMetadataSys.Get() is called, allowing the caller to
retry their operation and wait.

Bonus fix the way data-usage-cache stores the object.
Instead of storing usage-cache.bin with the bucket as
`.minio.sys/buckets`, the `buckets` must be relative
to the bucket `.minio.sys` as part of the object name.

Otherwise, there is no way to decommission entries at
`.minio.sys/buckets` and their final erasure set positions.

A bucket must never have a `/` in it. Adds code to read()
from existing data-usage.bin upon upgrade.
This commit is contained in:
Harshavardhana
2024-04-22 10:49:30 -07:00
committed by GitHub
parent 6bfff7532e
commit 95c65f4e8f
14 changed files with 418 additions and 230 deletions

View File

@@ -32,6 +32,10 @@ import (
"github.com/dustin/go-humanize"
"github.com/lithammer/shortuuid/v4"
"github.com/minio/madmin-go/v3"
"github.com/minio/minio/internal/bucket/lifecycle"
objectlock "github.com/minio/minio/internal/bucket/object/lock"
"github.com/minio/minio/internal/bucket/replication"
"github.com/minio/minio/internal/bucket/versioning"
"github.com/minio/minio/internal/hash"
xioutil "github.com/minio/minio/internal/ioutil"
"github.com/minio/minio/internal/logger"
@@ -448,9 +452,11 @@ func (z *erasureServerPools) rebalanceBuckets(ctx context.Context, poolIdx int)
}
stopFn := globalRebalanceMetrics.log(rebalanceMetricRebalanceBucket, poolIdx, bucket)
err = z.rebalanceBucket(ctx, bucket, poolIdx)
if err != nil {
if err = z.rebalanceBucket(ctx, bucket, poolIdx); err != nil {
stopFn(err)
if errors.Is(err, errServerNotInitialized) || errors.Is(err, errBucketMetadataNotInitialized) {
continue
}
rebalanceLogIf(ctx, err)
return
}
@@ -521,14 +527,36 @@ func (set *erasureObjects) listObjectsToRebalance(ctx context.Context, bucketNam
}
// rebalanceBucket rebalances objects under bucket in poolIdx pool
func (z *erasureServerPools) rebalanceBucket(ctx context.Context, bucket string, poolIdx int) error {
func (z *erasureServerPools) rebalanceBucket(ctx context.Context, bucket string, poolIdx int) (err error) {
ctx = logger.SetReqInfo(ctx, &logger.ReqInfo{})
vc, _ := globalBucketVersioningSys.Get(bucket)
// Check if the current bucket has a configured lifecycle policy
lc, _ := globalLifecycleSys.Get(bucket)
// Check if bucket is object locked.
lr, _ := globalBucketObjectLockSys.Get(bucket)
rcfg, _ := getReplicationConfig(ctx, bucket)
var vc *versioning.Versioning
var lc *lifecycle.Lifecycle
var lr objectlock.Retention
var rcfg *replication.Config
if bucket != minioMetaBucket {
vc, err = globalBucketVersioningSys.Get(bucket)
if err != nil {
return err
}
// Check if the current bucket has a configured lifecycle policy
lc, err = globalLifecycleSys.Get(bucket)
if err != nil && !errors.Is(err, BucketLifecycleNotFound{Bucket: bucket}) {
return err
}
// Check if bucket is object locked.
lr, err = globalBucketObjectLockSys.Get(bucket)
if err != nil {
return err
}
rcfg, err = getReplicationConfig(ctx, bucket)
if err != nil {
return err
}
}
pool := z.serverPools[poolIdx]