fix: HealBucket regression for empty buckets, simplify it (#18815)

This commit is contained in:
Harshavardhana 2024-01-17 15:19:09 -08:00 committed by GitHub
parent 479940b7d0
commit 9588978028
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 93 additions and 94 deletions

View File

@ -1931,76 +1931,12 @@ func (z *erasureServerPools) HealFormat(ctx context.Context, dryRun bool) (madmi
} }
func (z *erasureServerPools) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) { func (z *erasureServerPools) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) {
r := madmin.HealResultItem{
Type: madmin.HealItemBucket,
Bucket: bucket,
}
// Attempt heal on the bucket metadata, ignore any failures // Attempt heal on the bucket metadata, ignore any failures
hopts := opts hopts := opts
hopts.Recreate = false hopts.Recreate = false
defer z.HealObject(ctx, minioMetaBucket, pathJoin(bucketMetaPrefix, bucket, bucketMetadataFile), "", hopts) defer z.HealObject(ctx, minioMetaBucket, pathJoin(bucketMetaPrefix, bucket, bucketMetadataFile), "", hopts)
type DiskStat struct { return z.s3Peer.HealBucket(ctx, bucket, opts)
VolInfos []VolInfo
Errs []error
}
for _, pool := range z.serverPools {
// map of node wise disk stats
diskStats := make(map[string]DiskStat)
for _, set := range pool.sets {
for _, disk := range set.getDisks() {
if disk == OfflineDisk {
continue
}
vi, err := disk.StatVol(ctx, bucket)
hostName := disk.Hostname()
if disk.IsLocal() {
hostName = "local"
}
ds, ok := diskStats[hostName]
if !ok {
newds := DiskStat{
VolInfos: []VolInfo{vi},
Errs: []error{err},
}
diskStats[hostName] = newds
} else {
ds.VolInfos = append(ds.VolInfos, vi)
ds.Errs = append(ds.Errs, err)
diskStats[hostName] = ds
}
}
}
nodeCount := len(diskStats)
bktNotFoundCount := 0
for _, ds := range diskStats {
if isAllBucketsNotFound(ds.Errs) {
bktNotFoundCount++
}
}
// if the bucket is not found on more than half the number of nodes, it's dangling
if bktNotFoundCount > nodeCount/2 {
opts.Remove = true
} else {
opts.Recreate = true
}
result, err := z.s3Peer.HealBucket(ctx, bucket, opts)
if err != nil {
if _, ok := err.(BucketNotFound); ok {
continue
}
return result, err
}
r.DiskCount += result.DiskCount
r.SetCount += result.SetCount
r.Before.Drives = append(r.Before.Drives, result.Before.Drives...)
r.After.Drives = append(r.After.Drives, result.After.Drives...)
}
return r, nil
} }
// Walk a bucket, optionally prefix recursively, until we have returned // Walk a bucket, optionally prefix recursively, until we have returned

View File

@ -135,6 +135,35 @@ func NewS3PeerSys(endpoints EndpointServerPools) *S3PeerSys {
func (sys *S3PeerSys) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) { func (sys *S3PeerSys) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) {
g := errgroup.WithNErrs(len(sys.peerClients)) g := errgroup.WithNErrs(len(sys.peerClients))
for idx, client := range sys.peerClients {
idx := idx
client := client
g.Go(func() error {
if client == nil {
return errPeerOffline
}
_, err := client.GetBucketInfo(ctx, bucket, BucketOptions{})
return err
}, idx)
}
errs := g.Wait()
var poolErrs []error
for poolIdx := 0; poolIdx < sys.poolsCount; poolIdx++ {
perPoolErrs := make([]error, 0, len(sys.peerClients))
for i, client := range sys.peerClients {
if slices.Contains(client.GetPools(), poolIdx) {
perPoolErrs = append(perPoolErrs, errs[i])
}
}
quorum := len(perPoolErrs) / 2
poolErrs = append(poolErrs, reduceWriteQuorumErrs(ctx, perPoolErrs, bucketOpIgnoredErrs, quorum))
}
opts.Remove = isAllBucketsNotFound(poolErrs)
opts.Recreate = !opts.Remove
healBucketResults := make([]madmin.HealResultItem, len(sys.peerClients)) healBucketResults := make([]madmin.HealResultItem, len(sys.peerClients))
for idx, client := range sys.peerClients { for idx, client := range sys.peerClients {
idx := idx idx := idx
@ -152,7 +181,7 @@ func (sys *S3PeerSys) HealBucket(ctx context.Context, bucket string, opts madmin
}, idx) }, idx)
} }
errs := g.Wait() errs = g.Wait()
for poolIdx := 0; poolIdx < sys.poolsCount; poolIdx++ { for poolIdx := 0; poolIdx < sys.poolsCount; poolIdx++ {
perPoolErrs := make([]error, 0, len(sys.peerClients)) perPoolErrs := make([]error, 0, len(sys.peerClients))

View File

@ -21,7 +21,7 @@ export MINIO_KMS_AUTO_ENCRYPTION=on
export MINIO_KMS_SECRET_KEY=my-minio-key:OSMM+vkKUTCvQs9YL/CVMIMt43HFhkUpqJxTmGl6rYw= export MINIO_KMS_SECRET_KEY=my-minio-key:OSMM+vkKUTCvQs9YL/CVMIMt43HFhkUpqJxTmGl6rYw=
export MC_HOST_myminio="http://minioadmin:minioadmin@localhost:9000/" export MC_HOST_myminio="http://minioadmin:minioadmin@localhost:9000/"
(minio server /tmp/xl/{1...10}/disk{0...1} 2>&1 >/dev/null) & (minio server http://localhost:9000/tmp/xl/{1...10}/disk{0...1} 2>&1 >/dev/null) &
pid=$! pid=$!
sleep 30 sleep 30
@ -52,8 +52,11 @@ policy_count=$(./mc admin policy list myminio/ | wc -l)
kill $pid kill $pid
(minio server /tmp/xl/{1...10}/disk{0...1} /tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded.log) & (minio server http://localhost:9000/tmp/xl/{1...10}/disk{0...1} http://localhost:9001/tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded_1.log) &
pid=$! pid_1=$!
(minio server --address ":9001" http://localhost:9000/tmp/xl/{1...10}/disk{0...1} http://localhost:9001/tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded_2.log) &
pid_2=$!
sleep 30 sleep 30
@ -82,20 +85,25 @@ fi
./mc ls -r myminio/versioned/ >expanded_ns.txt ./mc ls -r myminio/versioned/ >expanded_ns.txt
./mc ls -r --versions myminio/versioned/ >expanded_ns_versions.txt ./mc ls -r --versions myminio/versioned/ >expanded_ns_versions.txt
./mc admin decom start myminio/ /tmp/xl/{1...10}/disk{0...1} ./mc admin decom start myminio/ http://localhost:9000/tmp/xl/{1...10}/disk{0...1}
until $(./mc admin decom status myminio/ | grep -q Complete); do until $(./mc admin decom status myminio/ | grep -q Complete); do
echo "waiting for decom to finish..." echo "waiting for decom to finish..."
sleep 1 sleep 1
done done
kill $pid kill $pid_1
kill $pid_2
(minio server /tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/removed.log) & sleep 5
(minio server --address ":9001" http://localhost:9001/tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/removed.log) &
pid=$! pid=$!
sleep 30 sleep 30
export MC_HOST_myminio="http://minioadmin:minioadmin@localhost:9001/"
decom_user_count=$(./mc admin user list myminio/ | wc -l) decom_user_count=$(./mc admin user list myminio/ | wc -l)
decom_policy_count=$(./mc admin policy list myminio/ | wc -l) decom_policy_count=$(./mc admin policy list myminio/ | wc -l)

View File

@ -16,7 +16,7 @@ export CI=true
export MINIO_KMS_SECRET_KEY=my-minio-key:OSMM+vkKUTCvQs9YL/CVMIMt43HFhkUpqJxTmGl6rYw= export MINIO_KMS_SECRET_KEY=my-minio-key:OSMM+vkKUTCvQs9YL/CVMIMt43HFhkUpqJxTmGl6rYw=
export MC_HOST_myminio="http://minioadmin:minioadmin@localhost:9000/" export MC_HOST_myminio="http://minioadmin:minioadmin@localhost:9000/"
(minio server /tmp/xl/{1...10}/disk{0...1} 2>&1 >/dev/null) & (minio server http://localhost:9000/tmp/xl/{1...10}/disk{0...1} 2>&1 >/dev/null) &
pid=$! pid=$!
sleep 30 sleep 30
@ -49,8 +49,11 @@ policy_count=$(./mc admin policy list myminio/ | wc -l)
kill $pid kill $pid
(minio server /tmp/xl/{1...10}/disk{0...1} /tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded.log) & (minio server http://localhost:9000/tmp/xl/{1...10}/disk{0...1} http://localhost:9001/tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded_1.log) &
pid=$! pid_1=$!
(minio server --address ":9001" http://localhost:9000/tmp/xl/{1...10}/disk{0...1} http://localhost:9001/tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded_2.log) &
pid_2=$!
sleep 30 sleep 30
@ -86,20 +89,25 @@ fi
./mc ls -r myminio/versioned/ >expanded_ns.txt ./mc ls -r myminio/versioned/ >expanded_ns.txt
./mc ls -r --versions myminio/versioned/ >expanded_ns_versions.txt ./mc ls -r --versions myminio/versioned/ >expanded_ns_versions.txt
./mc admin decom start myminio/ /tmp/xl/{1...10}/disk{0...1} ./mc admin decom start myminio/ http://localhost:9000/tmp/xl/{1...10}/disk{0...1}
until $(./mc admin decom status myminio/ | grep -q Complete); do until $(./mc admin decom status myminio/ | grep -q Complete); do
echo "waiting for decom to finish..." echo "waiting for decom to finish..."
sleep 1 sleep 1
done done
kill $pid kill $pid_1
kill $pid_2
(minio server /tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/removed.log) & sleep 5
(minio server --address ":9001" http://localhost:9001/tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/removed.log) &
pid=$! pid=$!
sleep 30 sleep 30
export MC_HOST_myminio="http://minioadmin:minioadmin@localhost:9001/"
decom_user_count=$(./mc admin user list myminio/ | wc -l) decom_user_count=$(./mc admin user list myminio/ | wc -l)
decom_policy_count=$(./mc admin policy list myminio/ | wc -l) decom_policy_count=$(./mc admin policy list myminio/ | wc -l)

View File

@ -16,7 +16,7 @@ export CI=true
export MINIO_KMS_AUTO_ENCRYPTION=on export MINIO_KMS_AUTO_ENCRYPTION=on
export MINIO_KMS_SECRET_KEY=my-minio-key:OSMM+vkKUTCvQs9YL/CVMIMt43HFhkUpqJxTmGl6rYw= export MINIO_KMS_SECRET_KEY=my-minio-key:OSMM+vkKUTCvQs9YL/CVMIMt43HFhkUpqJxTmGl6rYw=
(minio server /tmp/xl/{1...10}/disk{0...1} 2>&1 >/dev/null) & (minio server http://localhost:9000/tmp/xl/{1...10}/disk{0...1} 2>&1 >/dev/null) &
pid=$! pid=$!
sleep 30 sleep 30
@ -48,8 +48,12 @@ user_count=$(./mc admin user list myminio/ | wc -l)
policy_count=$(./mc admin policy list myminio/ | wc -l) policy_count=$(./mc admin policy list myminio/ | wc -l)
kill $pid kill $pid
(minio server /tmp/xl/{1...10}/disk{0...1} /tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded.log) &
pid=$! (minio server http://localhost:9000/tmp/xl/{1...10}/disk{0...1} http://localhost:9001/tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded_1.log) &
pid_1=$!
(minio server --address ":9001" http://localhost:9000/tmp/xl/{1...10}/disk{0...1} http://localhost:9001/tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded_2.log) &
pid_2=$!
sleep 30 sleep 30
@ -78,20 +82,25 @@ fi
./mc ls -r myminio/versioned/ >expanded_ns.txt ./mc ls -r myminio/versioned/ >expanded_ns.txt
./mc ls -r --versions myminio/versioned/ >expanded_ns_versions.txt ./mc ls -r --versions myminio/versioned/ >expanded_ns_versions.txt
./mc admin decom start myminio/ /tmp/xl/{1...10}/disk{0...1} ./mc admin decom start myminio/ http://localhost:9000/tmp/xl/{1...10}/disk{0...1}
until $(./mc admin decom status myminio/ | grep -q Complete); do until $(./mc admin decom status myminio/ | grep -q Complete); do
echo "waiting for decom to finish..." echo "waiting for decom to finish..."
sleep 1 sleep 1
done done
kill $pid kill $pid_1
kill $pid_2
(minio server /tmp/xl/{11...30}/disk{0...3} 2>&1 >/dev/null) & sleep 5
(minio server --address ":9001" http://localhost:9001/tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/removed.log) &
pid=$! pid=$!
sleep 30 sleep 30
export MC_HOST_myminio="http://minioadmin:minioadmin@localhost:9001/"
decom_user_count=$(./mc admin user list myminio/ | wc -l) decom_user_count=$(./mc admin user list myminio/ | wc -l)
decom_policy_count=$(./mc admin policy list myminio/ | wc -l) decom_policy_count=$(./mc admin policy list myminio/ | wc -l)

View File

@ -16,7 +16,7 @@ fi
export CI=true export CI=true
export MINIO_SCANNER_SPEED=fastest export MINIO_SCANNER_SPEED=fastest
(minio server /tmp/xl/{1...10}/disk{0...1} 2>&1 >/tmp/decom.log) & (minio server http://localhost:9000/tmp/xl/{1...10}/disk{0...1} 2>&1 >/tmp/decom.log) &
pid=$! pid=$!
sleep 30 sleep 30
@ -48,16 +48,16 @@ user_count=$(./mc admin user list myminio/ | wc -l)
policy_count=$(./mc admin policy list myminio/ | wc -l) policy_count=$(./mc admin policy list myminio/ | wc -l)
## create a warm tier instance ## create a warm tier instance
(minio server /tmp/xltier/{1...4}/disk{0...1} --address :9001 2>&1 >/dev/null) & (minio server /tmp/xltier/{1...4}/disk{0...1} --address :9002 2>&1 >/dev/null) &
sleep 30 sleep 30
export MC_HOST_mytier="http://minioadmin:minioadmin@localhost:9001/" export MC_HOST_mytier="http://minioadmin:minioadmin@localhost:9002/"
./mc mb -l myminio/bucket2 ./mc mb -l myminio/bucket2
./mc mb -l mytier/tiered ./mc mb -l mytier/tiered
## create a tier and set up ilm policy to tier immediately ## create a tier and set up ilm policy to tier immediately
./mc admin tier add minio myminio TIER1 --endpoint http://localhost:9001 --access-key minioadmin --secret-key minioadmin --bucket tiered --prefix prefix5/ ./mc admin tier add minio myminio TIER1 --endpoint http://localhost:9002 --access-key minioadmin --secret-key minioadmin --bucket tiered --prefix prefix5/
./mc ilm add myminio/bucket2 --transition-days 0 --transition-tier TIER1 --transition-days 0 ./mc ilm add myminio/bucket2 --transition-days 0 --transition-tier TIER1 --transition-days 0
## mirror some content to bucket2 and capture versions tiered ## mirror some content to bucket2 and capture versions tiered
@ -70,8 +70,12 @@ sleep 30
./mc ls -r --versions mytier/tiered/ >tiered_ns_versions.txt ./mc ls -r --versions mytier/tiered/ >tiered_ns_versions.txt
kill $pid kill $pid
(minio server /tmp/xl/{1...10}/disk{0...1} /tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded.log) &
pid=$! (minio server http://localhost:9000/tmp/xl/{1...10}/disk{0...1} http://localhost:9001/tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded_1.log) &
pid_1=$!
(minio server --address ":9001" http://localhost:9000/tmp/xl/{1...10}/disk{0...1} http://localhost:9001/tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded_2.log) &
pid_2=$!
sleep 30 sleep 30
@ -100,25 +104,30 @@ fi
./mc ls -r myminio/versioned/ >expanded_ns.txt ./mc ls -r myminio/versioned/ >expanded_ns.txt
./mc ls -r --versions myminio/versioned/ >expanded_ns_versions.txt ./mc ls -r --versions myminio/versioned/ >expanded_ns_versions.txt
./mc admin decom start myminio/ /tmp/xl/{1...10}/disk{0...1} ./mc admin decom start myminio/ http://localhost:9000/tmp/xl/{1...10}/disk{0...1}
count=0 count=0
until $(./mc admin decom status myminio/ | grep -q Complete); do until $(./mc admin decom status myminio/ | grep -q Complete); do
echo "waiting for decom to finish..." echo "waiting for decom to finish..."
count=$((count + 1)) count=$((count + 1))
if [ ${count} -eq 120 ]; then if [ ${count} -eq 120 ]; then
./mc cat /tmp/expanded.log ./mc cat /tmp/expanded_*.log
fi fi
sleep 1 sleep 1
done done
kill $pid kill $pid_1
kill $pid_2
(minio server /tmp/xl/{11...30}/disk{0...3} 2>&1 >/dev/null) & sleep 5
(minio server --address ":9001" http://localhost:9001/tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/removed.log) &
pid=$! pid=$!
sleep 30 sleep 30
export MC_HOST_myminio="http://minioadmin:minioadmin@localhost:9001/"
decom_user_count=$(./mc admin user list myminio/ | wc -l) decom_user_count=$(./mc admin user list myminio/ | wc -l)
decom_policy_count=$(./mc admin policy list myminio/ | wc -l) decom_policy_count=$(./mc admin policy list myminio/ | wc -l)