Mirror of https://github.com/minio/minio.git (synced 2025-11-07 21:02:58 -05:00)
shuffle buckets randomly before being scanned (#17644)

This randomness is needed to avoid scanning the same buckets in the same order across different erasure sets. Scanning buckets in a random order instead allows a wider spread of ILM and replication checks. Additionally, do not loop over the list twice to fill the channel; fill the channel regardless of whether a bucket is new or old.
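The core pattern the commit adopts is easiest to see in isolation: seed a local math/rand source and walk a slice through rand.Perm so that every caller gets an independent order. Below is a minimal, self-contained sketch of that idea; the bucket names and the scan function are hypothetical stand-ins, not MinIO code.

// Minimal sketch of the per-caller shuffling pattern this commit adopts.
// The "buckets" slice and scan() are illustrative stand-ins only.
package main

import (
	"fmt"
	"math/rand"
	"time"
)

func main() {
	buckets := []string{"alpha", "beta", "gamma", "delta"}

	// Each caller seeds its own source, so two erasure sets running
	// this code walk the bucket list in independent random orders.
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	for _, idx := range r.Perm(len(buckets)) {
		scan(buckets[idx])
	}
}

func scan(name string) { fmt.Println("scanning", name) }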
@@ -277,31 +277,33 @@ func (er erasureObjects) getOnlineDisksWithHealing() (newDisks []StorageAPI, hea
 	var wg sync.WaitGroup
 	disks := er.getDisks()
 	infos := make([]DiskInfo, len(disks))
-	for _, i := range hashOrder(UTCNow().String(), len(disks)) {
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	for _, i := range r.Perm(len(disks)) {
 		i := i
 		wg.Add(1)
 		go func() {
 			defer wg.Done()

-			disk := disks[i-1]
+			disk := disks[i]
 			if disk == nil {
-				infos[i-1].Error = "nil drive"
+				infos[i].Error = "offline drive"
 				return
 			}

 			di, err := disk.DiskInfo(context.Background())
-			if err != nil {
+			if err != nil || di.Healing {
 				// - Do not consume disks which are not reachable
 				//   unformatted or simply not accessible for some reason.
 				//
 				// - Future: skip busy disks
-				infos[i-1].Error = err.Error()
+				if err != nil {
+					infos[i].Error = err.Error()
+				}
 				return
 			}

-			infos[i-1] = di
+			infos[i] = di
 		}()
 	}
 	wg.Wait()
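Two things change in this hunk: the iteration order now comes from r.Perm, which yields a 0-based permutation (hence disks[i-1] becomes disks[i]; the old hashOrder helper counts positions from 1), and drives that report Healing are now skipped along with unreachable ones. The sketch below reproduces just the concurrency shape of the loop, with probeDrive as a hypothetical stand-in for disk.DiskInfo.

// Sketch of the fan-out pattern above: one goroutine per index from a
// 0-based permutation, each writing only its own slot of a pre-sized
// slice, so no mutex is needed. probeDrive is a hypothetical stand-in.
package main

import (
	"fmt"
	"math/rand"
	"sync"
	"time"
)

type info struct {
	ID    int
	Error string
}

func probeDrive(i int) (info, error) { return info{ID: i}, nil }

func main() {
	infos := make([]info, 8)

	var wg sync.WaitGroup
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	for _, i := range r.Perm(len(infos)) {
		i := i // capture the loop variable per goroutine (pre-Go 1.22 semantics)
		wg.Add(1)
		go func() {
			defer wg.Done()
			di, err := probeDrive(i)
			if err != nil {
				infos[i].Error = err.Error()
				return
			}
			infos[i] = di
		}()
	}
	wg.Wait()
	fmt.Println(infos)
}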
@@ -373,23 +375,30 @@ func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, wa
 	// Put all buckets into channel.
 	bucketCh := make(chan BucketInfo, len(buckets))

+	// Shuffle buckets to ensure total randomness of buckets being scanned.
+	// Otherwise the same set of buckets would always get scanned across
+	// erasure sets at any given point in time. This allows different buckets
+	// to be scanned in a different order per erasure set; this wider spread
+	// is needed when there are lots of buckets with a different order of
+	// objects in them.
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	permutes := r.Perm(len(buckets))
 	// Add new buckets first
-	for _, b := range buckets {
-		if oldCache.find(b.Name) == nil {
+	for _, idx := range permutes {
+		b := buckets[idx]
+		if e := oldCache.find(b.Name); e == nil {
 			bucketCh <- b
 		}
 	}

 	// Add existing buckets.
-	for _, b := range buckets {
-		e := oldCache.find(b.Name)
-		if e != nil {
+	for _, idx := range permutes {
+		b := buckets[idx]
+		if e := oldCache.find(b.Name); e != nil {
 			cache.replace(b.Name, dataUsageRoot, *e)
 			bucketCh <- b
 		}
 	}

 	close(bucketCh)

 	bucketResults := make(chan dataUsageEntryInfo, len(disks))

 	// Start async collector/saver.
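Both loops now walk the same permutation, so new and known buckets each enter the channel in random order while new buckets still drain first; the channel is sized to len(buckets) up front, so neither pass can block. A compact sketch of that queueing pattern, with a plain map standing in for the MinIO-internal oldCache.find:

// Sketch of the channel-fill pattern above: one shared permutation,
// new buckets queued before known ones, channel pre-sized so the two
// passes never block. "known" is a stand-in for oldCache lookups.
package main

import (
	"fmt"
	"math/rand"
	"time"
)

func main() {
	buckets := []string{"a", "b", "c", "d"}
	known := map[string]bool{"b": true, "d": true} // stand-in for oldCache

	bucketCh := make(chan string, len(buckets))

	perm := rand.New(rand.NewSource(time.Now().UnixNano())).Perm(len(buckets))

	// New buckets first: they have no prior usage data to reuse.
	for _, idx := range perm {
		if !known[buckets[idx]] {
			bucketCh <- buckets[idx]
		}
	}
	// Then buckets with an existing cache entry.
	for _, idx := range perm {
		if known[buckets[idx]] {
			bucketCh <- buckets[idx]
		}
	}
	close(bucketCh)

	for b := range bucketCh {
		fmt.Println("queued:", b)
	}
}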
@@ -428,11 +437,6 @@ func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, wa
 		}
 	}()

-	// Shuffle disks to ensure a total randomness of bucket/disk association to ensure
-	// that objects that are not present in all disks are accounted and ILM applied.
-	r := rand.New(rand.NewSource(time.Now().UnixNano()))
-	r.Shuffle(len(disks), func(i, j int) { disks[i], disks[j] = disks[j], disks[i] })
-
 	// Restrict parallelism for disk usage scanner
 	// upto GOMAXPROCS if GOMAXPROCS is < len(disks)
 	maxProcs := runtime.GOMAXPROCS(0)
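The hunk ends just after maxProcs is read, so the code that applies the limit is not shown here. A common Go idiom for capping in-flight workers at min(GOMAXPROCS, len(disks)) is a buffered channel used as a counting semaphore; the sketch below is illustrative only and is not taken from MinIO.

// Illustrative only: capping worker parallelism with a buffered
// channel as a counting semaphore. Not necessarily what MinIO does
// after this hunk; the disks slice is a stand-in.
package main

import (
	"fmt"
	"runtime"
	"sync"
)

func main() {
	disks := make([]int, 16) // stand-in for the disk slice

	maxProcs := runtime.GOMAXPROCS(0)
	if maxProcs > len(disks) {
		maxProcs = len(disks)
	}

	sem := make(chan struct{}, maxProcs) // at most maxProcs in flight
	var wg sync.WaitGroup
	for i := range disks {
		wg.Add(1)
		sem <- struct{}{} // acquire a slot before launching
		go func(i int) {
			defer wg.Done()
			defer func() { <-sem }() // release the slot when done
			fmt.Println("scanning disk", i)
		}(i)
	}
	wg.Wait()
}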