avoid ListBuckets returning quorum errors when node is down (#10555)

Also, revamp the way ListBuckets works and make a few portions
of the healing logic parallel

- walk objects for healing disks in parallel
- collect the list of buckets in parallel across drives
- provide consistent view for listBuckets()
This commit is contained in:
Harshavardhana
2020-09-24 09:53:38 -07:00
committed by GitHub
parent d778d034e7
commit ca989eb0b3
8 changed files with 78 additions and 126 deletions

View File

@@ -155,34 +155,37 @@ func healBucket(ctx context.Context, storageDisks []StorageAPI, storageEndpoints
// listAllBuckets lists all buckets from all disks. It also
// returns the occurrence of each bucket in all disks
func listAllBuckets(storageDisks []StorageAPI, healBuckets map[string]VolInfo) (err error) {
for _, disk := range storageDisks {
if disk == nil {
continue
}
var volsInfo []VolInfo
volsInfo, err = disk.ListVols(context.TODO())
if err != nil {
if IsErrIgnored(err, bucketMetadataOpIgnoredErrs...) {
continue
func listAllBuckets(ctx context.Context, storageDisks []StorageAPI, healBuckets map[string]VolInfo) error {
g := errgroup.WithNErrs(len(storageDisks))
var mu sync.Mutex
for index := range storageDisks {
index := index
g.Go(func() error {
if storageDisks[index] == nil {
// we ignore disk not found errors
return nil
}
return err
}
for _, volInfo := range volsInfo {
// StorageAPI can send volume names which are
// incompatible with buckets - these are
// skipped, like the meta-bucket.
if isReservedOrInvalidBucket(volInfo.Name, false) {
continue
volsInfo, err := storageDisks[index].ListVols(ctx)
if err != nil {
return err
}
// always save unique buckets across drives.
if _, ok := healBuckets[volInfo.Name]; !ok {
healBuckets[volInfo.Name] = volInfo
for _, volInfo := range volsInfo {
// StorageAPI can send volume names which are
// incompatible with buckets - these are
// skipped, like the meta-bucket.
if isReservedOrInvalidBucket(volInfo.Name, false) {
continue
}
mu.Lock()
if _, ok := healBuckets[volInfo.Name]; !ok {
healBuckets[volInfo.Name] = volInfo
}
mu.Unlock()
}
}
return nil
}, index)
}
return nil
return reduceReadQuorumErrs(ctx, g.Wait(), bucketMetadataOpIgnoredErrs, len(storageDisks)/2)
}
// Only heal on disks where we are sure that healing is needed. We can expand