Add large bucket support for erasure coded backend (#5160)

This PR implements an object layer which
combines multiple erasure sets of XL layers
into a unified namespace.

This object layer extends the existing
erasure coded implementation. The design
assumes that providing more than 16 disks is
a static configuration as well, i.e. if you
started the setup with 32 disks as 4 sets of
8 disks each, you must always provide those
same 4 sets.
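
To make the layout rule concrete, here is a minimal, hypothetical sketch
(not part of this change) of how a static set layout can be validated:
the total disk count must split evenly into the given number of sets,
and each set may hold at most 16 disks.

    package main

    import "fmt"

    // validateSetLayout is a hypothetical helper that mirrors the rule
    // described above: the total disk count must split evenly into sets,
    // and each set may hold at most 16 disks.
    func validateSetLayout(totalDisks, setCount int) (int, error) {
    	if setCount <= 0 || totalDisks%setCount != 0 {
    		return 0, fmt.Errorf("%d disks cannot be split evenly into %d sets", totalDisks, setCount)
    	}
    	disksPerSet := totalDisks / setCount
    	if disksPerSet > 16 {
    		return 0, fmt.Errorf("%d disks per set exceeds the 16 disk maximum", disksPerSet)
    	}
    	return disksPerSet, nil
    }

    func main() {
    	// The example above: 32 disks started as 4 sets gives 8 disks per set.
    	disksPerSet, err := validateSetLayout(32, 4)
    	fmt.Println(disksPerSet, err) // prints: 8 <nil>
    }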

Some design details and restrictions:

- Objects are distributed using consistent ordering
  to a unique erasure coded set (see the sketch
  after this list).
- Each pack has its own dsync, so locks are
  synchronized properly at the pack (erasure set) level.
- Each pack is still limited to a maximum of
  16 disks; you can start with multiple such
  sets statically.
- Sets of disks are static and cannot be changed;
  neither elastic expansion nor elastic removal
  is allowed.
- ListObjects() across sets can be noticeably
  slower, since listing happens on all servers
  and the results are merged at this sets layer.
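
Below is a minimal sketch of the consistent-ordering distribution from
the first bullet, assuming a simple CRC-based hash; the helper name and
exact hash are illustrative, not necessarily what the implementation uses.

    package main

    import (
    	"fmt"
    	"hash/crc32"
    )

    // hashKeyToSet maps an object key to a set index with a stable hash,
    // so the same key always resolves to the same erasure set no matter
    // which server computes it. The hash choice here is illustrative.
    func hashKeyToSet(key string, setCount int) int {
    	return int(crc32.Checksum([]byte(key), crc32.MakeTable(crc32.Castagnoli))) % setCount
    }

    func main() {
    	// With 4 sets, every object name deterministically picks one set.
    	for _, key := range []string{"photos/a.jpg", "photos/b.jpg", "logs/2018.log"} {
    		fmt.Printf("%s -> set %d\n", key, hashKeyToSet(key, 4))
    	}
    }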

Fixes #5465
Fixes #5464
Fixes #5461
Fixes #5460
Fixes #5459
Fixes #5458
Fixes #5488
Fixes #5489
Fixes #5497
Fixes #5496
Author: Harshavardhana, 2018-02-15 17:45:57 -08:00
Committed by: kannappanr
Parent: dd80256151
Commit: fb96779a8a
82 changed files with 5046 additions and 4771 deletions


@@ -35,11 +35,6 @@ var bucketMetadataOpIgnoredErrs = append(bucketOpIgnoredErrs, errVolumeNotFound)
// MakeBucket - make a bucket.
func (xl xlObjects) MakeBucketWithLocation(bucket, location string) error {
-bucketLock := xl.nsMutex.NewNSLock(bucket, "")
-if err := bucketLock.GetLock(globalObjectTimeout); err != nil {
-return err
-}
-defer bucketLock.Unlock()
// Verify if bucket is valid.
if !IsValidBucketName(bucket) {
return errors.Trace(BucketNameInvalid{Bucket: bucket})
@@ -49,10 +44,10 @@ func (xl xlObjects) MakeBucketWithLocation(bucket, location string) error {
var wg = &sync.WaitGroup{}
// Initialize list of errors.
-var dErrs = make([]error, len(xl.storageDisks))
+var dErrs = make([]error, len(xl.getDisks()))
// Make a volume entry on all underlying storage disks.
-for index, disk := range xl.storageDisks {
+for index, disk := range xl.getDisks() {
if disk == nil {
dErrs[index] = errors.Trace(errDiskNotFound)
continue
@@ -71,11 +66,11 @@ func (xl xlObjects) MakeBucketWithLocation(bucket, location string) error {
// Wait for all make vol to finish.
wg.Wait()
-writeQuorum := len(xl.storageDisks)/2 + 1
+writeQuorum := len(xl.getDisks())/2 + 1
err := reduceWriteQuorumErrs(dErrs, bucketOpIgnoredErrs, writeQuorum)
if errors.Cause(err) == errXLWriteQuorum {
// Purge successfully created buckets if we don't have writeQuorum.
-undoMakeBucket(xl.storageDisks, bucket)
+undoMakeBucket(xl.getDisks(), bucket)
}
return toObjectErr(err, bucket)
}
@@ -84,7 +79,7 @@ func (xl xlObjects) undoDeleteBucket(bucket string) {
// Initialize sync waitgroup.
var wg = &sync.WaitGroup{}
// Undo previous make bucket entry on all underlying storage disks.
-for index, disk := range xl.storageDisks {
+for index, disk := range xl.getDisks() {
if disk == nil {
continue
}
@@ -150,7 +145,7 @@ func (xl xlObjects) getBucketInfo(bucketName string) (bucketInfo BucketInfo, err
// reduce to one error based on read quorum.
// `nil` is deliberately passed for ignoredErrs
// because these errors were already ignored.
-readQuorum := len(xl.storageDisks) / 2
+readQuorum := len(xl.getDisks()) / 2
return BucketInfo{}, reduceReadQuorumErrs(bucketErrs, nil, readQuorum)
}
@@ -240,10 +235,10 @@ func (xl xlObjects) DeleteBucket(bucket string) error {
// Collect if all disks report volume not found.
var wg = &sync.WaitGroup{}
-var dErrs = make([]error, len(xl.storageDisks))
+var dErrs = make([]error, len(xl.getDisks()))
// Remove a volume entry on all underlying storage disks.
-for index, disk := range xl.storageDisks {
+for index, disk := range xl.getDisks() {
if disk == nil {
dErrs[index] = errors.Trace(errDiskNotFound)
continue
@@ -273,7 +268,8 @@ func (xl xlObjects) DeleteBucket(bucket string) error {
// Wait for all the delete vols to finish.
wg.Wait()
-writeQuorum := len(xl.storageDisks)/2 + 1
+writeQuorum := len(xl.getDisks())/2 + 1
err := reduceWriteQuorumErrs(dErrs, bucketOpIgnoredErrs, writeQuorum)
if errors.Cause(err) == errXLWriteQuorum {
xl.undoDeleteBucket(bucket)
@@ -282,9 +278,6 @@ func (xl xlObjects) DeleteBucket(bucket string) error {
return toObjectErr(err, bucket)
}
// Delete all bucket metadata.
deleteBucketMetadata(bucket, xl)
return nil
}
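
The recurring change across the hunks above is the move from the static
xl.storageDisks field to the xl.getDisks() accessor, so that bucket
quorum is computed against the disks of a single erasure set. The
following reduced sketch illustrates that shape; the struct and
interface here are simplified stand-ins, not the real definitions from
the codebase.

    package main

    import "fmt"

    // StorageAPI stands in for the real storage interface; only what
    // this sketch needs is shown.
    type StorageAPI interface {
    	MakeVol(volume string) error
    }

    // xlObjects is reduced here to the one detail that matters for the
    // diff above: disks are reached through a getDisks accessor that
    // returns only the disks belonging to this erasure set.
    type xlObjects struct {
    	getDisks func() []StorageAPI
    }

    // The quorum expressions mirror the ones in the hunks above: write
    // quorum is a strict majority of the set, read quorum is half the set.
    func (xl xlObjects) writeQuorum() int { return len(xl.getDisks())/2 + 1 }
    func (xl xlObjects) readQuorum() int  { return len(xl.getDisks()) / 2 }

    func main() {
    	xl := xlObjects{getDisks: func() []StorageAPI { return make([]StorageAPI, 8) }}
    	fmt.Println(xl.writeQuorum(), xl.readQuorum()) // 5 4 for an 8 disk set
    }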