Add large bucket support for erasure coded backend (#5160)

This PR implements an object layer which
combines input erasure sets of XL layers
into a unified namespace.

This object layer extends the existing
erasure coded implementation. The design
assumes that providing more than 16 disks
is a static configuration as well, i.e. if
you started the setup with 32 disks as
4 sets of 8 disks each, then you must
always provide those same 4 sets.
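
To make the static-set arithmetic concrete, here is a minimal
sketch (illustrative only, not code from this PR; the helper
name and the per-set limit constant are assumptions) of how a
disk count and set count could be validated:

package main

import "fmt"

// maxDisksPerSet mirrors the 16-disk-per-set restriction described below.
const maxDisksPerSet = 16

// validateSets checks that totalDisks splits evenly into setCount sets
// and that each set stays within the per-set disk limit.
func validateSets(totalDisks, setCount int) (int, error) {
	if setCount <= 0 || totalDisks%setCount != 0 {
		return 0, fmt.Errorf("%d disks cannot be split evenly into %d sets", totalDisks, setCount)
	}
	disksPerSet := totalDisks / setCount
	if disksPerSet > maxDisksPerSet {
		return 0, fmt.Errorf("%d disks per set exceeds the limit of %d", disksPerSet, maxDisksPerSet)
	}
	return disksPerSet, nil
}

func main() {
	// The example from the description: 32 disks provided as 4 sets.
	disksPerSet, err := validateSets(32, 4)
	if err != nil {
		panic(err)
	}
	fmt.Println("disks per set:", disksPerSet) // prints: disks per set: 8
}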

Some design details and restrictions:

- Objects are distributed using consistent ordering
  to a unique erasure coded set (see the sketch
  after this list).
- Each set has its own dsync instance, so locks are
  synchronized properly at the set (erasure layer) level.
- Each set is still limited to a maximum of 16 disks;
  you can start with multiple such sets statically.
- Sets are a static collection of disks and cannot be
  changed; neither elastic expansion nor elastic
  removal of disks is allowed.
- ListObjects() across sets can be noticeably
  slower, since listing happens on all sets and the
  results are merged at this unified layer.
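
As a rough illustration of the consistent-ordering distribution in
the first bullet above, the following sketch (illustrative only, not
code from this PR; the crc32-based hash and the function name are
assumptions) maps an object name deterministically to one of the sets:

package main

import (
	"fmt"
	"hash/crc32"
)

// hashObjectToSet deterministically maps an object name to a set index
// in [0, setCount). The same name always lands on the same set, so all
// operations on a given object are served by a single erasure coded set.
func hashObjectToSet(object string, setCount int) int {
	return int(crc32.ChecksumIEEE([]byte(object)) % uint32(setCount))
}

func main() {
	// With 4 sets, different objects spread across the sets, while each
	// individual object always maps to exactly one of them.
	for _, object := range []string{"photos/1.jpg", "photos/2.jpg", "docs/a.txt"} {
		fmt.Println(object, "=>", hashObjectToSet(object, 4))
	}
}

Because the mapping depends on the number of sets, changing that
number would re-map existing objects to different sets, which is
consistent with the restriction above that sets are static.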

Fixes #5465
Fixes #5464
Fixes #5461
Fixes #5460
Fixes #5459
Fixes #5458
Fixes #5488
Fixes #5489
Fixes #5497
Fixes #5496
Author: Harshavardhana
Date: 2018-02-15 17:45:57 -08:00
Committed by: kannappanr
Parent: dd80256151
Commit: fb96779a8a
Changed: 82 files, 5046 additions and 4771 deletions


@@ -159,69 +159,6 @@ func newStorageAPI(endpoint Endpoint) (storage StorageAPI, err error) {
	return newStorageRPC(endpoint), nil
}

var initMetaVolIgnoredErrs = append(baseIgnoredErrs, errVolumeExists)

// Initializes meta volume on all input storage disks.
func initMetaVolume(storageDisks []StorageAPI) error {
	// This happens for the first time, but keep this here since this
	// is the only place where it can be made expensive optimizing all
	// other calls. Create minio meta volume, if it doesn't exist yet.
	var wg = &sync.WaitGroup{}

	// Initialize errs to collect errors inside go-routine.
	var errs = make([]error, len(storageDisks))

	// Initialize all disks in parallel.
	for index, disk := range storageDisks {
		if disk == nil {
			// Ignore create meta volume on disks which are not found.
			continue
		}
		wg.Add(1)
		go func(index int, disk StorageAPI) {
			// Indicate this wait group is done.
			defer wg.Done()

			// Attempt to create `.minio.sys`.
			err := disk.MakeVol(minioMetaBucket)
			if err != nil {
				if !errors.IsErrIgnored(err, initMetaVolIgnoredErrs...) {
					errs[index] = err
					return
				}
			}
			err = disk.MakeVol(minioMetaTmpBucket)
			if err != nil {
				if !errors.IsErrIgnored(err, initMetaVolIgnoredErrs...) {
					errs[index] = err
					return
				}
			}
			err = disk.MakeVol(minioMetaMultipartBucket)
			if err != nil {
				if !errors.IsErrIgnored(err, initMetaVolIgnoredErrs...) {
					errs[index] = err
					return
				}
			}
		}(index, disk)
	}

	// Wait for all cleanup to finish.
	wg.Wait()

	// Return upon first error.
	for _, err := range errs {
		if err == nil {
			continue
		}
		return toObjectErr(err, minioMetaBucket)
	}

	// Return success here.
	return nil
}

// Cleanup a directory recursively.
func cleanupDir(storage StorageAPI, volume, dirPath string) error {
	var delFunc func(string) error