mirror of
https://github.com/minio/minio.git
synced 2025-11-06 20:33:07 -05:00
Add large bucket support for erasure coded backend (#5160)
This PR implements an object layer which combines input erasure sets of XL layers into a unified namespace. This object layer extends the existing erasure coded implementation, it is assumed in this design that providing > 16 disks is a static configuration as well i.e if you started the setup with 32 disks with 4 sets 8 disks per pack then you would need to provide 4 sets always. Some design details and restrictions: - Objects are distributed using consistent ordering to a unique erasure coded layer. - Each pack has its own dsync so locks are synchronized properly at pack (erasure layer). - Each pack still has a maximum of 16 disks requirement, you can start with multiple such sets statically. - Static sets set of disks and cannot be changed, there is no elastic expansion allowed. - Static sets set of disks and cannot be changed, there is no elastic removal allowed. - ListObjects() across sets can be noticeably slower since List happens on all servers, and is merged at this sets layer. Fixes #5465 Fixes #5464 Fixes #5461 Fixes #5460 Fixes #5459 Fixes #5458 Fixes #5460 Fixes #5488 Fixes #5489 Fixes #5497 Fixes #5496
This commit is contained in:
committed by
kannappanr
parent
dd80256151
commit
fb96779a8a
105
cmd/xl-v1.go
105
cmd/xl-v1.go
@@ -17,11 +17,10 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/minio/minio/pkg/bpool"
|
||||
"github.com/minio/minio/pkg/disk"
|
||||
"github.com/minio/minio/pkg/errors"
|
||||
)
|
||||
@@ -33,107 +32,36 @@ const (
|
||||
|
||||
// Uploads metadata file carries per multipart object metadata.
|
||||
uploadsJSONFile = "uploads.json"
|
||||
|
||||
// Maximum erasure blocks.
|
||||
maxErasureBlocks = 32
|
||||
|
||||
// Minimum erasure blocks.
|
||||
minErasureBlocks = 4
|
||||
)
|
||||
|
||||
// xlObjects - Implements XL object layer.
|
||||
type xlObjects struct {
|
||||
mutex *sync.Mutex
|
||||
storageDisks []StorageAPI // Collection of initialized backend disks.
|
||||
|
||||
// ListObjects pool management.
|
||||
listPool *treeWalkPool
|
||||
|
||||
// name space mutex for object layer
|
||||
// name space mutex for object layer.
|
||||
nsMutex *nsLockMap
|
||||
|
||||
// getDisks returns list of storageAPIs.
|
||||
getDisks func() []StorageAPI
|
||||
|
||||
// Byte pools used for temporary i/o buffers.
|
||||
bp *bpool.BytePoolCap
|
||||
|
||||
// Variable represents bucket policies in memory.
|
||||
bucketPolicies *bucketPolicies
|
||||
|
||||
// TODO: Deprecated only kept here for tests, should be removed in future.
|
||||
storageDisks []StorageAPI
|
||||
|
||||
// TODO: ListObjects pool management, should be removed in future.
|
||||
listPool *treeWalkPool
|
||||
}
|
||||
|
||||
// list of all errors that can be ignored in tree walk operation in XL
|
||||
var xlTreeWalkIgnoredErrs = append(baseIgnoredErrs, errDiskAccessDenied, errVolumeNotFound, errFileNotFound)
|
||||
|
||||
// newXLObjectLayer - initialize any object layer depending on the number of disks.
|
||||
func newXLObjectLayer(storageDisks []StorageAPI) (ObjectLayer, error) {
|
||||
// Initialize XL object layer.
|
||||
objAPI, err := newXLObjects(storageDisks)
|
||||
fatalIf(err, "Unable to initialize XL object layer.")
|
||||
|
||||
// Initialize and load bucket policies.
|
||||
err = initBucketPolicies(objAPI)
|
||||
fatalIf(err, "Unable to load all bucket policies.")
|
||||
|
||||
// Initialize a new event notifier.
|
||||
err = initEventNotifier(objAPI)
|
||||
fatalIf(err, "Unable to initialize event notification.")
|
||||
|
||||
// Success.
|
||||
return objAPI, nil
|
||||
}
|
||||
|
||||
// newXLObjects - initialize new xl object layer.
|
||||
func newXLObjects(storageDisks []StorageAPI) (ObjectLayer, error) {
|
||||
if storageDisks == nil {
|
||||
return nil, errInvalidArgument
|
||||
}
|
||||
|
||||
// figure out readQuorum for erasure format.json
|
||||
readQuorum := len(storageDisks) / 2
|
||||
writeQuorum := len(storageDisks)/2 + 1
|
||||
|
||||
// Load saved XL format.json and validate.
|
||||
newStorageDisks, err := loadFormatXL(storageDisks, readQuorum)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Unable to recognize backend format, %s", err)
|
||||
}
|
||||
|
||||
// Initialize list pool.
|
||||
listPool := newTreeWalkPool(globalLookupTimeout)
|
||||
|
||||
// Initialize xl objects.
|
||||
xl := &xlObjects{
|
||||
mutex: &sync.Mutex{},
|
||||
storageDisks: newStorageDisks,
|
||||
listPool: listPool,
|
||||
nsMutex: newNSLock(globalIsDistXL),
|
||||
}
|
||||
|
||||
// Initialize meta volume, if volume already exists ignores it.
|
||||
if err = initMetaVolume(xl.storageDisks); err != nil {
|
||||
return nil, fmt.Errorf("Unable to initialize '.minio.sys' meta volume, %s", err)
|
||||
}
|
||||
|
||||
// If the number of offline servers is equal to the readQuorum
|
||||
// (i.e. the number of online servers also equals the
|
||||
// readQuorum), we cannot perform quick-heal (no
|
||||
// write-quorum). However reads may still be possible, so we
|
||||
// skip quick-heal in this case, and continue.
|
||||
offlineCount := len(newStorageDisks) - diskCount(newStorageDisks)
|
||||
if offlineCount == readQuorum {
|
||||
return xl, nil
|
||||
}
|
||||
|
||||
// Perform a quick heal on the buckets and bucket metadata for any discrepancies.
|
||||
if err = quickHeal(*xl, writeQuorum, readQuorum); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Start background process to cleanup old multipart objects in `.minio.sys`.
|
||||
go cleanupStaleMultipartUploads(multipartCleanupInterval, multipartExpiry, xl, xl.listMultipartUploadsCleanup, globalServiceDoneCh)
|
||||
|
||||
return xl, nil
|
||||
}
|
||||
|
||||
// Shutdown function for object storage interface.
|
||||
func (xl xlObjects) Shutdown() error {
|
||||
// Add any object layer shutdown activities here.
|
||||
for _, disk := range xl.storageDisks {
|
||||
for _, disk := range xl.getDisks() {
|
||||
// This closes storage rpc client connections if any.
|
||||
// Otherwise this is a no-op.
|
||||
if disk == nil {
|
||||
@@ -291,6 +219,5 @@ func getStorageInfo(disks []StorageAPI) StorageInfo {
|
||||
|
||||
// StorageInfo - returns underlying storage statistics.
|
||||
func (xl xlObjects) StorageInfo() StorageInfo {
|
||||
storageInfo := getStorageInfo(xl.storageDisks)
|
||||
return storageInfo
|
||||
return getStorageInfo(xl.getDisks())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user