Commit fb96779a8a
This PR implements an object layer which combines input erasure sets of XL layers into a unified namespace. This object layer extends the existing erasure coded implementation; the design also assumes that providing > 16 disks is a static configuration, i.e. if you started the setup with 32 disks as 4 sets (8 disks per pack), then you would need to provide 4 sets always.

Some design details and restrictions:

- Objects are distributed using consistent ordering to a unique erasure coded layer (a hash-based placement sketch follows the issue list below).
- Each pack has its own dsync, so locks are synchronized properly at the pack (erasure layer) level.
- Each pack still has a maximum of 16 disks; you can start with multiple such sets statically.
- Sets are a static configuration of disks and cannot be changed; there is no elastic expansion allowed.
- Sets are a static configuration of disks and cannot be changed; there is no elastic removal allowed.
- ListObjects() across sets can be noticeably slower, since List happens on all servers and the results are merged at this sets layer.

Fixes #5465
Fixes #5464
Fixes #5461
Fixes #5460
Fixes #5459
Fixes #5458
Fixes #5488
Fixes #5489
Fixes #5497
Fixes #5496
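The first bullet above relies on a deterministic mapping from object name to erasure set. Below is a minimal standalone sketch of that idea; it is an illustration only, not the code from this PR, and the helper name hashedSetIndex (and the choice of CRC32/Castagnoli) are assumptions made for the example: hash the object name, reduce it modulo the number of sets, and the same name always lands in the same set.

package main

import (
	"fmt"
	"hash/crc32"
)

// hashedSetIndex sketches consistent placement: a stable hash of the
// object name, reduced modulo the number of erasure sets, always maps
// the same name to the same set. (Illustrative helper, not MinIO code.)
func hashedSetIndex(object string, setCount int) int {
	h := crc32.Checksum([]byte(object), crc32.MakeTable(crc32.Castagnoli))
	return int(h % uint32(setCount))
}

func main() {
	// Example: 32 disks started as 4 sets of 8 disks each. Every object
	// name deterministically resolves to one of the 4 sets.
	for _, object := range []string{"photos/2018/a.jpg", "backups/db.tar.gz"} {
		fmt.Printf("%s -> set %d\n", object, hashedSetIndex(object, 4))
	}
}

Because such a mapping depends only on the object name and the (static) set count, no central directory of objects is needed; it is also why the set layout cannot change after the fact, which is what the expansion and removal restrictions above capture.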
/*
 * Minio Cloud Storage, (C) 2016 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"reflect"
	"sort"
	"sync"

	"github.com/minio/minio-go/pkg/policy"
	"github.com/minio/minio/pkg/errors"
)

// list all errors that can be ignored in a bucket operation.
var bucketOpIgnoredErrs = append(baseIgnoredErrs, errDiskAccessDenied)

// list all errors that can be ignored in a bucket metadata operation.
var bucketMetadataOpIgnoredErrs = append(bucketOpIgnoredErrs, errVolumeNotFound)

/// Bucket operations

// MakeBucketWithLocation - make a bucket.
func (xl xlObjects) MakeBucketWithLocation(bucket, location string) error {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return errors.Trace(BucketNameInvalid{Bucket: bucket})
	}

	// Initialize sync waitgroup.
	var wg = &sync.WaitGroup{}

	// Initialize list of errors.
	var dErrs = make([]error, len(xl.getDisks()))

	// Make a volume entry on all underlying storage disks.
	for index, disk := range xl.getDisks() {
		if disk == nil {
			dErrs[index] = errors.Trace(errDiskNotFound)
			continue
		}
		wg.Add(1)
		// Make a volume inside a go-routine.
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			err := disk.MakeVol(bucket)
			if err != nil {
				dErrs[index] = errors.Trace(err)
			}
		}(index, disk)
	}

	// Wait for all make vol to finish.
	wg.Wait()

	writeQuorum := len(xl.getDisks())/2 + 1
	err := reduceWriteQuorumErrs(dErrs, bucketOpIgnoredErrs, writeQuorum)
	if errors.Cause(err) == errXLWriteQuorum {
		// Purge successfully created buckets if we don't have writeQuorum.
		undoMakeBucket(xl.getDisks(), bucket)
	}
	return toObjectErr(err, bucket)
}

// undoDeleteBucket - undo a failed DeleteBucket operation by re-creating the bucket.
func (xl xlObjects) undoDeleteBucket(bucket string) {
	// Initialize sync waitgroup.
	var wg = &sync.WaitGroup{}
	// Undo previous delete bucket on all underlying storage disks.
	for index, disk := range xl.getDisks() {
		if disk == nil {
			continue
		}
		wg.Add(1)
		// Re-create the bucket (volume) inside a go-routine.
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			_ = disk.MakeVol(bucket)
		}(index, disk)
	}

	// Wait for all make vols to finish.
	wg.Wait()
}

// undo make bucket operation upon quorum failure.
func undoMakeBucket(storageDisks []StorageAPI, bucket string) {
	// Initialize sync waitgroup.
	var wg = &sync.WaitGroup{}
	// Undo previous make bucket entry on all underlying storage disks.
	for index, disk := range storageDisks {
		if disk == nil {
			continue
		}
		wg.Add(1)
		// Delete a bucket inside a go-routine.
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			_ = disk.DeleteVol(bucket)
		}(index, disk)
	}

	// Wait for all delete vols to finish.
	wg.Wait()
}

// getBucketInfo - returns the BucketInfo from one of the load balanced disks.
func (xl xlObjects) getBucketInfo(bucketName string) (bucketInfo BucketInfo, err error) {
	var bucketErrs []error
	for _, disk := range xl.getLoadBalancedDisks() {
		if disk == nil {
			bucketErrs = append(bucketErrs, errDiskNotFound)
			continue
		}
		volInfo, serr := disk.StatVol(bucketName)
		if serr == nil {
			bucketInfo = BucketInfo{
				Name:    volInfo.Name,
				Created: volInfo.Created,
			}
			return bucketInfo, nil
		}
		err = errors.Trace(serr)
		// If the disk went offline for any reason, continue and pick the next one.
		if errors.IsErrIgnored(err, bucketMetadataOpIgnoredErrs...) {
			bucketErrs = append(bucketErrs, err)
			continue
		}
		// For any error which cannot be ignored, we return quickly.
		return BucketInfo{}, err
	}
	// If all our errors were ignored, then we try to
	// reduce to one error based on read quorum.
	// `nil` is deliberately passed for ignoredErrs
	// because these errors were already ignored.
	readQuorum := len(xl.getDisks()) / 2
	return BucketInfo{}, reduceReadQuorumErrs(bucketErrs, nil, readQuorum)
}

// GetBucketInfo - returns BucketInfo for a bucket.
func (xl xlObjects) GetBucketInfo(bucket string) (bi BucketInfo, e error) {
	bucketLock := xl.nsMutex.NewNSLock(bucket, "")
	if e := bucketLock.GetRLock(globalObjectTimeout); e != nil {
		return bi, e
	}
	defer bucketLock.RUnlock()
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return bi, BucketNameInvalid{Bucket: bucket}
	}

	bucketInfo, err := xl.getBucketInfo(bucket)
	if err != nil {
		return bi, toObjectErr(err, bucket)
	}
	return bucketInfo, nil
}

// listBuckets - returns list of all buckets from a disk picked at random.
func (xl xlObjects) listBuckets() (bucketsInfo []BucketInfo, err error) {
	for _, disk := range xl.getLoadBalancedDisks() {
		if disk == nil {
			continue
		}
		var volsInfo []VolInfo
		volsInfo, err = disk.ListVols()
		if err == nil {
			// NOTE: The assumption here is that volumes across all disks in
			// readQuorum have a consistent view, i.e. they all have the same
			// number of buckets. This is essentially not verified, since
			// healing should take care of this.
			var bucketsInfo []BucketInfo
			for _, volInfo := range volsInfo {
				if isReservedOrInvalidBucket(volInfo.Name) {
					continue
				}
				bucketsInfo = append(bucketsInfo, BucketInfo{
					Name:    volInfo.Name,
					Created: volInfo.Created,
				})
			}
			// If bucketsInfo is empty, loop once again to check the next
			// disk; this can happen if disks were down.
			if len(bucketsInfo) == 0 {
				continue
			}
			return bucketsInfo, nil
		}
		err = errors.Trace(err)
		// Ignore any disks not found.
		if errors.IsErrIgnored(err, bucketMetadataOpIgnoredErrs...) {
			continue
		}
		break
	}
	return nil, err
}

// ListBuckets - lists all the buckets, sorted by name.
func (xl xlObjects) ListBuckets() ([]BucketInfo, error) {
	bucketInfos, err := xl.listBuckets()
	if err != nil {
		return nil, toObjectErr(err)
	}
	// Sort by bucket name before returning.
	sort.Sort(byBucketName(bucketInfos))
	return bucketInfos, nil
}

// DeleteBucket - deletes a bucket.
func (xl xlObjects) DeleteBucket(bucket string) error {
	bucketLock := xl.nsMutex.NewNSLock(bucket, "")
	if err := bucketLock.GetLock(globalObjectTimeout); err != nil {
		return err
	}
	defer bucketLock.Unlock()

	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}

	// Collect if all disks report volume not found.
	var wg = &sync.WaitGroup{}
	var dErrs = make([]error, len(xl.getDisks()))

	// Remove a volume entry on all underlying storage disks.
	for index, disk := range xl.getDisks() {
		if disk == nil {
			dErrs[index] = errors.Trace(errDiskNotFound)
			continue
		}
		wg.Add(1)
		// Delete volume inside a go-routine.
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			// Attempt to delete bucket.
			err := disk.DeleteVol(bucket)
			if err != nil {
				dErrs[index] = errors.Trace(err)
				return
			}
			// Cleanup all the previously incomplete multiparts.
			err = cleanupDir(disk, minioMetaMultipartBucket, bucket)
			if err != nil {
				if errors.Cause(err) == errVolumeNotFound {
					return
				}
				dErrs[index] = err
			}
		}(index, disk)
	}

	// Wait for all the delete vols to finish.
	wg.Wait()

	writeQuorum := len(xl.getDisks())/2 + 1
	err := reduceWriteQuorumErrs(dErrs, bucketOpIgnoredErrs, writeQuorum)
	if errors.Cause(err) == errXLWriteQuorum {
		xl.undoDeleteBucket(bucket)
	}
	if err != nil {
		return toObjectErr(err, bucket)
	}

	return nil
}

// SetBucketPolicy sets policy on bucket
func (xl xlObjects) SetBucketPolicy(bucket string, policy policy.BucketAccessPolicy) error {
	return persistAndNotifyBucketPolicyChange(bucket, false, policy, xl)
}

// GetBucketPolicy will get policy on bucket
func (xl xlObjects) GetBucketPolicy(bucket string) (policy.BucketAccessPolicy, error) {
	// fetch bucket policy from cache.
	bpolicy := xl.bucketPolicies.GetBucketPolicy(bucket)
	if reflect.DeepEqual(bpolicy, emptyBucketPolicy) {
		return readBucketPolicy(bucket, xl)
	}
	return bpolicy, nil
}

// DeleteBucketPolicy deletes all policies on bucket
func (xl xlObjects) DeleteBucketPolicy(bucket string) error {
	return persistAndNotifyBucketPolicyChange(bucket, true, emptyBucketPolicy, xl)
}

// RefreshBucketPolicy refreshes policy cache from disk
func (xl xlObjects) RefreshBucketPolicy(bucket string) error {
	policy, err := readBucketPolicy(bucket, xl)
	if err != nil {
		if reflect.DeepEqual(policy, emptyBucketPolicy) {
			return xl.bucketPolicies.DeleteBucketPolicy(bucket)
		}
		return err
	}
	return xl.bucketPolicies.SetBucketPolicy(bucket, policy)
}

// IsNotificationSupported returns whether bucket notification is applicable for this layer.
func (xl xlObjects) IsNotificationSupported() bool {
	return true
}

// IsEncryptionSupported returns whether server side encryption is applicable for this layer.
func (xl xlObjects) IsEncryptionSupported() bool {
	return true
}