Add x-amz-storage-class support (#5295)

This adds configurable data and parity options on a per-object
basis. To use variable parity:

- Users can set environment variables to configure variable
parity

- Then add the x-amz-storage-class header to PutObject requests
with the relevant storage class values

Fixes #4997
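
For reference, a minimal configuration sketch (the parity values here are illustrative, not mandated by the commit): the server reads the two environment variables below at startup, in the EC:<parity> format accepted by parseStorageClass in the new cmd/storage-class.go, and clients then pass the x-amz-storage-class header on PutObject requests, as exercised in cmd/storage-class_test.go.

    MINIO_STORAGE_CLASS_STANDARD=EC:6
    MINIO_STORAGE_CLASS_RRS=EC:2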
Authored by Nitish Tiwari on 2017-12-22 16:58:13 +05:30, committed by GitHub
parent f1355da72e
commit 1a3dbbc9dd
25 changed files with 1237 additions and 129 deletions


@@ -113,4 +113,34 @@ func handleCommonEnvVars() {
// or is not set to 'off', if MINIO_UPDATE is set to 'off' then
// in-place update is off.
globalInplaceUpdateDisabled = strings.EqualFold(os.Getenv("MINIO_UPDATE"), "off")
// Validate and store the storage class env variables only for XL/Dist XL setups
if globalIsXL {
var err error
// Check for environment variables and parse into storageClass struct
if ssc := os.Getenv(standardStorageClass); ssc != "" {
globalStandardStorageClass, err = parseStorageClass(ssc)
fatalIf(err, "Invalid value set in environment variable %s.", standardStorageClass)
}
if rrsc := os.Getenv(reducedRedundancyStorageClass); rrsc != "" {
globalRRStorageClass, err = parseStorageClass(rrsc)
fatalIf(err, "Invalid value set in environment variable %s.", reducedRedundancyStorageClass)
}
// Validation is done after parsing both the storage classes. This is needed because we need one
// storage class value to deduce the correct value of the other storage class.
if globalRRStorageClass.Scheme != "" {
err := validateRRSParity(globalRRStorageClass.Parity, globalStandardStorageClass.Parity)
fatalIf(err, "Invalid value set in environment variable %s.", reducedRedundancyStorageClass)
globalIsStorageClass = true
}
if globalStandardStorageClass.Scheme != "" {
err := validateSSParity(globalStandardStorageClass.Parity, globalRRStorageClass.Parity)
fatalIf(err, "Invalid value set in environment variable %s.", standardStorageClass)
globalIsStorageClass = true
}
}
}
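
Taken together, validateRRSParity and validateSSParity (defined in the new cmd/storage-class.go shown further down) enforce the following relation on an N-drive XL setup when both classes are configured; if only one class is set, its own bounds still apply (2 <= Standard parity <= N/2, or 2 <= RRS parity < N/2), and RRS is rejected outright on 4-drive setups:

    2 <= RRS parity < Standard parity <= N/2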


@@ -20,6 +20,7 @@ import (
"errors"
"fmt"
"io/ioutil"
"strconv"
"sync"
"github.com/minio/minio/pkg/auth"
@@ -36,9 +37,9 @@ import (
// 6. Make changes in config-current_test.go for any test change
// Config version
const serverConfigVersion = "21"
const serverConfigVersion = "22"
type serverConfig = serverConfigV21
type serverConfig = serverConfigV22
var (
// globalServerConfig server config.
@@ -103,6 +104,52 @@ func (s *serverConfig) SetBrowser(b bool) {
s.Browser = BrowserFlag(b)
}
func (s *serverConfig) SetStorageClass(standardClass, rrsClass storageClass) {
s.Lock()
defer s.Unlock()
// Set the values
s.StorageClass.Standard = standardClass.Scheme + ":" + strconv.Itoa(standardClass.Parity)
s.StorageClass.RRS = rrsClass.Scheme + ":" + strconv.Itoa(rrsClass.Parity)
}
func (s *serverConfig) GetStorageClass() (standardStorageClass, rrsStorageClass storageClass) {
s.RLock()
defer s.RUnlock()
var err error
var ssc storageClass
var rrsc storageClass
if s.StorageClass.Standard != "" {
// Parse the values read from config file into storageClass struct
ssc, err = parseStorageClass(s.StorageClass.Standard)
fatalIf(err, "Invalid value %s set in config.json", s.StorageClass.Standard)
}
if s.StorageClass.RRS != "" {
// Parse the values read from config file into storageClass struct
rrsc, err = parseStorageClass(s.StorageClass.RRS)
fatalIf(err, "Invalid value %s set in config.json", s.StorageClass.RRS)
}
// Validation is done after parsing both the storage classes. This is needed because we need one
// storage class value to deduce the correct value of the other storage class.
if rrsc.Scheme != "" {
err := validateRRSParity(rrsc.Parity, ssc.Parity)
fatalIf(err, "Invalid value %s set in config.json", s.StorageClass.RRS)
globalIsStorageClass = true
}
if ssc.Scheme != "" {
err := validateSSParity(ssc.Parity, rrsc.Parity)
fatalIf(err, "Invalid value %s set in config.json", s.StorageClass.Standard)
globalIsStorageClass = true
}
return
}
// GetCredentials get current credentials.
func (s *serverConfig) GetBrowser() bool {
s.RLock()
@@ -175,6 +222,10 @@ func newConfig() error {
srvCfg.Domain = globalDomainName
}
if globalIsStorageClass {
srvCfg.SetStorageClass(globalStandardStorageClass, globalRRStorageClass)
}
// hold the mutex lock before a new config is assigned.
// Save the new config globally.
// unlock the mutex.
@@ -303,6 +354,10 @@ func loadConfig() error {
srvCfg.Domain = globalDomainName
}
if globalIsStorageClass {
srvCfg.SetStorageClass(globalStandardStorageClass, globalRRStorageClass)
}
// hold the mutex lock before a new config is assigned.
globalServerConfigMu.Lock()
globalServerConfig = srvCfg
@@ -318,6 +373,9 @@ func loadConfig() error {
if !globalIsEnvDomainName {
globalDomainName = globalServerConfig.Domain
}
if !globalIsStorageClass {
globalStandardStorageClass, globalRRStorageClass = globalServerConfig.GetStorageClass()
}
globalServerConfigMu.Unlock()
return nil


@@ -158,6 +158,10 @@ func migrateConfig() error {
return err
}
fallthrough
case "21":
if err = migrateV21ToV22(); err != nil {
return err
}
case serverConfigVersion:
// No migration needed. this always points to current version.
err = nil
@@ -1704,3 +1708,110 @@ func migrateV20ToV21() error {
log.Printf(configMigrateMSGTemplate, configFile, cv20.Version, srvConfig.Version)
return nil
}
func migrateV21ToV22() error {
configFile := getConfigFile()
cv21 := &serverConfigV21{}
_, err := quick.Load(configFile, cv21)
if os.IsNotExist(err) {
return nil
} else if err != nil {
return fmt.Errorf("Unable to load config version 21. %v", err)
}
if cv21.Version != "21" {
return nil
}
// Copy over fields from V21 into V22 config struct
srvConfig := &serverConfigV22{
Notify: &notifier{},
}
srvConfig.Version = serverConfigVersion
srvConfig.Credential = cv21.Credential
srvConfig.Region = cv21.Region
if srvConfig.Region == "" {
// Region needs to be set for AWS Signature Version 4.
srvConfig.Region = globalMinioDefaultRegion
}
// check and set notifiers config
if len(cv21.Notify.AMQP) == 0 {
srvConfig.Notify.AMQP = make(map[string]amqpNotify)
srvConfig.Notify.AMQP["1"] = amqpNotify{}
} else {
// New deliveryMode parameter is added for AMQP,
// default value is already 0, so nothing to
// explicitly migrate here.
srvConfig.Notify.AMQP = cv21.Notify.AMQP
}
if len(cv21.Notify.ElasticSearch) == 0 {
srvConfig.Notify.ElasticSearch = make(map[string]elasticSearchNotify)
srvConfig.Notify.ElasticSearch["1"] = elasticSearchNotify{
Format: formatNamespace,
}
} else {
srvConfig.Notify.ElasticSearch = cv21.Notify.ElasticSearch
}
if len(cv21.Notify.Redis) == 0 {
srvConfig.Notify.Redis = make(map[string]redisNotify)
srvConfig.Notify.Redis["1"] = redisNotify{
Format: formatNamespace,
}
} else {
srvConfig.Notify.Redis = cv21.Notify.Redis
}
if len(cv21.Notify.PostgreSQL) == 0 {
srvConfig.Notify.PostgreSQL = make(map[string]postgreSQLNotify)
srvConfig.Notify.PostgreSQL["1"] = postgreSQLNotify{
Format: formatNamespace,
}
} else {
srvConfig.Notify.PostgreSQL = cv21.Notify.PostgreSQL
}
if len(cv21.Notify.Kafka) == 0 {
srvConfig.Notify.Kafka = make(map[string]kafkaNotify)
srvConfig.Notify.Kafka["1"] = kafkaNotify{}
} else {
srvConfig.Notify.Kafka = cv21.Notify.Kafka
}
if len(cv21.Notify.NATS) == 0 {
srvConfig.Notify.NATS = make(map[string]natsNotify)
srvConfig.Notify.NATS["1"] = natsNotify{}
} else {
srvConfig.Notify.NATS = cv21.Notify.NATS
}
if len(cv21.Notify.Webhook) == 0 {
srvConfig.Notify.Webhook = make(map[string]webhookNotify)
srvConfig.Notify.Webhook["1"] = webhookNotify{}
} else {
srvConfig.Notify.Webhook = cv21.Notify.Webhook
}
if len(cv21.Notify.MySQL) == 0 {
srvConfig.Notify.MySQL = make(map[string]mySQLNotify)
srvConfig.Notify.MySQL["1"] = mySQLNotify{
Format: formatNamespace,
}
} else {
srvConfig.Notify.MySQL = cv21.Notify.MySQL
}
if len(cv21.Notify.MQTT) == 0 {
srvConfig.Notify.MQTT = make(map[string]mqttNotify)
srvConfig.Notify.MQTT["1"] = mqttNotify{}
} else {
srvConfig.Notify.MQTT = cv21.Notify.MQTT
}
// Load browser config from existing config in the file.
srvConfig.Browser = cv21.Browser
// Load domain config from existing config in the file.
srvConfig.Domain = cv21.Domain
if err = quick.Save(configFile, srvConfig); err != nil {
return fmt.Errorf("Failed to migrate config from %s to %s. %v", cv21.Version, srvConfig.Version, err)
}
log.Printf(configMigrateMSGTemplate, configFile, cv21.Version, srvConfig.Version)
return nil
}


@@ -530,3 +530,22 @@ type serverConfigV21 struct {
// Notification queue configuration.
Notify *notifier `json:"notify"`
}
// serverConfigV22 is just like version '21' with added support
// for StorageClass
type serverConfigV22 struct {
sync.RWMutex
Version string `json:"version"`
// S3 API configuration.
Credential auth.Credentials `json:"credential"`
Region string `json:"region"`
Browser BrowserFlag `json:"browser"`
Domain string `json:"domain"`
// Storage class configuration
StorageClass storageClassConfig `json:"storageclass"`
// Notification queue configuration.
Notify *notifier `json:"notify"`
}
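
With the new struct in place, the storage class section that SetStorageClass serializes into config.json looks roughly like the following sketch (the parity values are illustrative, not part of the commit):

    "storageclass": {
        "standard": "EC:6",
        "rrs": "EC:2"
    }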


@@ -149,7 +149,6 @@ var (
globalIsEnvDomainName bool
globalDomainName string // Root domain for virtual host style requests
// Add new variable global values here.
globalListingTimeout = newDynamicTimeout( /*30*/ 600*time.Second /*5*/, 600*time.Second) // timeout for listing related ops
globalObjectTimeout = newDynamicTimeout( /*1*/ 10*time.Minute /*10*/, 600*time.Second) // timeout for Object API related ops
@@ -158,6 +157,16 @@ var (
// Keep connection active for clients actively using ListenBucketNotification.
globalSNSConnAlive = 5 * time.Second // Send a whitespace every 5 seconds.
// Storage classes
// Set to indicate if storage class is set up
globalIsStorageClass bool
// Set to store reduced redundancy storage class
globalRRStorageClass storageClass
// Set to store standard storage class
globalStandardStorageClass storageClass
// Add new variable global values here.
)
// global colors.


@@ -60,6 +60,7 @@ var supportedHeaders = []string{
"cache-control",
"content-encoding",
"content-disposition",
amzStorageClass,
// Add more supported headers here.
}
@@ -116,7 +117,8 @@ func extractMetadataFromHeader(header http.Header) (map[string]string, error) {
return nil, errors.Trace(errInvalidArgument)
}
metadata := make(map[string]string)
// Save standard supported headers.
// Save all supported headers.
for _, supportedHeader := range supportedHeaders {
canonicalHeader := http.CanonicalHeaderKey(supportedHeader)
// HTTP headers are case insensitive, look for both canonical
@@ -127,6 +129,7 @@ func extractMetadataFromHeader(header http.Header) (map[string]string, error) {
metadata[supportedHeader] = header.Get(supportedHeader)
}
}
// Go through all other headers for any additional headers that need to be saved.
for key := range header {
if key != http.CanonicalHeaderKey(key) {


@@ -43,10 +43,10 @@ type StorageInfo struct {
Type BackendType
// Following fields are only meaningful if BackendType is Erasure.
OnlineDisks int // Online disks during server startup.
OfflineDisks int // Offline disks during server startup.
ReadQuorum int // Minimum disks required for successful read operations.
WriteQuorum int // Minimum disks required for successful write operations.
OnlineDisks int // Online disks during server startup.
OfflineDisks int // Offline disks during server startup.
standardSCParity int // Parity disks for currently configured Standard storage class.
rrSCParity int // Parity disks for currently configured Reduced Redundancy storage class.
}
}


@@ -238,7 +238,8 @@ func testPutObjectPartDiskNotFound(obj ObjectLayer, instanceType string, disks [
if err == nil {
t.Fatalf("Test %s: expected to fail but passed instead", instanceType)
}
expectedErr := InsufficientWriteQuorum{}
// As a majority of the xl.json files are not available, we expect InsufficientReadQuorum while trying to fetch the object quorum
expectedErr := InsufficientReadQuorum{}
if err.Error() != expectedErr.Error() {
t.Fatalf("Test %s: expected error %s, got %s instead.", instanceType, expectedErr, err)
}


@@ -47,6 +47,16 @@ func printStartupMessage(apiEndPoints []string) {
strippedAPIEndpoints := stripStandardPorts(apiEndPoints)
// Object layer is initialized then print StorageInfo.
objAPI := newObjectLayerFn()
if objAPI != nil {
printStorageInfo(objAPI.StorageInfo())
// Storage class info only printed for Erasure backend
if objAPI.StorageInfo().Backend.Type == Erasure {
printStorageClassInfoMsg(objAPI.StorageInfo())
}
}
// Prints credential, region and browser access.
printServerCommonMsg(strippedAPIEndpoints)
@@ -57,12 +67,6 @@ func printStartupMessage(apiEndPoints []string) {
// Prints documentation message.
printObjectAPIMsg()
// Object layer is initialized then print StorageInfo.
objAPI := newObjectLayerFn()
if objAPI != nil {
printStorageInfo(objAPI.StorageInfo())
}
// SSL is configured reads certification chain, prints
// authority and expiry.
if globalIsSSL {
@@ -173,18 +177,42 @@ func getStorageInfoMsg(storageInfo StorageInfo) string {
humanize.IBytes(uint64(storageInfo.Total)))
if storageInfo.Backend.Type == Erasure {
diskInfo := fmt.Sprintf(" %d Online, %d Offline. ", storageInfo.Backend.OnlineDisks, storageInfo.Backend.OfflineDisks)
if maxDiskFailures := storageInfo.Backend.ReadQuorum - storageInfo.Backend.OfflineDisks; maxDiskFailures >= 0 {
diskInfo += fmt.Sprintf("We can withstand [%d] drive failure(s).", maxDiskFailures)
}
msg += colorBlue("\nStatus:") + fmt.Sprintf(getFormatStr(len(diskInfo), 8), diskInfo)
}
return msg
}
func printStorageClassInfoMsg(storageInfo StorageInfo) {
standardClassMsg := getStandardStorageClassInfoMsg(storageInfo)
rrsClassMsg := getRRSStorageClassInfoMsg(storageInfo)
storageClassMsg := fmt.Sprintf(getFormatStr(len(standardClassMsg), 3), standardClassMsg) + fmt.Sprintf(getFormatStr(len(rrsClassMsg), 3), rrsClassMsg)
// Print storage class section only if data is present
if storageClassMsg != "" {
log.Println(colorBlue("Storage Class:"))
log.Println(storageClassMsg)
}
}
func getStandardStorageClassInfoMsg(storageInfo StorageInfo) string {
var msg string
if maxDiskFailures := storageInfo.Backend.standardSCParity - storageInfo.Backend.OfflineDisks; maxDiskFailures >= 0 {
msg += fmt.Sprintf("Objects with Standard class can withstand [%d] drive failure(s).\n", maxDiskFailures)
}
return msg
}
func getRRSStorageClassInfoMsg(storageInfo StorageInfo) string {
var msg string
if maxDiskFailures := storageInfo.Backend.rrSCParity - storageInfo.Backend.OfflineDisks; maxDiskFailures >= 0 {
msg += fmt.Sprintf("Objects with Reduced Redundancy class can withstand [%d] drive failure(s).\n", maxDiskFailures)
}
return msg
}
// Prints startup message of storage capacity and erasure information.
func printStorageInfo(storageInfo StorageInfo) {
log.Println()
log.Println(getStorageInfoMsg(storageInfo))
log.Println()
}
// Prints certificate expiry date warning


@@ -35,11 +35,11 @@ func TestStorageInfoMsg(t *testing.T) {
Total: 10 * humanize.GiByte,
Free: 2 * humanize.GiByte,
Backend: struct {
Type BackendType
OnlineDisks int
OfflineDisks int
ReadQuorum int
WriteQuorum int
Type BackendType
OnlineDisks int
OfflineDisks int
standardSCParity int
rrSCParity int
}{Erasure, 7, 1, 4, 5},
}
@@ -155,3 +155,97 @@ func TestPrintStartupMessage(t *testing.T) {
apiEndpoints := []string{"http://127.0.0.1:9000"}
printStartupMessage(apiEndpoints)
}
func TestGetStandardStorageClassInfoMsg(t *testing.T) {
tests := []struct {
name string
args StorageInfo
want string
}{
{"1", StorageInfo{
Total: 20 * humanize.GiByte,
Free: 2 * humanize.GiByte,
Backend: struct {
Type BackendType
OnlineDisks int
OfflineDisks int
standardSCParity int
rrSCParity int
}{Erasure, 15, 1, 5, 3},
}, "Objects with Standard class can withstand [4] drive failure(s).\n"},
{"2", StorageInfo{
Total: 30 * humanize.GiByte,
Free: 3 * humanize.GiByte,
Backend: struct {
Type BackendType
OnlineDisks int
OfflineDisks int
standardSCParity int
rrSCParity int
}{Erasure, 10, 0, 5, 3},
}, "Objects with Standard class can withstand [5] drive failure(s).\n"},
{"3", StorageInfo{
Total: 15 * humanize.GiByte,
Free: 2 * humanize.GiByte,
Backend: struct {
Type BackendType
OnlineDisks int
OfflineDisks int
standardSCParity int
rrSCParity int
}{Erasure, 12, 3, 6, 2},
}, "Objects with Standard class can withstand [3] drive failure(s).\n"},
}
for _, tt := range tests {
if got := getStandardStorageClassInfoMsg(tt.args); got != tt.want {
t.Errorf("Test %s failed, expected %v, got %v", tt.name, tt.want, got)
}
}
}
func TestGetRRSStorageClassInfoMsg(t *testing.T) {
tests := []struct {
name string
args StorageInfo
want string
}{
{"1", StorageInfo{
Total: 20 * humanize.GiByte,
Free: 2 * humanize.GiByte,
Backend: struct {
Type BackendType
OnlineDisks int
OfflineDisks int
standardSCParity int
rrSCParity int
}{Erasure, 15, 1, 5, 3},
}, "Objects with Reduced Redundancy class can withstand [2] drive failure(s).\n"},
{"2", StorageInfo{
Total: 30 * humanize.GiByte,
Free: 3 * humanize.GiByte,
Backend: struct {
Type BackendType
OnlineDisks int
OfflineDisks int
standardSCParity int
rrSCParity int
}{Erasure, 16, 0, 5, 3},
}, "Objects with Reduced Redundancy class can withstand [3] drive failure(s).\n"},
{"3", StorageInfo{
Total: 15 * humanize.GiByte,
Free: 2 * humanize.GiByte,
Backend: struct {
Type BackendType
OnlineDisks int
OfflineDisks int
standardSCParity int
rrSCParity int
}{Erasure, 12, 3, 6, 5},
}, "Objects with Reduced Redundancy class can withstand [2] drive failure(s).\n"},
}
for _, tt := range tests {
if got := getRRSStorageClassInfoMsg(tt.args); got != tt.want {
t.Errorf("Test %s failed, expected %v, got %v", tt.name, tt.want, got)
}
}
}

cmd/storage-class.go (new file, 187 lines)

@@ -0,0 +1,187 @@
/*
* Minio Cloud Storage, (C) 2017 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"errors"
"fmt"
"strconv"
"strings"
)
const (
// metadata entry for storage class
amzStorageClass = "x-amz-storage-class"
// Reduced redundancy storage class
reducedRedundancyStorageClass = "MINIO_STORAGE_CLASS_RRS"
// Standard storage class
standardStorageClass = "MINIO_STORAGE_CLASS_STANDARD"
// Supported storage class scheme is EC
supportedStorageClassScheme = "EC"
// Minimum parity disks
minimumParityDisks = 2
defaultRRSParity = 2
)
// Struct to hold storage class
type storageClass struct {
Scheme string
Parity int
}
type storageClassConfig struct {
Standard string `json:"standard"`
RRS string `json:"rrs"`
}
// Parses given storageClassEnv and returns a storageClass structure.
// Supported Storage Class format is "Scheme:Number of parity disks".
// Currently only supported scheme is "EC".
func parseStorageClass(storageClassEnv string) (sc storageClass, err error) {
s := strings.Split(storageClassEnv, ":")
// only two elements allowed in the string - "scheme" and "number of parity disks"
if len(s) > 2 {
return storageClass{}, errors.New("Too many sections in " + storageClassEnv)
} else if len(s) < 2 {
return storageClass{}, errors.New("Too few sections in " + storageClassEnv)
}
// only allowed scheme is "EC"
if s[0] != supportedStorageClassScheme {
return storageClass{}, errors.New("Unsupported scheme " + s[0] + ". Supported scheme is EC")
}
// Number of parity disks should be integer
parityDisks, err := strconv.Atoi(s[1])
if err != nil {
return storageClass{}, err
}
sc = storageClass{
Scheme: s[0],
Parity: parityDisks,
}
return sc, nil
}
// Validates the parity disks for Reduced Redundancy storage class
func validateRRSParity(rrsParity, ssParity int) (err error) {
// Reduced redundancy storage class is not supported for 4 disks erasure coded setup.
if len(globalEndpoints) == 4 && rrsParity != 0 {
return fmt.Errorf("Reduced redundancy storage class not supported for " + strconv.Itoa(len(globalEndpoints)) + " disk setup")
}
// RRS parity disks should be greater than or equal to minimumParityDisks. Parity below minimumParityDisks is not recommended.
if rrsParity < minimumParityDisks {
return fmt.Errorf("Reduced redundancy storage class parity should be greater than or equal to " + strconv.Itoa(minimumParityDisks))
}
// Reduced redundancy implies lesser parity than standard storage class. So, RRS parity disks should be
// - less than N/2, if StorageClass parity is not set.
// - less than StorageClass Parity, if Storage class parity is set.
switch ssParity {
case 0:
if rrsParity >= len(globalEndpoints)/2 {
return fmt.Errorf("Reduced redundancy storage class parity disks should be less than " + strconv.Itoa(len(globalEndpoints)/2))
}
default:
if rrsParity >= ssParity {
return fmt.Errorf("Reduced redundancy storage class parity disks should be less than " + strconv.Itoa(ssParity))
}
}
return nil
}
// Validates the parity disks for Standard storage class
func validateSSParity(ssParity, rrsParity int) (err error) {
// Standard storage class implies more parity than Reduced redundancy storage class. So, Standard storage parity disks should be
// - greater than or equal to 2, if RRS parity is not set.
// - greater than RRS Parity, if RRS parity is set.
switch rrsParity {
case 0:
if ssParity < minimumParityDisks {
return fmt.Errorf("Standard storage class parity disks should be greater than or equal to " + strconv.Itoa(minimumParityDisks))
}
default:
if ssParity <= rrsParity {
return fmt.Errorf("Standard storage class parity disks should be greater than " + strconv.Itoa(rrsParity))
}
}
// Standard storage class parity should be less than or equal to N/2
if ssParity > len(globalEndpoints)/2 {
return fmt.Errorf("Standard storage class parity disks should be less than or equal to " + strconv.Itoa(len(globalEndpoints)/2))
}
return nil
}
// Returns the data and parity drive count based on storage class
// If storage class is set using the env vars MINIO_STORAGE_CLASS_RRS and MINIO_STORAGE_CLASS_STANDARD
// -- corresponding values are returned
// If storage class is not set using environment variables, default values are returned
// -- Default for Reduced Redundancy Storage class is parity = 2 and data = N - parity
// -- Default for Standard Storage class is parity = N/2 and data = N/2
// If storage class is not present in metadata, the default is data = N/2, parity = N/2
func getDrivesCount(sc string, disks []StorageAPI) (data, parity int) {
totalDisks := len(disks)
parity = totalDisks / 2
switch sc {
case reducedRedundancyStorageClass:
if globalRRStorageClass.Parity != 0 {
// set the rrs parity if available
parity = globalRRStorageClass.Parity
} else {
// else fall back to default value
parity = defaultRRSParity
}
case standardStorageClass:
if globalStandardStorageClass.Parity != 0 {
// set the standard parity if available
parity = globalStandardStorageClass.Parity
}
}
// data is always totalDisks - parity
return totalDisks - parity, parity
}
// Returns per object readQuorum and writeQuorum
// readQuorum is the minimum required disks to read data.
// writeQuorum is the minimum required disks to write data.
func objectQuorumFromMeta(xl xlObjects, partsMetaData []xlMetaV1, errs []error) (objectReadQuorum, objectWriteQuorum int, err error) {
// get the latest updated Metadata and a count of all the latest updated xlMeta(s)
latestXLMeta, count := getLatestXLMeta(partsMetaData, errs)
// latestXLMeta is updated most recently.
// We implicitly assume that all the xlMeta(s) have same dataBlocks and parityBlocks.
// We now check that at least dataBlocks number of xlMeta is available. This means count
// should be greater than or equal to dataBlocks field of latestXLMeta. If not we throw read quorum error.
if count < latestXLMeta.Erasure.DataBlocks {
// This is the case when we can't reliably deduce object quorum
return 0, 0, errXLReadQuorum
}
// Since all the valid erasure code meta updated at the same time are equivalent, pass dataBlocks
// from latestXLMeta to get the quorum
return latestXLMeta.Erasure.DataBlocks, latestXLMeta.Erasure.DataBlocks + 1, nil
}
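
A minimal sketch of how the helpers in this file combine, assuming it is compiled inside package cmd next to cmd/storage-class.go and that the setup has 16 drives (the function name and all values are illustrative, not part of the commit):

// exampleStorageClassFlow is a hypothetical helper for illustration only.
func exampleStorageClassFlow() {
	// "EC:2" parses into storageClass{Scheme: "EC", Parity: 2}.
	sc, err := parseStorageClass("EC:2")
	if err != nil {
		panic(err)
	}
	globalRRStorageClass = sc

	// 16 drives and an object stored with the RRS class: parity comes from
	// the configured value (2), so data = 16 - 2 = 14. An object without a
	// storage class keeps the default N/2 split, i.e. data = parity = 8.
	data, parity := getDrivesCount(reducedRedundancyStorageClass, make([]StorageAPI, 16))
	_ = data   // 14
	_ = parity // 2

	// Per object, objectQuorumFromMeta then derives readQuorum = dataBlocks
	// and writeQuorum = dataBlocks + 1, i.e. 14 and 15 for the RRS object above.
}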

cmd/storage-class_test.go (new file, 355 lines)

@@ -0,0 +1,355 @@
/*
* Minio Cloud Storage, (C) 2017 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"bytes"
"errors"
"reflect"
"testing"
)
func TestParseStorageClass(t *testing.T) {
ExecObjectLayerTest(t, testParseStorageClass)
}
func testParseStorageClass(obj ObjectLayer, instanceType string, t TestErrHandler) {
tests := []struct {
name int
storageClassEnv string
wantSc storageClass
expectedError error
}{
{1, "EC:3", storageClass{
Scheme: "EC",
Parity: 3},
nil},
{2, "EC:4", storageClass{
Scheme: "EC",
Parity: 4},
nil},
{3, "AB:4", storageClass{
Scheme: "EC",
Parity: 4},
errors.New("Unsupported scheme AB. Supported scheme is EC")},
{4, "EC:4:5", storageClass{
Scheme: "EC",
Parity: 4},
errors.New("Too many sections in EC:4:5")},
{5, "AB", storageClass{
Scheme: "EC",
Parity: 4},
errors.New("Too few sections in AB")},
}
for _, tt := range tests {
gotSc, err := parseStorageClass(tt.storageClassEnv)
if err != nil && tt.expectedError == nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if err == nil && tt.expectedError != nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if tt.expectedError == nil && !reflect.DeepEqual(gotSc, tt.wantSc) {
t.Errorf("Test %d, Expected %v, got %v", tt.name, tt.wantSc, gotSc)
return
}
if tt.expectedError != nil && !reflect.DeepEqual(err, tt.expectedError) {
t.Errorf("Test %d, Expected %v, got %v", tt.name, tt.expectedError, err)
}
}
}
func TestValidateRRSParity(t *testing.T) {
ExecObjectLayerTestWithDirs(t, testValidateRRSParity)
}
func testValidateRRSParity(obj ObjectLayer, instanceType string, dirs []string, t TestErrHandler) {
// Reset global storage class flags
resetGlobalStorageEnvs()
// Set globalEndpoints for a single node XL setup.
globalEndpoints = mustGetNewEndpointList(dirs...)
tests := []struct {
name int
rrsParity int
ssParity int
expectedError error
}{
{1, 2, 4, nil},
{2, 1, 4, errors.New("Reduced redundancy storage class parity should be greater than or equal to 2")},
{3, 7, 6, errors.New("Reduced redundancy storage class parity disks should be less than 6")},
{4, 9, 0, errors.New("Reduced redundancy storage class parity disks should be less than 8")},
{5, 3, 3, errors.New("Reduced redundancy storage class parity disks should be less than 3")},
}
for _, tt := range tests {
err := validateRRSParity(tt.rrsParity, tt.ssParity)
if err != nil && tt.expectedError == nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if err == nil && tt.expectedError != nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if tt.expectedError != nil && !reflect.DeepEqual(err, tt.expectedError) {
t.Errorf("Test %d, Expected %v, got %v", tt.name, tt.expectedError, err)
}
}
}
func TestValidateSSParity(t *testing.T) {
ExecObjectLayerTestWithDirs(t, testValidateSSParity)
}
func testValidateSSParity(obj ObjectLayer, instanceType string, dirs []string, t TestErrHandler) {
// Reset global storage class flags
resetGlobalStorageEnvs()
// Set globalEndpoints for a single node XL setup.
globalEndpoints = mustGetNewEndpointList(dirs...)
tests := []struct {
name int
ssParity int
rrsParity int
expectedError error
}{
{1, 4, 2, nil},
{2, 6, 5, nil},
{3, 1, 0, errors.New("Standard storage class parity disks should be greater than or equal to 2")},
{4, 4, 6, errors.New("Standard storage class parity disks should be greater than 6")},
{5, 9, 0, errors.New("Standard storage class parity disks should be less than or equal to 8")},
{6, 3, 3, errors.New("Standard storage class parity disks should be greater than 3")},
}
for _, tt := range tests {
err := validateSSParity(tt.ssParity, tt.rrsParity)
if err != nil && tt.expectedError == nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if err == nil && tt.expectedError != nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if tt.expectedError != nil && !reflect.DeepEqual(err, tt.expectedError) {
t.Errorf("Test %d, Expected %v, got %v", tt.name, tt.expectedError, err)
}
}
}
func TestGetDrivesCount(t *testing.T) {
ExecObjectLayerTestWithDirs(t, testGetDrivesCount)
}
func testGetDrivesCount(obj ObjectLayer, instanceType string, dirs []string, t TestErrHandler) {
// Reset global storage class flags
resetGlobalStorageEnvs()
xl := obj.(*xlObjects)
tests := []struct {
name int
sc string
disks []StorageAPI
expectedData int
expectedParity int
}{
{1, reducedRedundancyStorageClass, xl.storageDisks, 14, 2},
{2, standardStorageClass, xl.storageDisks, 8, 8},
{3, "", xl.storageDisks, 8, 8},
{4, reducedRedundancyStorageClass, xl.storageDisks, 9, 7},
{5, standardStorageClass, xl.storageDisks, 10, 6},
}
for _, tt := range tests {
// Set env var for test case 4
if tt.name == 4 {
globalRRStorageClass.Parity = 7
}
// Set env var for test case 5
if tt.name == 5 {
globalStandardStorageClass.Parity = 6
}
data, parity := getDrivesCount(tt.sc, tt.disks)
if data != tt.expectedData {
t.Errorf("Test %d, Expected data disks %d, got %d", tt.name, tt.expectedData, data)
return
}
if parity != tt.expectedParity {
t.Errorf("Test %d, Expected parity disks %d, got %d", tt.name, tt.expectedParity, parity)
return
}
}
}
func TestObjectQuorumFromMeta(t *testing.T) {
ExecObjectLayerTestWithDirs(t, testObjectQuorumFromMeta)
}
func testObjectQuorumFromMeta(obj ObjectLayer, instanceType string, dirs []string, t TestErrHandler) {
// Reset global storage class flags
resetGlobalStorageEnvs()
bucket := getRandomBucketName()
// make data with more than one part
partCount := 3
data := bytes.Repeat([]byte("a"), int(globalPutPartSize)*partCount)
xl := obj.(*xlObjects)
xlDisks := xl.storageDisks
err := obj.MakeBucketWithLocation(bucket, globalMinioDefaultRegion)
if err != nil {
t.Fatalf("Failed to make a bucket %v", err)
}
// Object for test case 1 - No StorageClass defined, no MetaData in PutObject
object1 := "object1"
_, err = obj.PutObject(bucket, object1, mustGetHashReader(t, bytes.NewReader(data), int64(len(data)), "", ""), nil)
if err != nil {
t.Fatalf("Failed to putObject %v", err)
}
parts1, errs1 := readAllXLMetadata(xlDisks, bucket, object1)
// Object for test case 2 - No StorageClass defined, MetaData in PutObject requesting RRS Class
object2 := "object2"
metadata2 := make(map[string]string)
metadata2["x-amz-storage-class"] = reducedRedundancyStorageClass
_, err = obj.PutObject(bucket, object2, mustGetHashReader(t, bytes.NewReader(data), int64(len(data)), "", ""), metadata2)
if err != nil {
t.Fatalf("Failed to putObject %v", err)
}
parts2, errs2 := readAllXLMetadata(xlDisks, bucket, object2)
// Object for test case 3 - No StorageClass defined, MetaData in PutObject requesting Standard Storage Class
object3 := "object3"
metadata3 := make(map[string]string)
metadata3["x-amz-storage-class"] = standardStorageClass
_, err = obj.PutObject(bucket, object3, mustGetHashReader(t, bytes.NewReader(data), int64(len(data)), "", ""), metadata3)
if err != nil {
t.Fatalf("Failed to putObject %v", err)
}
parts3, errs3 := readAllXLMetadata(xlDisks, bucket, object3)
// Object for test case 4 - Standard StorageClass defined as Parity 6, MetaData in PutObject requesting Standard Storage Class
object4 := "object4"
metadata4 := make(map[string]string)
metadata4["x-amz-storage-class"] = standardStorageClass
globalStandardStorageClass = storageClass{
Parity: 6,
Scheme: "EC",
}
_, err = obj.PutObject(bucket, object4, mustGetHashReader(t, bytes.NewReader(data), int64(len(data)), "", ""), metadata4)
if err != nil {
t.Fatalf("Failed to putObject %v", err)
}
parts4, errs4 := readAllXLMetadata(xlDisks, bucket, object4)
// Object for test case 5 - RRS StorageClass defined as Parity 2, MetaData in PutObject requesting RRS Class
// Reset global storage class flags
resetGlobalStorageEnvs()
object5 := "object5"
metadata5 := make(map[string]string)
metadata5["x-amz-storage-class"] = reducedRedundancyStorageClass
globalRRStorageClass = storageClass{
Parity: 2,
Scheme: "EC",
}
_, err = obj.PutObject(bucket, object5, mustGetHashReader(t, bytes.NewReader(data), int64(len(data)), "", ""), metadata5)
if err != nil {
t.Fatalf("Failed to putObject %v", err)
}
parts5, errs5 := readAllXLMetadata(xlDisks, bucket, object5)
// Object for test case 6 - RRS StorageClass defined as Parity 2, MetaData in PutObject requesting Standard Storage Class
// Reset global storage class flags
resetGlobalStorageEnvs()
object6 := "object6"
metadata6 := make(map[string]string)
metadata6["x-amz-storage-class"] = standardStorageClass
globalRRStorageClass = storageClass{
Parity: 2,
Scheme: "EC",
}
_, err = obj.PutObject(bucket, object6, mustGetHashReader(t, bytes.NewReader(data), int64(len(data)), "", ""), metadata6)
if err != nil {
t.Fatalf("Failed to putObject %v", err)
}
parts6, errs6 := readAllXLMetadata(xlDisks, bucket, object6)
// Object for test case 7 - Standard StorageClass defined as Parity 5, MetaData in PutObject requesting RRS Class
// Reset global storage class flags
resetGlobalStorageEnvs()
object7 := "object7"
metadata7 := make(map[string]string)
metadata7["x-amz-storage-class"] = reducedRedundancyStorageClass
globalStandardStorageClass = storageClass{
Parity: 5,
Scheme: "EC",
}
_, err = obj.PutObject(bucket, object7, mustGetHashReader(t, bytes.NewReader(data), int64(len(data)), "", ""), metadata7)
if err != nil {
t.Fatalf("Failed to putObject %v", err)
}
parts7, errs7 := readAllXLMetadata(xlDisks, bucket, object7)
tests := []struct {
name int
xl xlObjects
parts []xlMetaV1
errs []error
expectedReadQuorum int
expectedWriteQuorum int
expectedError error
}{
{1, *xl, parts1, errs1, 8, 9, nil},
{2, *xl, parts2, errs2, 14, 15, nil},
{3, *xl, parts3, errs3, 8, 9, nil},
{4, *xl, parts4, errs4, 10, 11, nil},
{5, *xl, parts5, errs5, 14, 15, nil},
{6, *xl, parts6, errs6, 8, 9, nil},
{7, *xl, parts7, errs7, 14, 15, nil},
}
for _, tt := range tests {
actualReadQuorum, actualWriteQuorum, err := objectQuorumFromMeta(tt.xl, tt.parts, tt.errs)
if tt.expectedError != nil && err == nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if tt.expectedError == nil && err != nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if tt.expectedReadQuorum != actualReadQuorum {
t.Errorf("Test %d, Expected Read Quorum %d, got %d", tt.name, tt.expectedReadQuorum, actualReadQuorum)
return
}
if tt.expectedWriteQuorum != actualWriteQuorum {
t.Errorf("Test %d, Expected Write Quorum %d, got %d", tt.name, tt.expectedWriteQuorum, actualWriteQuorum)
return
}
}
}
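
For reference, the expected quorums in testObjectQuorumFromMeta follow directly from getDrivesCount on the 16-drive XL test setup (the drive count is inferred from the expected values):

    default (no storage class): data = parity = 8    -> read quorum 8,  write quorum 9
    RRS with parity 2:          data = 16 - 2 = 14   -> read quorum 14, write quorum 15
    Standard with parity 6:     data = 16 - 6 = 10   -> read quorum 10, write quorum 11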


@@ -489,6 +489,12 @@ func resetGlobalIsEnvs() {
globalIsEnvCreds = false
globalIsEnvBrowser = false
globalIsEnvRegion = false
globalIsStorageClass = false
}
func resetGlobalStorageEnvs() {
globalStandardStorageClass = storageClass{}
globalRRStorageClass = storageClass{}
}
// Resets all the globals used modified in tests.
@@ -510,6 +516,8 @@ func resetTestGlobals() {
resetGlobalIsXL()
// Reset global isEnvCreds flag.
resetGlobalIsEnvs()
// Reset global storage class flags
resetGlobalStorageEnvs()
}
// Configure the server for the test run.
@@ -1968,6 +1976,9 @@ type objAPITestType func(obj ObjectLayer, instanceType string, bucketName string
// Regular object test type.
type objTestType func(obj ObjectLayer, instanceType string, t TestErrHandler)
// Special test type for test with directories
type objTestTypeWithDirs func(obj ObjectLayer, instanceType string, dirs []string, t TestErrHandler)
// Special object test type for disk not found situations.
type objTestDiskNotFoundType func(obj ObjectLayer, instanceType string, dirs []string, t *testing.T)
@@ -1999,6 +2010,31 @@ func ExecObjectLayerTest(t TestErrHandler, objTest objTestType) {
defer removeRoots(append(fsDirs, fsDir))
}
// ExecObjectLayerTestWithDirs - executes object layer tests.
// Creates single node and XL ObjectLayer instance and runs test for both the layers.
func ExecObjectLayerTestWithDirs(t TestErrHandler, objTest objTestTypeWithDirs) {
// initialize the server and obtain the credentials and root.
// credentials are necessary to sign the HTTP request.
rootPath, err := newTestConfig(globalMinioDefaultRegion)
if err != nil {
t.Fatal("Unexpected error", err)
}
defer os.RemoveAll(rootPath)
objLayer, fsDir, err := prepareFS()
if err != nil {
t.Fatalf("Initialization of object layer failed for single node setup: %s", err)
}
objLayer, fsDirs, err := prepareXL()
if err != nil {
t.Fatalf("Initialization of object layer failed for XL setup: %s", err)
}
// Executing the object layer tests for XL.
objTest(objLayer, XLTestStr, fsDirs, t)
defer removeRoots(append(fsDirs, fsDir))
}
// ExecObjectLayerDiskAlteredTest - executes object layer tests while altering
// disks in between tests. Creates XL ObjectLayer instance and runs test for XL layer.
func ExecObjectLayerDiskAlteredTest(t *testing.T, objTest objTestDiskNotFoundType) {


@@ -64,7 +64,8 @@ func (xl xlObjects) MakeBucketWithLocation(bucket, location string) error {
// Wait for all make vol to finish.
wg.Wait()
err := reduceWriteQuorumErrs(dErrs, bucketOpIgnoredErrs, xl.writeQuorum)
writeQuorum := len(xl.storageDisks)/2 + 1
err := reduceWriteQuorumErrs(dErrs, bucketOpIgnoredErrs, writeQuorum)
if errors.Cause(err) == errXLWriteQuorum {
// Purge successfully created buckets if we don't have writeQuorum.
undoMakeBucket(xl.storageDisks, bucket)
@@ -142,7 +143,8 @@ func (xl xlObjects) getBucketInfo(bucketName string) (bucketInfo BucketInfo, err
// reduce to one error based on read quorum.
// `nil` is deliberately passed for ignoredErrs
// because these errors were already ignored.
return BucketInfo{}, reduceReadQuorumErrs(bucketErrs, nil, xl.readQuorum)
readQuorum := len(xl.storageDisks) / 2
return BucketInfo{}, reduceReadQuorumErrs(bucketErrs, nil, readQuorum)
}
// GetBucketInfo - returns BucketInfo for a bucket.
@@ -251,7 +253,8 @@ func (xl xlObjects) DeleteBucket(bucket string) error {
// Wait for all the delete vols to finish.
wg.Wait()
err := reduceWriteQuorumErrs(dErrs, bucketOpIgnoredErrs, xl.writeQuorum)
writeQuorum := len(xl.storageDisks)/2 + 1
err := reduceWriteQuorumErrs(dErrs, bucketOpIgnoredErrs, writeQuorum)
if errors.Cause(err) == errXLWriteQuorum {
xl.undoDeleteBucket(bucket)
}


@@ -121,6 +121,29 @@ func listOnlineDisks(disks []StorageAPI, partsMetadata []xlMetaV1, errs []error)
return onlineDisks, modTime
}
// Returns one of the latest updated xlMeta files and the count of valid xlMeta(s) that share that latest update
func getLatestXLMeta(partsMetadata []xlMetaV1, errs []error) (xlMetaV1, int) {
// List all the file commit ids from parts metadata.
modTimes := listObjectModtimes(partsMetadata, errs)
// Count all latest updated xlMeta values
var count int
var latestXLMeta xlMetaV1
// Reduce the list of modTimes to a single common value - i.e. the latest update time
modTime, _ := commonTime(modTimes)
// Iterate through all the modTimes and count the xlMeta(s) with the latest time.
for index, t := range modTimes {
if t == modTime && partsMetadata[index].IsValid() {
latestXLMeta = partsMetadata[index]
count++
}
}
// Return one of the latest xlMetaData, and the count of latest updated xlMeta files
return latestXLMeta, count
}
// outDatedDisks - return disks which don't have the latest object (i.e xl.json).
// disks that are offline are not 'marked' outdated.
func outDatedDisks(disks, latestDisks []StorageAPI, errs []error, partsMetadata []xlMetaV1,
@@ -184,7 +207,11 @@ func xlHealStat(xl xlObjects, partsMetadata []xlMetaV1, errs []error) HealObject
// Less than quorum erasure coded blocks of the object have the same create time.
// This object can't be healed with the information we have.
modTime, count := commonTime(listObjectModtimes(partsMetadata, errs))
if count < xl.readQuorum {
// get read quorum for this object
readQuorum, _, err := objectQuorumFromMeta(xl, partsMetadata, errs)
if count < readQuorum || err != nil {
return HealObjectInfo{
Status: quorumUnavailable,
MissingDataCount: 0,
@@ -217,7 +244,7 @@ func xlHealStat(xl xlObjects, partsMetadata []xlMetaV1, errs []error) HealObject
disksMissing = true
fallthrough
case errFileNotFound:
if xlMeta.Erasure.Distribution[i]-1 < xl.dataBlocks {
if xlMeta.Erasure.Distribution[i]-1 < xlMeta.Erasure.DataBlocks {
missingDataCount++
} else {
missingParityCount++


@@ -364,7 +364,8 @@ func TestDisksWithAllParts(t *testing.T) {
}
partsMetadata, errs := readAllXLMetadata(xlDisks, bucket, object)
if reducedErr := reduceReadQuorumErrs(errs, objectOpIgnoredErrs, xl.readQuorum); reducedErr != nil {
readQuorum := len(xl.storageDisks) / 2
if reducedErr := reduceReadQuorumErrs(errs, objectOpIgnoredErrs, readQuorum); reducedErr != nil {
t.Fatalf("Failed to read xl meta data %v", reducedErr)
}


@@ -81,13 +81,16 @@ func (xl xlObjects) HealBucket(bucket string) error {
return err
}
// get write quorum for an object
writeQuorum := len(xl.storageDisks)/2 + 1
// Heal bucket.
if err := healBucket(xl.storageDisks, bucket, xl.writeQuorum); err != nil {
if err := healBucket(xl.storageDisks, bucket, writeQuorum); err != nil {
return err
}
// Proceed to heal bucket metadata.
return healBucketMetadata(xl.storageDisks, bucket, xl.readQuorum)
return healBucketMetadata(xl, bucket)
}
// Heal bucket - create buckets on disks where it does not exist.
@@ -139,17 +142,11 @@ func healBucket(storageDisks []StorageAPI, bucket string, writeQuorum int) error
// Heals all the metadata associated for a given bucket, this function
// heals `policy.json`, `notification.xml` and `listeners.json`.
func healBucketMetadata(storageDisks []StorageAPI, bucket string, readQuorum int) error {
func healBucketMetadata(xlObj xlObjects, bucket string) error {
healBucketMetaFn := func(metaPath string) error {
metaLock := globalNSMutex.NewNSLock(minioMetaBucket, metaPath)
if err := metaLock.GetRLock(globalHealingTimeout); err != nil {
if _, _, err := xlObj.HealObject(minioMetaBucket, metaPath); err != nil && !isErrObjectNotFound(err) {
return err
}
defer metaLock.RUnlock()
// Heals the given file at metaPath.
if _, _, err := healObject(storageDisks, minioMetaBucket, metaPath, readQuorum); err != nil && !isErrObjectNotFound(err) {
return err
} // Success.
return nil
}
@@ -308,9 +305,9 @@ func (xl xlObjects) ListBucketsHeal() ([]BucketInfo, error) {
// during startup i.e healing of buckets, bucket metadata (policy.json,
// notification.xml, listeners.json) etc. Currently this function
// supports quick healing of buckets, bucket metadata.
func quickHeal(storageDisks []StorageAPI, writeQuorum int, readQuorum int) error {
func quickHeal(xlObj xlObjects, writeQuorum int, readQuorum int) error {
// List all bucket name occurrence from all disks.
_, bucketOcc, err := listAllBuckets(storageDisks)
_, bucketOcc, err := listAllBuckets(xlObj.storageDisks)
if err != nil {
return err
}
@@ -318,10 +315,10 @@ func quickHeal(storageDisks []StorageAPI, writeQuorum int, readQuorum int) error
// All bucket names and bucket metadata that should be healed.
for bucketName, occCount := range bucketOcc {
// Heal bucket only if healing is needed.
if occCount != len(storageDisks) {
if occCount != len(xlObj.storageDisks) {
// Heal bucket and then proceed to heal bucket metadata if any.
if err = healBucket(storageDisks, bucketName, writeQuorum); err == nil {
if err = healBucketMetadata(storageDisks, bucketName, readQuorum); err == nil {
if err = healBucket(xlObj.storageDisks, bucketName, writeQuorum); err == nil {
if err = healBucketMetadata(xlObj, bucketName); err == nil {
continue
}
return err
@@ -535,6 +532,15 @@ func healObject(storageDisks []StorageAPI, bucket, object string, quorum int) (i
// and later the disk comes back up again, heal on the object
// should delete it.
func (xl xlObjects) HealObject(bucket, object string) (int, int, error) {
// Read metadata files from all the disks
partsMetadata, errs := readAllXLMetadata(xl.storageDisks, bucket, object)
// get read quorum for this object
readQuorum, _, err := objectQuorumFromMeta(xl, partsMetadata, errs)
if err != nil {
return 0, 0, err
}
// Lock the object before healing.
objectLock := globalNSMutex.NewNSLock(bucket, object)
if err := objectLock.GetRLock(globalHealingTimeout); err != nil {
@@ -543,5 +549,5 @@ func (xl xlObjects) HealObject(bucket, object string) (int, int, error) {
defer objectLock.RUnlock()
// Heal the object.
return healObject(xl.storageDisks, bucket, object, xl.readQuorum)
return healObject(xl.storageDisks, bucket, object, readQuorum)
}


@@ -333,8 +333,12 @@ func TestQuickHeal(t *testing.T) {
}
}
// figure out read and write quorum
readQuorum := len(xl.storageDisks) / 2
writeQuorum := len(xl.storageDisks)/2 + 1
// Heal the missing buckets.
if err = quickHeal(xl.storageDisks, xl.writeQuorum, xl.readQuorum); err != nil {
if err = quickHeal(*xl, writeQuorum, readQuorum); err != nil {
t.Fatal(err)
}
@@ -351,7 +355,7 @@ func TestQuickHeal(t *testing.T) {
t.Fatal("storage disk is not *retryStorage type")
}
xl.storageDisks[0] = newNaughtyDisk(posixDisk, nil, errUnformattedDisk)
if err = quickHeal(xl.storageDisks, xl.writeQuorum, xl.readQuorum); err != errUnformattedDisk {
if err = quickHeal(*xl, writeQuorum, readQuorum); err != errUnformattedDisk {
t.Fatal(err)
}
@@ -368,7 +372,7 @@ func TestQuickHeal(t *testing.T) {
}
xl = obj.(*xlObjects)
xl.storageDisks[0] = nil
if err = quickHeal(xl.storageDisks, xl.writeQuorum, xl.readQuorum); err != nil {
if err = quickHeal(*xl, writeQuorum, readQuorum); err != nil {
t.Fatal("Got an unexpected error: ", err)
}
@@ -390,7 +394,7 @@ func TestQuickHeal(t *testing.T) {
t.Fatal("storage disk is not *retryStorage type")
}
xl.storageDisks[0] = newNaughtyDisk(posixDisk, nil, errDiskNotFound)
if err = quickHeal(xl.storageDisks, xl.writeQuorum, xl.readQuorum); err != nil {
if err = quickHeal(*xl, writeQuorum, readQuorum); err != nil {
t.Fatal("Got an unexpected error: ", err)
}
}
@@ -533,7 +537,8 @@ func TestHealObjectXL(t *testing.T) {
// Try healing now, expect to receive errDiskNotFound.
_, _, err = obj.HealObject(bucket, object)
if errors.Cause(err) != errDiskNotFound {
// Since a majority of the xl.json files are not available, the object quorum can't be read properly and the error will be errXLReadQuorum
if errors.Cause(err) != errXLReadQuorum {
t.Errorf("Expected %v but received %v", errDiskNotFound, err)
}
}


@@ -396,7 +396,8 @@ func (xl xlObjects) readXLMetaParts(bucket, object string) (xlMetaParts []object
}
// If all errors were ignored, reduce to maximal occurrence
// based on the read quorum.
return nil, reduceReadQuorumErrs(ignoredErrs, nil, xl.readQuorum)
readQuorum := len(xl.storageDisks) / 2
return nil, reduceReadQuorumErrs(ignoredErrs, nil, readQuorum)
}
// readXLMetaStat - return xlMetaV1.Stat and xlMetaV1.Meta from one of the disks picked at random.
@@ -423,7 +424,8 @@ func (xl xlObjects) readXLMetaStat(bucket, object string) (xlStat statInfo, xlMe
}
// If all errors were ignored, reduce to maximal occurrence
// based on the read quorum.
return statInfo{}, nil, reduceReadQuorumErrs(ignoredErrs, nil, xl.readQuorum)
readQuorum := len(xl.storageDisks) / 2
return statInfo{}, nil, reduceReadQuorumErrs(ignoredErrs, nil, readQuorum)
}
// deleteXLMetadata - deletes `xl.json` on a single disk.
@@ -513,7 +515,7 @@ func writeUniqueXLMetadata(disks []StorageAPI, bucket, prefix string, xlMetas []
}
// writeSameXLMetadata - write `xl.json` on all disks in order.
func writeSameXLMetadata(disks []StorageAPI, bucket, prefix string, xlMeta xlMetaV1, writeQuorum, readQuorum int) ([]StorageAPI, error) {
func writeSameXLMetadata(disks []StorageAPI, bucket, prefix string, xlMeta xlMetaV1, writeQuorum int) ([]StorageAPI, error) {
var wg = &sync.WaitGroup{}
var mErrs = make([]error, len(disks))


@@ -32,7 +32,7 @@ import (
)
// updateUploadJSON - add or remove upload ID info in all `uploads.json`.
func (xl xlObjects) updateUploadJSON(bucket, object, uploadID string, initiated time.Time, isRemove bool) error {
func (xl xlObjects) updateUploadJSON(bucket, object, uploadID string, initiated time.Time, writeQuorum int, isRemove bool) error {
uploadsPath := path.Join(bucket, object, uploadsJSONFile)
tmpUploadsPath := mustGetUUID()
@@ -95,7 +95,7 @@ func (xl xlObjects) updateUploadJSON(bucket, object, uploadID string, initiated
// Wait for all the writes to finish.
wg.Wait()
err := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, xl.writeQuorum)
err := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, writeQuorum)
if errors.Cause(err) == errXLWriteQuorum {
// No quorum. Perform cleanup on the minority of disks
// on which the operation succeeded.
@@ -151,13 +151,13 @@ func (xl xlObjects) updateUploadJSON(bucket, object, uploadID string, initiated
}
// addUploadID - add upload ID and its initiated time to 'uploads.json'.
func (xl xlObjects) addUploadID(bucket, object string, uploadID string, initiated time.Time) error {
return xl.updateUploadJSON(bucket, object, uploadID, initiated, false)
func (xl xlObjects) addUploadID(bucket, object string, uploadID string, initiated time.Time, writeQuorum int) error {
return xl.updateUploadJSON(bucket, object, uploadID, initiated, writeQuorum, false)
}
// removeUploadID - remove upload ID in 'uploads.json'.
func (xl xlObjects) removeUploadID(bucket, object string, uploadID string) error {
return xl.updateUploadJSON(bucket, object, uploadID, time.Time{}, true)
func (xl xlObjects) removeUploadID(bucket, object string, uploadID string, writeQuorum int) error {
return xl.updateUploadJSON(bucket, object, uploadID, time.Time{}, writeQuorum, true)
}
// Returns if the prefix is a multipart upload.
@@ -228,7 +228,8 @@ func (xl xlObjects) statPart(bucket, object, uploadID, partName string) (fileInf
}
// If all errors were ignored, reduce to maximal occurrence
// based on the read quorum.
return FileInfo{}, reduceReadQuorumErrs(ignoredErrs, nil, xl.readQuorum)
readQuorum := len(xl.storageDisks) / 2
return FileInfo{}, reduceReadQuorumErrs(ignoredErrs, nil, readQuorum)
}
// commitXLMetadata - commit `xl.json` from source prefix to destination prefix in the given slice of disks.
@@ -499,7 +500,15 @@ func (xl xlObjects) ListMultipartUploads(bucket, object, keyMarker, uploadIDMark
// disks. `uploads.json` carries metadata regarding on-going multipart
// operation(s) on the object.
func (xl xlObjects) newMultipartUpload(bucket string, object string, meta map[string]string) (string, error) {
xlMeta := newXLMetaV1(object, xl.dataBlocks, xl.parityBlocks)
dataBlocks, parityBlocks := getDrivesCount(meta[amzStorageClass], xl.storageDisks)
xlMeta := newXLMetaV1(object, dataBlocks, parityBlocks)
// we now know the number of blocks this object needs for data and parity.
// establish the writeQuorum using this data
writeQuorum := dataBlocks + 1
// If not set default to "application/octet-stream"
if meta["content-type"] == "" {
contentType := "application/octet-stream"
@@ -528,7 +537,7 @@ func (xl xlObjects) newMultipartUpload(bucket string, object string, meta map[st
tempUploadIDPath := uploadID
// Write updated `xl.json` to all disks.
disks, err := writeSameXLMetadata(xl.storageDisks, minioMetaTmpBucket, tempUploadIDPath, xlMeta, xl.writeQuorum, xl.readQuorum)
disks, err := writeSameXLMetadata(xl.storageDisks, minioMetaTmpBucket, tempUploadIDPath, xlMeta, writeQuorum)
if err != nil {
return "", toObjectErr(err, minioMetaTmpBucket, tempUploadIDPath)
}
@@ -538,14 +547,14 @@ func (xl xlObjects) newMultipartUpload(bucket string, object string, meta map[st
defer xl.deleteObject(minioMetaTmpBucket, tempUploadIDPath)
// Attempt to rename temp upload object to actual upload path object
_, rErr := renameObject(disks, minioMetaTmpBucket, tempUploadIDPath, minioMetaMultipartBucket, uploadIDPath, xl.writeQuorum)
_, rErr := renameObject(disks, minioMetaTmpBucket, tempUploadIDPath, minioMetaMultipartBucket, uploadIDPath, writeQuorum)
if rErr != nil {
return "", toObjectErr(rErr, minioMetaMultipartBucket, uploadIDPath)
}
initiated := UTCNow()
// Create or update 'uploads.json'
if err = xl.addUploadID(bucket, object, uploadID, initiated); err != nil {
if err = xl.addUploadID(bucket, object, uploadID, initiated, writeQuorum); err != nil {
return "", err
}
// Return success.
@@ -641,7 +650,14 @@ func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, d
// Read metadata associated with the object from all disks.
partsMetadata, errs = readAllXLMetadata(xl.storageDisks, minioMetaMultipartBucket,
uploadIDPath)
reducedErr := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, xl.writeQuorum)
// get Quorum for this object
_, writeQuorum, err := objectQuorumFromMeta(xl, partsMetadata, errs)
if err != nil {
return pi, toObjectErr(err, bucket, object)
}
reducedErr := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, writeQuorum)
if errors.Cause(reducedErr) == errXLWriteQuorum {
preUploadIDLock.RUnlock()
return pi, toObjectErr(reducedErr, bucket, object)
@@ -669,7 +685,7 @@ func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, d
// Delete the temporary object part. If PutObjectPart succeeds there would be nothing to delete.
defer xl.deleteObject(minioMetaTmpBucket, tmpPart)
if data.Size() > 0 {
if pErr := xl.prepareFile(minioMetaTmpBucket, tmpPartPath, data.Size(), onlineDisks, xlMeta.Erasure.BlockSize, xlMeta.Erasure.DataBlocks); err != nil {
if pErr := xl.prepareFile(minioMetaTmpBucket, tmpPartPath, data.Size(), onlineDisks, xlMeta.Erasure.BlockSize, xlMeta.Erasure.DataBlocks, writeQuorum); pErr != nil {
return pi, toObjectErr(pErr, bucket, object)
}
@@ -680,7 +696,7 @@ func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, d
return pi, toObjectErr(err, bucket, object)
}
buffer := make([]byte, xlMeta.Erasure.BlockSize, 2*xlMeta.Erasure.BlockSize) // alloc additional space for parity blocks created while erasure coding
file, err := storage.CreateFile(data, minioMetaTmpBucket, tmpPartPath, buffer, DefaultBitrotAlgorithm, xl.writeQuorum)
file, err := storage.CreateFile(data, minioMetaTmpBucket, tmpPartPath, buffer, DefaultBitrotAlgorithm, writeQuorum)
if err != nil {
return pi, toObjectErr(err, bucket, object)
}
@@ -705,14 +721,14 @@ func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, d
// Rename temporary part file to its final location.
partPath := path.Join(uploadIDPath, partSuffix)
onlineDisks, err = renamePart(onlineDisks, minioMetaTmpBucket, tmpPartPath, minioMetaMultipartBucket, partPath, xl.writeQuorum)
onlineDisks, err = renamePart(onlineDisks, minioMetaTmpBucket, tmpPartPath, minioMetaMultipartBucket, partPath, writeQuorum)
if err != nil {
return pi, toObjectErr(err, minioMetaMultipartBucket, partPath)
}
// Read metadata again because it might be updated with parallel upload of another part.
partsMetadata, errs = readAllXLMetadata(onlineDisks, minioMetaMultipartBucket, uploadIDPath)
reducedErr = reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, xl.writeQuorum)
reducedErr = reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, writeQuorum)
if errors.Cause(reducedErr) == errXLWriteQuorum {
return pi, toObjectErr(reducedErr, bucket, object)
}
@@ -747,11 +763,11 @@ func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, d
tempXLMetaPath := newUUID
// Writes a unique `xl.json` each disk carrying new checksum related information.
if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempXLMetaPath, partsMetadata, xl.writeQuorum); err != nil {
if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempXLMetaPath, partsMetadata, writeQuorum); err != nil {
return pi, toObjectErr(err, minioMetaTmpBucket, tempXLMetaPath)
}
if _, err = commitXLMetadata(onlineDisks, minioMetaTmpBucket, tempXLMetaPath, minioMetaMultipartBucket, uploadIDPath, xl.writeQuorum); err != nil {
if _, err = commitXLMetadata(onlineDisks, minioMetaTmpBucket, tempXLMetaPath, minioMetaMultipartBucket, uploadIDPath, writeQuorum); err != nil {
return pi, toObjectErr(err, minioMetaMultipartBucket, uploadIDPath)
}
@@ -904,7 +920,14 @@ func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, upload
// Read metadata associated with the object from all disks.
partsMetadata, errs := readAllXLMetadata(xl.storageDisks, minioMetaMultipartBucket, uploadIDPath)
reducedErr := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, xl.writeQuorum)
// get Quorum for this object
_, writeQuorum, err := objectQuorumFromMeta(xl, partsMetadata, errs)
if err != nil {
return oi, toObjectErr(err, bucket, object)
}
reducedErr := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, writeQuorum)
if errors.Cause(reducedErr) == errXLWriteQuorum {
return oi, toObjectErr(reducedErr, bucket, object)
}
@@ -992,12 +1015,12 @@ func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, upload
}
// Write unique `xl.json` for each disk.
if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempUploadIDPath, partsMetadata, xl.writeQuorum); err != nil {
if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempUploadIDPath, partsMetadata, writeQuorum); err != nil {
return oi, toObjectErr(err, minioMetaTmpBucket, tempUploadIDPath)
}
var rErr error
onlineDisks, rErr = commitXLMetadata(onlineDisks, minioMetaTmpBucket, tempUploadIDPath, minioMetaMultipartBucket, uploadIDPath, xl.writeQuorum)
onlineDisks, rErr = commitXLMetadata(onlineDisks, minioMetaTmpBucket, tempUploadIDPath, minioMetaMultipartBucket, uploadIDPath, writeQuorum)
if rErr != nil {
return oi, toObjectErr(rErr, minioMetaMultipartBucket, uploadIDPath)
}
@@ -1025,7 +1048,7 @@ func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, upload
// NOTE: Do not use online disks slice here.
// The reason is that existing object should be purged
// regardless of `xl.json` status and rolled back in case of errors.
_, err = renameObject(xl.storageDisks, bucket, object, minioMetaTmpBucket, newUniqueID, xl.writeQuorum)
_, err = renameObject(xl.storageDisks, bucket, object, minioMetaTmpBucket, newUniqueID, writeQuorum)
if err != nil {
return oi, toObjectErr(err, bucket, object)
}
@@ -1045,7 +1068,7 @@ func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, upload
}
// Rename the multipart object to final location.
if _, err = renameObject(onlineDisks, minioMetaMultipartBucket, uploadIDPath, bucket, object, xl.writeQuorum); err != nil {
if _, err = renameObject(onlineDisks, minioMetaMultipartBucket, uploadIDPath, bucket, object, writeQuorum); err != nil {
return oi, toObjectErr(err, bucket, object)
}
@@ -1060,7 +1083,7 @@ func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, upload
defer objectMPartPathLock.Unlock()
// remove entry from uploads.json with quorum
if err = xl.removeUploadID(bucket, object, uploadID); err != nil {
if err = xl.removeUploadID(bucket, object, uploadID, writeQuorum); err != nil {
return oi, toObjectErr(err, minioMetaMultipartBucket, path.Join(bucket, object))
}
@@ -1081,13 +1104,10 @@ func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, upload
}
// Wrapper which removes all the uploaded parts.
func (xl xlObjects) cleanupUploadedParts(bucket, object, uploadID string) error {
func (xl xlObjects) cleanupUploadedParts(uploadIDPath string, writeQuorum int) error {
var errs = make([]error, len(xl.storageDisks))
var wg = &sync.WaitGroup{}
// Construct uploadIDPath.
uploadIDPath := path.Join(bucket, object, uploadID)
// Cleanup uploadID for all disks.
for index, disk := range xl.storageDisks {
if disk == nil {
@@ -1108,7 +1128,7 @@ func (xl xlObjects) cleanupUploadedParts(bucket, object, uploadID string) error
// Wait for all the cleanups to finish.
wg.Wait()
return reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, xl.writeQuorum)
return reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, writeQuorum)
}
// abortMultipartUpload - wrapper for purging an ongoing multipart
@@ -1116,8 +1136,20 @@ func (xl xlObjects) cleanupUploadedParts(bucket, object, uploadID string) error
// the directory at '.minio.sys/multipart/bucket/object/uploadID' holding
// all the upload parts.
func (xl xlObjects) abortMultipartUpload(bucket, object, uploadID string) (err error) {
// Construct uploadIDPath.
uploadIDPath := path.Join(bucket, object, uploadID)
// Read metadata associated with the object from all disks.
partsMetadata, errs := readAllXLMetadata(xl.storageDisks, minioMetaMultipartBucket, uploadIDPath)
// get Quorum for this object
_, writeQuorum, err := objectQuorumFromMeta(xl, partsMetadata, errs)
if err != nil {
return toObjectErr(err, bucket, object)
}
// Cleanup all uploaded parts.
if err = xl.cleanupUploadedParts(bucket, object, uploadID); err != nil {
if err = xl.cleanupUploadedParts(uploadIDPath, writeQuorum); err != nil {
return toObjectErr(err, bucket, object)
}
@@ -1131,7 +1163,7 @@ func (xl xlObjects) abortMultipartUpload(bucket, object, uploadID string) (err e
defer objectMPartPathLock.Unlock()
// remove entry from uploads.json with quorum
if err = xl.removeUploadID(bucket, object, uploadID); err != nil {
if err = xl.removeUploadID(bucket, object, uploadID, writeQuorum); err != nil {
return toObjectErr(err, bucket, object)
}

View File

@@ -142,19 +142,20 @@ func TestUpdateUploadJSON(t *testing.T) {
}
testCases := []struct {
uploadID string
initiated time.Time
isRemove bool
errVal error
uploadID string
initiated time.Time
writeQuorum int
isRemove bool
errVal error
}{
{"111abc", UTCNow(), false, nil},
{"222abc", UTCNow(), false, nil},
{"111abc", time.Time{}, true, nil},
{"111abc", UTCNow(), 9, false, nil},
{"222abc", UTCNow(), 10, false, nil},
{"111abc", time.Time{}, 11, true, nil},
}
xl := obj.(*xlObjects)
for i, test := range testCases {
testErrVal := xl.updateUploadJSON(bucket, object, test.uploadID, test.initiated, test.isRemove)
testErrVal := xl.updateUploadJSON(bucket, object, test.uploadID, test.initiated, test.writeQuorum, test.isRemove)
if testErrVal != test.errVal {
t.Errorf("Test %d: Expected error value %v, but got %v",
i+1, test.errVal, testErrVal)
@@ -166,7 +167,7 @@ func TestUpdateUploadJSON(t *testing.T) {
xl.storageDisks[i] = newNaughtyDisk(xl.storageDisks[i].(*retryStorage), nil, errFaultyDisk)
}
testErrVal := xl.updateUploadJSON(bucket, object, "222abc", UTCNow(), false)
testErrVal := xl.updateUploadJSON(bucket, object, "222abc", UTCNow(), 10, false)
if testErrVal == nil || testErrVal.Error() != errXLWriteQuorum.Error() {
t.Errorf("Expected write quorum error, but got: %v", testErrVal)
}

View File

@@ -34,7 +34,7 @@ import (
var objectOpIgnoredErrs = append(baseIgnoredErrs, errDiskAccessDenied)
// prepareFile hints the bottom layer to optimize the creation of a new object
func (xl xlObjects) prepareFile(bucket, object string, size int64, onlineDisks []StorageAPI, blockSize int64, dataBlocks int) error {
func (xl xlObjects) prepareFile(bucket, object string, size int64, onlineDisks []StorageAPI, blockSize int64, dataBlocks, writeQuorum int) error {
pErrs := make([]error, len(onlineDisks))
// Calculate the real size of the part in one disk.
actualSize := xl.sizeOnDisk(size, blockSize, dataBlocks)
@@ -49,7 +49,7 @@ func (xl xlObjects) prepareFile(bucket, object string, size int64, onlineDisks [
}
}
}
return reduceWriteQuorumErrs(pErrs, objectOpIgnoredErrs, xl.writeQuorum)
return reduceWriteQuorumErrs(pErrs, objectOpIgnoredErrs, writeQuorum)
}
/// Object Operations
@@ -60,7 +60,14 @@ func (xl xlObjects) prepareFile(bucket, object string, size int64, onlineDisks [
func (xl xlObjects) CopyObject(srcBucket, srcObject, dstBucket, dstObject string, metadata map[string]string) (oi ObjectInfo, e error) {
// Read metadata associated with the object from all disks.
metaArr, errs := readAllXLMetadata(xl.storageDisks, srcBucket, srcObject)
if reducedErr := reduceReadQuorumErrs(errs, objectOpIgnoredErrs, xl.readQuorum); reducedErr != nil {
// get Quorum for this object
readQuorum, writeQuorum, err := objectQuorumFromMeta(xl, metaArr, errs)
if err != nil {
return oi, toObjectErr(err, srcBucket, srcObject)
}
if reducedErr := reduceReadQuorumErrs(errs, objectOpIgnoredErrs, readQuorum); reducedErr != nil {
return oi, toObjectErr(reducedErr, srcBucket, srcObject)
}
@@ -92,11 +99,11 @@ func (xl xlObjects) CopyObject(srcBucket, srcObject, dstBucket, dstObject string
tempObj := mustGetUUID()
// Write unique `xl.json` for each disk.
if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, xl.writeQuorum); err != nil {
if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, writeQuorum); err != nil {
return oi, toObjectErr(err, srcBucket, srcObject)
}
// Rename atomically `xl.json` from tmp location to destination for each disk.
if _, err = renameXLMetadata(onlineDisks, minioMetaTmpBucket, tempObj, srcBucket, srcObject, xl.writeQuorum); err != nil {
if _, err = renameXLMetadata(onlineDisks, minioMetaTmpBucket, tempObj, srcBucket, srcObject, writeQuorum); err != nil {
return oi, toObjectErr(err, srcBucket, srcObject)
}
return xlMeta.ToObjectInfo(srcBucket, srcObject), nil
@@ -154,7 +161,14 @@ func (xl xlObjects) GetObject(bucket, object string, startOffset int64, length i
// Read metadata associated with the object from all disks.
metaArr, errs := readAllXLMetadata(xl.storageDisks, bucket, object)
if reducedErr := reduceReadQuorumErrs(errs, objectOpIgnoredErrs, xl.readQuorum); reducedErr != nil {
// get Quorum for this object
readQuorum, _, err := objectQuorumFromMeta(xl, metaArr, errs)
if err != nil {
return toObjectErr(err, bucket, object)
}
if reducedErr := reduceReadQuorumErrs(errs, objectOpIgnoredErrs, readQuorum); reducedErr != nil {
return toObjectErr(reducedErr, bucket, object)
}
@@ -368,7 +382,7 @@ func undoRename(disks []StorageAPI, srcBucket, srcEntry, dstBucket, dstEntry str
// rename - common function that renamePart and renameObject use to rename
// the respective underlying storage layer representations.
func rename(disks []StorageAPI, srcBucket, srcEntry, dstBucket, dstEntry string, isDir bool, quorum int) ([]StorageAPI, error) {
func rename(disks []StorageAPI, srcBucket, srcEntry, dstBucket, dstEntry string, isDir bool, writeQuorum int) ([]StorageAPI, error) {
// Initialize sync waitgroup.
var wg = &sync.WaitGroup{}
@@ -399,9 +413,9 @@ func rename(disks []StorageAPI, srcBucket, srcEntry, dstBucket, dstEntry string,
// Wait for all renames to finish.
wg.Wait()
// We can safely allow RenameFile errors up to len(xl.storageDisks) - xl.writeQuorum
// We can safely allow RenameFile errors up to len(xl.storageDisks) - writeQuorum
// otherwise return failure. Cleanup successful renames.
err := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, quorum)
err := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, writeQuorum)
if errors.Cause(err) == errXLWriteQuorum {
// Undo all the partial rename operations.
undoRename(disks, srcBucket, srcEntry, dstBucket, dstEntry, isDir, errs)
@@ -493,11 +507,17 @@ func (xl xlObjects) PutObject(bucket string, object string, data *hash.Reader, m
}
}
}
// Get parity and data drive count based on storage class metadata
dataDrives, parityDrives := getDrivesCount(metadata[amzStorageClass], xl.storageDisks)
// We now know the number of data and parity blocks this object needs;
// writeQuorum is dataBlocks + 1.
writeQuorum := dataDrives + 1
// Initialize parts metadata
partsMetadata := make([]xlMetaV1, len(xl.storageDisks))
xlMeta := newXLMetaV1(object, xl.dataBlocks, xl.parityBlocks)
xlMeta := newXLMetaV1(object, dataDrives, parityDrives)
// Initialize xl meta.
for index := range partsMetadata {
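getDrivesCount is called here but not defined in this excerpt. As a rough sketch of the mapping it presumably performs (the constant names come from this change, while the switch layout and the default reduced-redundancy parity of 2 are assumptions), it could look like:

// Sketch only: map a storage class value to a data/parity drive split.
func getDrivesCount(sc string, disks []StorageAPI) (data, parity int) {
	totalDisks := len(disks)
	switch sc {
	case reducedRedundancyStorageClass:
		if globalRRStorageClass.Scheme != "" {
			parity = globalRRStorageClass.Parity
		} else {
			parity = 2 // assumed default parity for reduced redundancy
		}
	default: // standard storage class, or no storage class supplied
		if globalStandardStorageClass.Scheme != "" {
			parity = globalStandardStorageClass.Parity
		} else {
			parity = totalDisks / 2
		}
	}
	return totalDisks - parity, parity
}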
@@ -541,7 +561,7 @@ func (xl xlObjects) PutObject(bucket string, object string, data *hash.Reader, m
// This is only an optimization.
var curPartReader io.Reader
if curPartSize > 0 {
pErr := xl.prepareFile(minioMetaTmpBucket, tempErasureObj, curPartSize, storage.disks, xlMeta.Erasure.BlockSize, xlMeta.Erasure.DataBlocks)
pErr := xl.prepareFile(minioMetaTmpBucket, tempErasureObj, curPartSize, storage.disks, xlMeta.Erasure.BlockSize, xlMeta.Erasure.DataBlocks, writeQuorum)
if pErr != nil {
return ObjectInfo{}, toObjectErr(pErr, bucket, object)
}
@@ -554,7 +574,7 @@ func (xl xlObjects) PutObject(bucket string, object string, data *hash.Reader, m
}
file, erasureErr := storage.CreateFile(curPartReader, minioMetaTmpBucket,
tempErasureObj, buffer, DefaultBitrotAlgorithm, xl.writeQuorum)
tempErasureObj, buffer, DefaultBitrotAlgorithm, writeQuorum)
if erasureErr != nil {
return ObjectInfo{}, toObjectErr(erasureErr, minioMetaTmpBucket, tempErasureObj)
}
@@ -602,7 +622,7 @@ func (xl xlObjects) PutObject(bucket string, object string, data *hash.Reader, m
// NOTE: Do not use online disks slice here.
// The reason is that existing object should be purged
// regardless of `xl.json` status and rolled back in case of errors.
_, err = renameObject(xl.storageDisks, bucket, object, minioMetaTmpBucket, newUniqueID, xl.writeQuorum)
_, err = renameObject(xl.storageDisks, bucket, object, minioMetaTmpBucket, newUniqueID, writeQuorum)
if err != nil {
return ObjectInfo{}, toObjectErr(err, bucket, object)
}
@@ -617,12 +637,12 @@ func (xl xlObjects) PutObject(bucket string, object string, data *hash.Reader, m
}
// Write unique `xl.json` for each disk.
if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, xl.writeQuorum); err != nil {
if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, writeQuorum); err != nil {
return ObjectInfo{}, toObjectErr(err, bucket, object)
}
// Rename the successfully written temporary object to final location.
if _, err = renameObject(onlineDisks, minioMetaTmpBucket, tempObj, bucket, object, xl.writeQuorum); err != nil {
if _, err = renameObject(onlineDisks, minioMetaTmpBucket, tempObj, bucket, object, writeQuorum); err != nil {
return ObjectInfo{}, toObjectErr(err, bucket, object)
}
@@ -659,6 +679,15 @@ func (xl xlObjects) deleteObject(bucket, object string) error {
// Initialize sync waitgroup.
var wg = &sync.WaitGroup{}
// Read metadata associated with the object from all disks.
metaArr, errs := readAllXLMetadata(xl.storageDisks, bucket, object)
// get Quorum for this object
_, writeQuorum, err := objectQuorumFromMeta(xl, metaArr, errs)
if err != nil {
return err
}
// Initialize list of errors.
var dErrs = make([]error, len(xl.storageDisks))
@@ -680,7 +709,7 @@ func (xl xlObjects) deleteObject(bucket, object string) error {
// Wait for all routines to finish.
wg.Wait()
return reduceWriteQuorumErrs(dErrs, objectOpIgnoredErrs, xl.writeQuorum)
return reduceWriteQuorumErrs(dErrs, objectOpIgnoredErrs, writeQuorum)
}
// DeleteObject - deletes an object, this call doesn't necessary reply
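To make the per-object quorum arithmetic concrete: on a hypothetical 16-disk XL setup where the reduced redundancy class is configured with 4 parity drives and the standard class keeps its N/2 default, the drive counts and write quorums work out as below (illustrative numbers only; the configuration syntax itself is not shown in this excerpt).

package main

import "fmt"

func main() {
	disks := 16
	rrsParity := 4               // hypothetical configured RRS parity
	rrsData := disks - rrsParity // 12 data drives for reduced redundancy objects
	stdParity := disks / 2       // 8, the default standard-class parity
	stdData := disks - stdParity // 8
	fmt.Println("RRS write quorum:", rrsData+1)      // 13, since writeQuorum is dataBlocks + 1
	fmt.Println("STANDARD write quorum:", stdData+1) // 9
}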

View File

@@ -154,8 +154,9 @@ func TestXLDeleteObjectDiskNotFound(t *testing.T) {
xl.storageDisks[8] = nil
err = obj.DeleteObject(bucket, object)
err = errors.Cause(err)
if err != toObjectErr(errXLWriteQuorum, bucket, object) {
t.Errorf("Expected deleteObject to fail with %v, but failed with %v", toObjectErr(errXLWriteQuorum, bucket, object), err)
// Since a majority of the disks are unavailable, quorum on the object metadata cannot be reached, hence the errXLReadQuorum error.
if err != toObjectErr(errXLReadQuorum, bucket, object) {
t.Errorf("Expected deleteObject to fail with %v, but failed with %v", toObjectErr(errXLReadQuorum, bucket, object), err)
}
// Cleanup backend directories
removeRoots(fsDirs)

View File

@@ -50,10 +50,6 @@ const (
type xlObjects struct {
mutex *sync.Mutex
storageDisks []StorageAPI // Collection of initialized backend disks.
dataBlocks int // dataBlocks count calculated for erasure.
parityBlocks int // parityBlocks count calculated for erasure.
readQuorum int // readQuorum minimum required disks to read data.
writeQuorum int // writeQuorum minimum required disks to write data.
// ListObjects pool management.
listPool *treeWalkPool
@@ -92,6 +88,7 @@ func newXLObjects(storageDisks []StorageAPI) (ObjectLayer, error) {
return nil, errInvalidArgument
}
// Figure out read and write quorum for the erasure coded format.json.
readQuorum := len(storageDisks) / 2
writeQuorum := len(storageDisks)/2 + 1
@@ -101,9 +98,6 @@ func newXLObjects(storageDisks []StorageAPI) (ObjectLayer, error) {
return nil, fmt.Errorf("Unable to recognize backend format, %s", err)
}
// Calculate data and parity blocks.
dataBlocks, parityBlocks := len(newStorageDisks)/2, len(newStorageDisks)/2
// Initialize list pool.
listPool := newTreeWalkPool(globalLookupTimeout)
@@ -111,8 +105,6 @@ func newXLObjects(storageDisks []StorageAPI) (ObjectLayer, error) {
xl := &xlObjects{
mutex: &sync.Mutex{},
storageDisks: newStorageDisks,
dataBlocks: dataBlocks,
parityBlocks: parityBlocks,
listPool: listPool,
}
@@ -144,11 +136,6 @@ func newXLObjects(storageDisks []StorageAPI) (ObjectLayer, error) {
return nil, fmt.Errorf("Unable to initialize '.minio.sys' meta volume, %s", err)
}
// Figure out read and write quorum based on number of storage disks.
// READ and WRITE quorum is always set to (N/2) number of disks.
xl.readQuorum = readQuorum
xl.writeQuorum = writeQuorum
// If the number of offline servers is equal to the readQuorum
// (i.e. the number of online servers also equals the
// readQuorum), we cannot perform quick-heal (no
@@ -160,7 +147,7 @@ func newXLObjects(storageDisks []StorageAPI) (ObjectLayer, error) {
}
// Perform a quick heal on the buckets and bucket metadata for any discrepancies.
if err = quickHeal(xl.storageDisks, xl.writeQuorum, xl.readQuorum); err != nil {
if err = quickHeal(*xl, writeQuorum, readQuorum); err != nil {
return nil, err
}
@@ -258,13 +245,18 @@ func getStorageInfo(disks []StorageAPI) StorageInfo {
storageInfo.Backend.Type = Erasure
storageInfo.Backend.OnlineDisks = onlineDisks
storageInfo.Backend.OfflineDisks = offlineDisks
_, scParity := getDrivesCount(standardStorageClass, disks)
storageInfo.Backend.standardSCParity = scParity
_, rrSCparity := getDrivesCount(reducedRedundancyStorageClass, disks)
storageInfo.Backend.rrSCParity = rrSCparity
return storageInfo
}
// StorageInfo - returns underlying storage statistics.
func (xl xlObjects) StorageInfo() StorageInfo {
storageInfo := getStorageInfo(xl.storageDisks)
storageInfo.Backend.ReadQuorum = xl.readQuorum
storageInfo.Backend.WriteQuorum = xl.writeQuorum
return storageInfo
}
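For completeness, the user-facing side of this change is the x-amz-storage-class header from the commit message. Below is a minimal, unsigned sketch of a PutObject request carrying that header; a real client must also sign the request (for example with AWS Signature V4), and the endpoint, bucket and object names are placeholders.

package main

import (
	"bytes"
	"log"
	"net/http"
)

func main() {
	payload := []byte("hello")
	// Placeholder endpoint and object path; authentication headers are omitted.
	req, err := http.NewRequest(http.MethodPut,
		"http://localhost:9000/mybucket/myobject", bytes.NewReader(payload))
	if err != nil {
		log.Fatal(err)
	}
	// REDUCED_REDUNDANCY and STANDARD are the storage class values this change
	// is expected to honor when choosing the per-object parity.
	req.Header.Set("x-amz-storage-class", "REDUCED_REDUNDANCY")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	log.Println("status:", resp.Status)
}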