Add x-amz-storage-class support (#5295)

This adds configurable data and parity options on a per-object
basis. To use variable parity:

- Users can set environment variables to configure variable
parity

- Then add the header x-amz-storage-class to PutObject requests
with the relevant storage class value (see the example below)

Fixes #4997
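
For illustration, a hypothetical single-node XL setup might be configured as
follows (the values are made up; the EC:<parity> format and the variable names
come from cmd/storage-class.go later in this diff):

    MINIO_STORAGE_CLASS_STANDARD=EC:6
    MINIO_STORAGE_CLASS_RRS=EC:2

A PutObject request then carries the x-amz-storage-class header with the
desired storage class, and the object is erasure coded with the corresponding
parity.
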
Nitish Tiwari, 2017-12-22 16:58:13 +05:30 (committed by GitHub)
parent f1355da72e
commit 1a3dbbc9dd
25 changed files with 1237 additions and 129 deletions


@ -113,4 +113,34 @@ func handleCommonEnvVars() {
// or is not set to 'off', if MINIO_UPDATE is set to 'off' then
// in-place update is off.
globalInplaceUpdateDisabled = strings.EqualFold(os.Getenv("MINIO_UPDATE"), "off")
// Validate and store the storage class env variables only for XL/Dist XL setups
if globalIsXL {
var err error
// Check for environment variables and parse into storageClass struct
if ssc := os.Getenv(standardStorageClass); ssc != "" {
globalStandardStorageClass, err = parseStorageClass(ssc)
fatalIf(err, "Invalid value set in environment variable %s.", standardStorageClass)
}
if rrsc := os.Getenv(reducedRedundancyStorageClass); rrsc != "" {
globalRRStorageClass, err = parseStorageClass(rrsc)
fatalIf(err, "Invalid value set in environment variable %s.", reducedRedundancyStorageClass)
}
// Validation is done after parsing both the storage classes. This is needed because we need one
// storage class value to deduce the correct value of the other storage class.
if globalRRStorageClass.Scheme != "" {
err := validateRRSParity(globalRRStorageClass.Parity, globalStandardStorageClass.Parity)
fatalIf(err, "Invalid value set in environment variable %s.", reducedRedundancyStorageClass)
globalIsStorageClass = true
}
if globalStandardStorageClass.Scheme != "" {
err := validateSSParity(globalStandardStorageClass.Parity, globalRRStorageClass.Parity)
fatalIf(err, "Invalid value set in environment variable %s.", standardStorageClass)
globalIsStorageClass = true
}
}
}


@ -20,6 +20,7 @@ import (
"errors"
"fmt"
"io/ioutil"
"strconv"
"sync"
"github.com/minio/minio/pkg/auth"
@ -36,9 +37,9 @@ import (
// 6. Make changes in config-current_test.go for any test change
// Config version
const serverConfigVersion = "21"
const serverConfigVersion = "22"
type serverConfig = serverConfigV21
type serverConfig = serverConfigV22
var (
// globalServerConfig server config.
@ -103,6 +104,52 @@ func (s *serverConfig) SetBrowser(b bool) {
s.Browser = BrowserFlag(b)
}
func (s *serverConfig) SetStorageClass(standardClass, rrsClass storageClass) {
s.Lock()
defer s.Unlock()
// Set the values
s.StorageClass.Standard = standardClass.Scheme + strconv.Itoa(standardClass.Parity)
s.StorageClass.RRS = rrsClass.Scheme + strconv.Itoa(rrsClass.Parity)
}
func (s *serverConfig) GetStorageClass() (standardStorageClass, rrsStorageClass storageClass) {
s.RLock()
defer s.RUnlock()
var err error
var ssc storageClass
var rrsc storageClass
if s.StorageClass.Standard != "" {
// Parse the values read from config file into storageClass struct
ssc, err = parseStorageClass(s.StorageClass.Standard)
fatalIf(err, "Invalid value %s set in config.json", s.StorageClass.Standard)
}
if s.StorageClass.RRS != "" {
// Parse the values read from config file into storageClass struct
rrsc, err = parseStorageClass(s.StorageClass.RRS)
fatalIf(err, "Invalid value %s set in config.json", s.StorageClass.RRS)
}
// Validation is done after parsing both the storage classes. This is needed because we need one
// storage class value to deduce the correct value of the other storage class.
if rrsc.Scheme != "" {
err := validateRRSParity(rrsc.Parity, ssc.Parity)
fatalIf(err, "Invalid value %s set in config.json", s.StorageClass.RRS)
globalIsStorageClass = true
}
if ssc.Scheme != "" {
err := validateSSParity(ssc.Parity, rrsc.Parity)
fatalIf(err, "Invalid value %s set in config.json", s.StorageClass.Standard)
globalIsStorageClass = true
}
return
}
// GetCredentials get current credentials.
func (s *serverConfig) GetBrowser() bool {
s.RLock()
@ -175,6 +222,10 @@ func newConfig() error {
srvCfg.Domain = globalDomainName
}
if globalIsStorageClass {
srvCfg.SetStorageClass(globalStandardStorageClass, globalRRStorageClass)
}
// hold the mutex lock before a new config is assigned.
// Save the new config globally.
// unlock the mutex.
@ -303,6 +354,10 @@ func loadConfig() error {
srvCfg.Domain = globalDomainName
}
if globalIsStorageClass {
srvCfg.SetStorageClass(globalStandardStorageClass, globalRRStorageClass)
}
// hold the mutex lock before a new config is assigned.
globalServerConfigMu.Lock()
globalServerConfig = srvCfg
@ -318,6 +373,9 @@ func loadConfig() error {
if !globalIsEnvDomainName {
globalDomainName = globalServerConfig.Domain
}
if !globalIsStorageClass {
globalStandardStorageClass, globalRRStorageClass = globalServerConfig.GetStorageClass()
}
globalServerConfigMu.Unlock()
return nil


@ -158,6 +158,10 @@ func migrateConfig() error {
return err
}
fallthrough
case "21":
if err = migrateV21ToV22(); err != nil {
return err
}
case serverConfigVersion:
// No migration needed. this always points to current version.
err = nil
@ -1704,3 +1708,110 @@ func migrateV20ToV21() error {
log.Printf(configMigrateMSGTemplate, configFile, cv20.Version, srvConfig.Version)
return nil
}
func migrateV21ToV22() error {
configFile := getConfigFile()
cv21 := &serverConfigV21{}
_, err := quick.Load(configFile, cv21)
if os.IsNotExist(err) {
return nil
} else if err != nil {
return fmt.Errorf("Unable to load config version 21. %v", err)
}
if cv21.Version != "21" {
return nil
}
// Copy over fields from V21 into V22 config struct
srvConfig := &serverConfigV22{
Notify: &notifier{},
}
srvConfig.Version = serverConfigVersion
srvConfig.Credential = cv21.Credential
srvConfig.Region = cv21.Region
if srvConfig.Region == "" {
// Region needs to be set for AWS Signature Version 4.
srvConfig.Region = globalMinioDefaultRegion
}
// check and set notifiers config
if len(cv21.Notify.AMQP) == 0 {
srvConfig.Notify.AMQP = make(map[string]amqpNotify)
srvConfig.Notify.AMQP["1"] = amqpNotify{}
} else {
// New deliveryMode parameter is added for AMQP,
// default value is already 0, so nothing to
// explicitly migrate here.
srvConfig.Notify.AMQP = cv21.Notify.AMQP
}
if len(cv21.Notify.ElasticSearch) == 0 {
srvConfig.Notify.ElasticSearch = make(map[string]elasticSearchNotify)
srvConfig.Notify.ElasticSearch["1"] = elasticSearchNotify{
Format: formatNamespace,
}
} else {
srvConfig.Notify.ElasticSearch = cv21.Notify.ElasticSearch
}
if len(cv21.Notify.Redis) == 0 {
srvConfig.Notify.Redis = make(map[string]redisNotify)
srvConfig.Notify.Redis["1"] = redisNotify{
Format: formatNamespace,
}
} else {
srvConfig.Notify.Redis = cv21.Notify.Redis
}
if len(cv21.Notify.PostgreSQL) == 0 {
srvConfig.Notify.PostgreSQL = make(map[string]postgreSQLNotify)
srvConfig.Notify.PostgreSQL["1"] = postgreSQLNotify{
Format: formatNamespace,
}
} else {
srvConfig.Notify.PostgreSQL = cv21.Notify.PostgreSQL
}
if len(cv21.Notify.Kafka) == 0 {
srvConfig.Notify.Kafka = make(map[string]kafkaNotify)
srvConfig.Notify.Kafka["1"] = kafkaNotify{}
} else {
srvConfig.Notify.Kafka = cv21.Notify.Kafka
}
if len(cv21.Notify.NATS) == 0 {
srvConfig.Notify.NATS = make(map[string]natsNotify)
srvConfig.Notify.NATS["1"] = natsNotify{}
} else {
srvConfig.Notify.NATS = cv21.Notify.NATS
}
if len(cv21.Notify.Webhook) == 0 {
srvConfig.Notify.Webhook = make(map[string]webhookNotify)
srvConfig.Notify.Webhook["1"] = webhookNotify{}
} else {
srvConfig.Notify.Webhook = cv21.Notify.Webhook
}
if len(cv21.Notify.MySQL) == 0 {
srvConfig.Notify.MySQL = make(map[string]mySQLNotify)
srvConfig.Notify.MySQL["1"] = mySQLNotify{
Format: formatNamespace,
}
} else {
srvConfig.Notify.MySQL = cv21.Notify.MySQL
}
if len(cv21.Notify.MQTT) == 0 {
srvConfig.Notify.MQTT = make(map[string]mqttNotify)
srvConfig.Notify.MQTT["1"] = mqttNotify{}
} else {
srvConfig.Notify.MQTT = cv21.Notify.MQTT
}
// Load browser config from existing config in the file.
srvConfig.Browser = cv21.Browser
// Load domain config from existing config in the file.
srvConfig.Domain = cv21.Domain
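// Note: the new StorageClass section is left at its zero value during this
// migration; it is populated later by SetStorageClass in newConfig()/loadConfig()
// once storage class values are configured (see the config-current.go hunks above).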
if err = quick.Save(configFile, srvConfig); err != nil {
return fmt.Errorf("Failed to migrate config from %s to %s. %v", cv21.Version, srvConfig.Version, err)
}
log.Printf(configMigrateMSGTemplate, configFile, cv21.Version, srvConfig.Version)
return nil
}


@ -530,3 +530,22 @@ type serverConfigV21 struct {
// Notification queue configuration.
Notify *notifier `json:"notify"`
}
// serverConfigV22 is just like version '21' with added support
// for StorageClass
type serverConfigV22 struct {
sync.RWMutex
Version string `json:"version"`
// S3 API configuration.
Credential auth.Credentials `json:"credential"`
Region string `json:"region"`
Browser BrowserFlag `json:"browser"`
Domain string `json:"domain"`
// Storage class configuration
StorageClass storageClassConfig `json:"storageclass"`
// Notification queue configuration.
Notify *notifier `json:"notify"`
}
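// Illustrative only (not taken from an actual config.json): with both classes
// configured, the new section serializes under the "storageclass" key, e.g.
//
//	"storageclass": {
//		"standard": "EC:6",
//		"rrs": "EC:2"
//	}
//
// The example values follow the "EC:<parity>" format accepted by
// parseStorageClass in cmd/storage-class.go.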


@ -149,7 +149,6 @@ var (
globalIsEnvDomainName bool
globalDomainName string // Root domain for virtual host style requests
// Add new variable global values here.
globalListingTimeout = newDynamicTimeout( /*30*/ 600*time.Second /*5*/, 600*time.Second) // timeout for listing related ops
globalObjectTimeout = newDynamicTimeout( /*1*/ 10*time.Minute /*10*/, 600*time.Second) // timeout for Object API related ops
@ -158,6 +157,16 @@ var (
// Keep connection active for clients actively using ListenBucketNotification.
globalSNSConnAlive = 5 * time.Second // Send a whitespace every 5 seconds.
// Storage classes
// Set to indicate if storage class is set up
globalIsStorageClass bool
// Set to store reduced redundancy storage class
globalRRStorageClass storageClass
// Set to store standard storage class
globalStandardStorageClass storageClass
// Add new variable global values here.
)
// global colors.


@ -60,6 +60,7 @@ var supportedHeaders = []string{
"cache-control",
"content-encoding",
"content-disposition",
amzStorageClass,
// Add more supported headers here.
}
@ -116,7 +117,8 @@ func extractMetadataFromHeader(header http.Header) (map[string]string, error) {
return nil, errors.Trace(errInvalidArgument)
}
metadata := make(map[string]string)
// Save standard supported headers.
// Save all supported headers.
for _, supportedHeader := range supportedHeaders {
canonicalHeader := http.CanonicalHeaderKey(supportedHeader)
// HTTP headers are case insensitive, look for both canonical
@ -127,6 +129,7 @@ func extractMetadataFromHeader(header http.Header) (map[string]string, error) {
metadata[supportedHeader] = header.Get(supportedHeader)
}
}
// Go through all other headers for any additional headers that needs to be saved.
for key := range header {
if key != http.CanonicalHeaderKey(key) {


@ -43,10 +43,10 @@ type StorageInfo struct {
Type BackendType
// Following fields are only meaningful if BackendType is Erasure.
OnlineDisks int // Online disks during server startup.
OfflineDisks int // Offline disks during server startup.
ReadQuorum int // Minimum disks required for successful read operations.
standardSCParity int // Parity disks for currently configured Standard storage class.
WriteQuorum int // Minimum disks required for successful write operations.
rrSCParity int // Parity disks for currently configured Reduced Redundancy storage class.
}
}


@ -238,7 +238,8 @@ func testPutObjectPartDiskNotFound(obj ObjectLayer, instanceType string, disks [
if err == nil {
t.Fatalf("Test %s: expected to fail but passed instead", instanceType)
}
expectedErr := InsufficientWriteQuorum{}
// as majority of xl.json are not available, we expect InsufficientReadQuorum while trying to fetch the object quorum
expectedErr := InsufficientReadQuorum{}
if err.Error() != expectedErr.Error() {
t.Fatalf("Test %s: expected error %s, got %s instead.", instanceType, expectedErr, err)
}


@ -47,6 +47,16 @@ func printStartupMessage(apiEndPoints []string) {
strippedAPIEndpoints := stripStandardPorts(apiEndPoints)
// Object layer is initialized then print StorageInfo.
objAPI := newObjectLayerFn()
if objAPI != nil {
printStorageInfo(objAPI.StorageInfo())
// Storage class info only printed for Erasure backend
if objAPI.StorageInfo().Backend.Type == Erasure {
printStorageClassInfoMsg(objAPI.StorageInfo())
}
}
// Prints credential, region and browser access.
printServerCommonMsg(strippedAPIEndpoints)
@ -57,12 +67,6 @@ func printStartupMessage(apiEndPoints []string) {
// Prints documentation message.
printObjectAPIMsg()
// Object layer is initialized then print StorageInfo.
objAPI := newObjectLayerFn()
if objAPI != nil {
printStorageInfo(objAPI.StorageInfo())
}
// SSL is configured reads certification chain, prints
// authority and expiry.
if globalIsSSL {
@ -173,18 +177,42 @@ func getStorageInfoMsg(storageInfo StorageInfo) string {
humanize.IBytes(uint64(storageInfo.Total)))
if storageInfo.Backend.Type == Erasure {
diskInfo := fmt.Sprintf(" %d Online, %d Offline. ", storageInfo.Backend.OnlineDisks, storageInfo.Backend.OfflineDisks)
if maxDiskFailures := storageInfo.Backend.ReadQuorum - storageInfo.Backend.OfflineDisks; maxDiskFailures >= 0 {
diskInfo += fmt.Sprintf("We can withstand [%d] drive failure(s).", maxDiskFailures)
}
msg += colorBlue("\nStatus:") + fmt.Sprintf(getFormatStr(len(diskInfo), 8), diskInfo)
}
return msg
}
func printStorageClassInfoMsg(storageInfo StorageInfo) {
standardClassMsg := getStandardStorageClassInfoMsg(storageInfo)
rrsClassMsg := getRRSStorageClassInfoMsg(storageInfo)
storageClassMsg := fmt.Sprintf(getFormatStr(len(standardClassMsg), 3), standardClassMsg) + fmt.Sprintf(getFormatStr(len(rrsClassMsg), 3), rrsClassMsg)
// Print storage class section only if data is present
if storageClassMsg != "" {
log.Println(colorBlue("Storage Class:"))
log.Println(storageClassMsg)
}
}
func getStandardStorageClassInfoMsg(storageInfo StorageInfo) string {
var msg string
if maxDiskFailures := storageInfo.Backend.standardSCParity - storageInfo.Backend.OfflineDisks; maxDiskFailures >= 0 {
msg += fmt.Sprintf("Objects with Standard class can withstand [%d] drive failure(s).\n", maxDiskFailures)
}
return msg
}
func getRRSStorageClassInfoMsg(storageInfo StorageInfo) string {
var msg string
if maxDiskFailures := storageInfo.Backend.rrSCParity - storageInfo.Backend.OfflineDisks; maxDiskFailures >= 0 {
msg += fmt.Sprintf("Objects with Reduced Redundancy class can withstand [%d] drive failure(s).\n", maxDiskFailures)
}
return msg
}
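// Worked instance of the arithmetic above (figures mirror the first storage-class
// test case later in this diff and are illustrative only): with Standard parity 5,
// RRS parity 3 and 1 drive offline, the messages report that Standard objects can
// withstand 4 more drive failures and Reduced Redundancy objects 2 more.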
// Prints startup message of storage capacity and erasure information.
func printStorageInfo(storageInfo StorageInfo) {
log.Println()
log.Println(getStorageInfoMsg(storageInfo))
log.Println()
}
// Prints certificate expiry date warning // Prints certificate expiry date warning


@ -35,11 +35,11 @@ func TestStorageInfoMsg(t *testing.T) {
Total: 10 * humanize.GiByte,
Free: 2 * humanize.GiByte,
Backend: struct {
Type BackendType
OnlineDisks int
OfflineDisks int
ReadQuorum int
standardSCParity int
WriteQuorum int
rrSCParity int
}{Erasure, 7, 1, 4, 5},
}
@ -155,3 +155,97 @@ func TestPrintStartupMessage(t *testing.T) {
apiEndpoints := []string{"http://127.0.0.1:9000"}
printStartupMessage(apiEndpoints)
}
func TestGetStandardStorageClassInfoMsg(t *testing.T) {
tests := []struct {
name string
args StorageInfo
want string
}{
{"1", StorageInfo{
Total: 20 * humanize.GiByte,
Free: 2 * humanize.GiByte,
Backend: struct {
Type BackendType
OnlineDisks int
OfflineDisks int
standardSCParity int
rrSCParity int
}{Erasure, 15, 1, 5, 3},
}, "Objects with Standard class can withstand [4] drive failure(s).\n"},
{"2", StorageInfo{
Total: 30 * humanize.GiByte,
Free: 3 * humanize.GiByte,
Backend: struct {
Type BackendType
OnlineDisks int
OfflineDisks int
standardSCParity int
rrSCParity int
}{Erasure, 10, 0, 5, 3},
}, "Objects with Standard class can withstand [5] drive failure(s).\n"},
{"3", StorageInfo{
Total: 15 * humanize.GiByte,
Free: 2 * humanize.GiByte,
Backend: struct {
Type BackendType
OnlineDisks int
OfflineDisks int
standardSCParity int
rrSCParity int
}{Erasure, 12, 3, 6, 2},
}, "Objects with Standard class can withstand [3] drive failure(s).\n"},
}
for _, tt := range tests {
if got := getStandardStorageClassInfoMsg(tt.args); got != tt.want {
t.Errorf("Test %s failed, expected %v, got %v", tt.name, tt.want, got)
}
}
}
func TestGetRRSStorageClassInfoMsg(t *testing.T) {
tests := []struct {
name string
args StorageInfo
want string
}{
{"1", StorageInfo{
Total: 20 * humanize.GiByte,
Free: 2 * humanize.GiByte,
Backend: struct {
Type BackendType
OnlineDisks int
OfflineDisks int
standardSCParity int
rrSCParity int
}{Erasure, 15, 1, 5, 3},
}, "Objects with Reduced Redundancy class can withstand [2] drive failure(s).\n"},
{"2", StorageInfo{
Total: 30 * humanize.GiByte,
Free: 3 * humanize.GiByte,
Backend: struct {
Type BackendType
OnlineDisks int
OfflineDisks int
standardSCParity int
rrSCParity int
}{Erasure, 16, 0, 5, 3},
}, "Objects with Reduced Redundancy class can withstand [3] drive failure(s).\n"},
{"3", StorageInfo{
Total: 15 * humanize.GiByte,
Free: 2 * humanize.GiByte,
Backend: struct {
Type BackendType
OnlineDisks int
OfflineDisks int
standardSCParity int
rrSCParity int
}{Erasure, 12, 3, 6, 5},
}, "Objects with Reduced Redundancy class can withstand [2] drive failure(s).\n"},
}
for _, tt := range tests {
if got := getRRSStorageClassInfoMsg(tt.args); got != tt.want {
t.Errorf("Test %s failed, expected %v, got %v", tt.name, tt.want, got)
}
}
}

cmd/storage-class.go (new file, 187 lines)

@ -0,0 +1,187 @@
/*
* Minio Cloud Storage, (C) 2017 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"errors"
"fmt"
"strconv"
"strings"
)
const (
// metadata entry for storage class
amzStorageClass = "x-amz-storage-class"
// Reduced redundancy storage class
reducedRedundancyStorageClass = "MINIO_STORAGE_CLASS_RRS"
// Standard storage class
standardStorageClass = "MINIO_STORAGE_CLASS_STANDARD"
// Supported storage class scheme is EC
supportedStorageClassScheme = "EC"
// Minimum parity disks
minimumParityDisks = 2
defaultRRSParity = 2
)
// Struct to hold storage class
type storageClass struct {
Scheme string
Parity int
}
type storageClassConfig struct {
Standard string `json:"standard"`
RRS string `json:"rrs"`
}
// Parses given storageClassEnv and returns a storageClass structure.
// Supported Storage Class format is "Scheme:Number of parity disks".
// Currently only supported scheme is "EC".
func parseStorageClass(storageClassEnv string) (sc storageClass, err error) {
s := strings.Split(storageClassEnv, ":")
// only two elements allowed in the string - "scheme" and "number of parity disks"
if len(s) > 2 {
return storageClass{}, errors.New("Too many sections in " + storageClassEnv)
} else if len(s) < 2 {
return storageClass{}, errors.New("Too few sections in " + storageClassEnv)
}
// only allowed scheme is "EC"
if s[0] != supportedStorageClassScheme {
return storageClass{}, errors.New("Unsupported scheme " + s[0] + ". Supported scheme is EC")
}
// Number of parity disks should be integer
parityDisks, err := strconv.Atoi(s[1])
if err != nil {
return storageClass{}, err
}
sc = storageClass{
Scheme: s[0],
Parity: parityDisks,
}
return sc, nil
}
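// Example behaviour (illustrative; the cases mirror the parser tests in
// cmd/storage-class_test.go):
//
//	parseStorageClass("EC:4")   // -> storageClass{Scheme: "EC", Parity: 4}, nil
//	parseStorageClass("AB:4")   // -> error: Unsupported scheme AB. Supported scheme is EC
//	parseStorageClass("EC:4:5") // -> error: Too many sections in EC:4:5
//	parseStorageClass("AB")     // -> error: Too few sections in AB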
// Validates the parity disks for Reduced Redundancy storage class
func validateRRSParity(rrsParity, ssParity int) (err error) {
// Reduced redundancy storage class is not supported for 4 disks erasure coded setup.
if len(globalEndpoints) == 4 && rrsParity != 0 {
return fmt.Errorf("Reduced redundancy storage class not supported for " + strconv.Itoa(len(globalEndpoints)) + " disk setup")
}
// RRS parity disks should be greater than or equal to minimumParityDisks. Parity below minimumParityDisks is not recommended.
if rrsParity < minimumParityDisks {
return fmt.Errorf("Reduced redundancy storage class parity should be greater than or equal to " + strconv.Itoa(minimumParityDisks))
}
// Reduced redundancy implies lesser parity than standard storage class. So, RRS parity disks should be
// - less than N/2, if StorageClass parity is not set.
// - less than StorageClass Parity, if Storage class parity is set.
switch ssParity {
case 0:
if rrsParity >= len(globalEndpoints)/2 {
return fmt.Errorf("Reduced redundancy storage class parity disks should be less than " + strconv.Itoa(len(globalEndpoints)/2))
}
default:
if rrsParity >= ssParity {
return fmt.Errorf("Reduced redundancy storage class parity disks should be less than " + strconv.Itoa(ssParity))
}
}
return nil
}
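// For example (illustrative): on a 16-drive setup with the Standard class unset,
// RRS parity must lie between 2 and 7; if Standard parity is configured as 6,
// RRS parity must additionally stay below 6.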
// Validates the parity disks for Standard storage class
func validateSSParity(ssParity, rrsParity int) (err error) {
// Standard storage class implies more parity than Reduced redundancy storage class. So, Standard storage parity disks should be
// - greater than or equal to 2, if RRS parity is not set.
// - greater than RRS Parity, if RRS parity is set.
switch rrsParity {
case 0:
if ssParity < minimumParityDisks {
return fmt.Errorf("Standard storage class parity disks should be greater than or equal to " + strconv.Itoa(minimumParityDisks))
}
default:
if ssParity <= rrsParity {
return fmt.Errorf("Standard storage class parity disks should be greater than " + strconv.Itoa(rrsParity))
}
}
// Standard storage class parity should be less than or equal to N/2
if ssParity > len(globalEndpoints)/2 {
return fmt.Errorf("Standard storage class parity disks should be less than or equal to " + strconv.Itoa(len(globalEndpoints)/2))
}
return nil
}
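// For example (illustrative): on a 16-drive setup, Standard parity must be at
// most 8 (N/2); with RRS parity unset it must be at least 2, and with RRS parity
// set to 2 it must be at least 3 (strictly greater than the RRS parity).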
// Returns the data and parity drive count based on storage class
// If storage class is set using the env vars MINIO_STORAGE_CLASS_RRS and MINIO_STORAGE_CLASS_STANDARD
// -- corresponding values are returned
// If storage class is not set using environment variables, default values are returned
// -- Default for Reduced Redundancy Storage class is, parity = 2 and data = N-Parity
// -- Default for Standard Storage class is, parity = N/2, data = N/2
// If storage class is not present in metadata, default value is data = N/2, parity = N/2
func getDrivesCount(sc string, disks []StorageAPI) (data, parity int) {
totalDisks := len(disks)
parity = totalDisks / 2
switch sc {
case reducedRedundancyStorageClass:
if globalRRStorageClass.Parity != 0 {
// set the rrs parity if available
parity = globalRRStorageClass.Parity
} else {
// else fall back to default value
parity = defaultRRSParity
}
case standardStorageClass:
if globalStandardStorageClass.Parity != 0 {
// set the standard parity if available
parity = globalStandardStorageClass.Parity
}
}
// data is always totalDisks - parity
return totalDisks - parity, parity
}
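// Example outcomes on a 16-disk setup (these mirror the getDrivesCount tests in
// cmd/storage-class_test.go): no storage class in the metadata -> 8 data / 8 parity;
// RRS class with no RRS parity configured -> 14 data / 2 parity (the default);
// Standard class with Standard parity configured as 6 -> 10 data / 6 parity.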
// Returns per object readQuorum and writeQuorum
// readQuorum is the minimum required disks to read data.
// writeQuorum is the minimum required disks to write data.
func objectQuorumFromMeta(xl xlObjects, partsMetaData []xlMetaV1, errs []error) (objectReadQuorum, objectWriteQuorum int, err error) {
// get the latest updated Metadata and a count of all the latest updated xlMeta(s)
latestXLMeta, count := getLatestXLMeta(partsMetaData, errs)
// latestXLMeta is updated most recently.
// We implicitly assume that all the xlMeta(s) have same dataBlocks and parityBlocks.
// We now check that at least dataBlocks number of xlMeta is available. This means count
// should be greater than or equal to dataBlocks field of latestXLMeta. If not we throw read quorum error.
if count < latestXLMeta.Erasure.DataBlocks {
// This is the case when we can't reliably deduce object quorum
return 0, 0, errXLReadQuorum
}
// Since all the valid erasure code meta updated at the same time are equivalent, pass dataBlocks
// from latestXLMeta to get the quorum
return latestXLMeta.Erasure.DataBlocks, latestXLMeta.Erasure.DataBlocks + 1, nil
}
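// Example (illustrative; figures match the TestObjectQuorumFromMeta cases below):
// an object written with the default Standard layout on 16 disks has 8 data
// blocks, giving readQuorum 8 and writeQuorum 9; an RRS object with the default
// parity of 2 has 14 data blocks, giving readQuorum 14 and writeQuorum 15.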

cmd/storage-class_test.go (new file, 355 lines)

@ -0,0 +1,355 @@
/*
* Minio Cloud Storage, (C) 2017 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"bytes"
"errors"
"reflect"
"testing"
)
func TestParseStorageClass(t *testing.T) {
ExecObjectLayerTest(t, testParseStorageClass)
}
func testParseStorageClass(obj ObjectLayer, instanceType string, t TestErrHandler) {
tests := []struct {
name int
storageClassEnv string
wantSc storageClass
expectedError error
}{
{1, "EC:3", storageClass{
Scheme: "EC",
Parity: 3},
nil},
{2, "EC:4", storageClass{
Scheme: "EC",
Parity: 4},
nil},
{3, "AB:4", storageClass{
Scheme: "EC",
Parity: 4},
errors.New("Unsupported scheme AB. Supported scheme is EC")},
{4, "EC:4:5", storageClass{
Scheme: "EC",
Parity: 4},
errors.New("Too many sections in EC:4:5")},
{5, "AB", storageClass{
Scheme: "EC",
Parity: 4},
errors.New("Too few sections in AB")},
}
for _, tt := range tests {
gotSc, err := parseStorageClass(tt.storageClassEnv)
if err != nil && tt.expectedError == nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if err == nil && tt.expectedError != nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if tt.expectedError == nil && !reflect.DeepEqual(gotSc, tt.wantSc) {
t.Errorf("Test %d, Expected %v, got %v", tt.name, tt.wantSc, gotSc)
return
}
if tt.expectedError != nil && !reflect.DeepEqual(err, tt.expectedError) {
t.Errorf("Test %d, Expected %v, got %v", tt.name, tt.expectedError, err)
}
}
}
func TestValidateRRSParity(t *testing.T) {
ExecObjectLayerTestWithDirs(t, testValidateRRSParity)
}
func testValidateRRSParity(obj ObjectLayer, instanceType string, dirs []string, t TestErrHandler) {
// Reset global storage class flags
resetGlobalStorageEnvs()
// Set globalEndpoints for a single node XL setup.
globalEndpoints = mustGetNewEndpointList(dirs...)
tests := []struct {
name int
rrsParity int
ssParity int
expectedError error
}{
{1, 2, 4, nil},
{2, 1, 4, errors.New("Reduced redundancy storage class parity should be greater than or equal to 2")},
{3, 7, 6, errors.New("Reduced redundancy storage class parity disks should be less than 6")},
{4, 9, 0, errors.New("Reduced redundancy storage class parity disks should be less than 8")},
{5, 3, 3, errors.New("Reduced redundancy storage class parity disks should be less than 3")},
}
for _, tt := range tests {
err := validateRRSParity(tt.rrsParity, tt.ssParity)
if err != nil && tt.expectedError == nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if err == nil && tt.expectedError != nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if tt.expectedError != nil && !reflect.DeepEqual(err, tt.expectedError) {
t.Errorf("Test %d, Expected %v, got %v", tt.name, tt.expectedError, err)
}
}
}
func TestValidateSSParity(t *testing.T) {
ExecObjectLayerTestWithDirs(t, testValidateSSParity)
}
func testValidateSSParity(obj ObjectLayer, instanceType string, dirs []string, t TestErrHandler) {
// Reset global storage class flags
resetGlobalStorageEnvs()
// Set globalEndpoints for a single node XL setup.
globalEndpoints = mustGetNewEndpointList(dirs...)
tests := []struct {
name int
ssParity int
rrsParity int
expectedError error
}{
{1, 4, 2, nil},
{2, 6, 5, nil},
{3, 1, 0, errors.New("Standard storage class parity disks should be greater than or equal to 2")},
{4, 4, 6, errors.New("Standard storage class parity disks should be greater than 6")},
{5, 9, 0, errors.New("Standard storage class parity disks should be less than or equal to 8")},
{6, 3, 3, errors.New("Standard storage class parity disks should be greater than 3")},
}
for _, tt := range tests {
err := validateSSParity(tt.ssParity, tt.rrsParity)
if err != nil && tt.expectedError == nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if err == nil && tt.expectedError != nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if tt.expectedError != nil && !reflect.DeepEqual(err, tt.expectedError) {
t.Errorf("Test %d, Expected %v, got %v", tt.name, tt.expectedError, err)
}
}
}
func TestGetDrivesCount(t *testing.T) {
ExecObjectLayerTestWithDirs(t, testGetDrivesCount)
}
func testGetDrivesCount(obj ObjectLayer, instanceType string, dirs []string, t TestErrHandler) {
// Reset global storage class flags
resetGlobalStorageEnvs()
xl := obj.(*xlObjects)
tests := []struct {
name int
sc string
disks []StorageAPI
expectedData int
expectedParity int
}{
{1, reducedRedundancyStorageClass, xl.storageDisks, 14, 2},
{2, standardStorageClass, xl.storageDisks, 8, 8},
{3, "", xl.storageDisks, 8, 8},
{4, reducedRedundancyStorageClass, xl.storageDisks, 9, 7},
{5, standardStorageClass, xl.storageDisks, 10, 6},
}
for _, tt := range tests {
// Set env var for test case 4
if tt.name == 4 {
globalRRStorageClass.Parity = 7
}
// Set env var for test case 5
if tt.name == 5 {
globalStandardStorageClass.Parity = 6
}
data, parity := getDrivesCount(tt.sc, tt.disks)
if data != tt.expectedData {
t.Errorf("Test %d, Expected data disks %d, got %d", tt.name, tt.expectedData, data)
return
}
if parity != tt.expectedParity {
t.Errorf("Test %d, Expected parity disks %d, got %d", tt.name, tt.expectedParity, parity)
return
}
}
}
func TestObjectQuorumFromMeta(t *testing.T) {
ExecObjectLayerTestWithDirs(t, testObjectQuorumFromMeta)
}
func testObjectQuorumFromMeta(obj ObjectLayer, instanceType string, dirs []string, t TestErrHandler) {
// Reset global storage class flags
resetGlobalStorageEnvs()
bucket := getRandomBucketName()
// make data with more than one part
partCount := 3
data := bytes.Repeat([]byte("a"), int(globalPutPartSize)*partCount)
xl := obj.(*xlObjects)
xlDisks := xl.storageDisks
err := obj.MakeBucketWithLocation(bucket, globalMinioDefaultRegion)
if err != nil {
t.Fatalf("Failed to make a bucket %v", err)
}
// Object for test case 1 - No StorageClass defined, no MetaData in PutObject
object1 := "object1"
_, err = obj.PutObject(bucket, object1, mustGetHashReader(t, bytes.NewReader(data), int64(len(data)), "", ""), nil)
if err != nil {
t.Fatalf("Failed to putObject %v", err)
}
parts1, errs1 := readAllXLMetadata(xlDisks, bucket, object1)
// Object for test case 2 - No StorageClass defined, MetaData in PutObject requesting RRS Class
object2 := "object2"
metadata2 := make(map[string]string)
metadata2["x-amz-storage-class"] = reducedRedundancyStorageClass
_, err = obj.PutObject(bucket, object2, mustGetHashReader(t, bytes.NewReader(data), int64(len(data)), "", ""), metadata2)
if err != nil {
t.Fatalf("Failed to putObject %v", err)
}
parts2, errs2 := readAllXLMetadata(xlDisks, bucket, object2)
// Object for test case 3 - No StorageClass defined, MetaData in PutObject requesting Standard Storage Class
object3 := "object3"
metadata3 := make(map[string]string)
metadata3["x-amz-storage-class"] = standardStorageClass
_, err = obj.PutObject(bucket, object3, mustGetHashReader(t, bytes.NewReader(data), int64(len(data)), "", ""), metadata3)
if err != nil {
t.Fatalf("Failed to putObject %v", err)
}
parts3, errs3 := readAllXLMetadata(xlDisks, bucket, object3)
// Object for test case 4 - Standard StorageClass defined as Parity 6, MetaData in PutObject requesting Standard Storage Class
object4 := "object4"
metadata4 := make(map[string]string)
metadata4["x-amz-storage-class"] = standardStorageClass
globalStandardStorageClass = storageClass{
Parity: 6,
Scheme: "EC",
}
_, err = obj.PutObject(bucket, object4, mustGetHashReader(t, bytes.NewReader(data), int64(len(data)), "", ""), metadata4)
if err != nil {
t.Fatalf("Failed to putObject %v", err)
}
parts4, errs4 := readAllXLMetadata(xlDisks, bucket, object4)
// Object for test case 5 - RRS StorageClass defined as Parity 2, MetaData in PutObject requesting RRS Class
// Reset global storage class flags
resetGlobalStorageEnvs()
object5 := "object5"
metadata5 := make(map[string]string)
metadata5["x-amz-storage-class"] = reducedRedundancyStorageClass
globalRRStorageClass = storageClass{
Parity: 2,
Scheme: "EC",
}
_, err = obj.PutObject(bucket, object5, mustGetHashReader(t, bytes.NewReader(data), int64(len(data)), "", ""), metadata5)
if err != nil {
t.Fatalf("Failed to putObject %v", err)
}
parts5, errs5 := readAllXLMetadata(xlDisks, bucket, object5)
// Object for test case 6 - RRS StorageClass defined as Parity 2, MetaData in PutObject requesting Standard Storage Class
// Reset global storage class flags
resetGlobalStorageEnvs()
object6 := "object6"
metadata6 := make(map[string]string)
metadata6["x-amz-storage-class"] = standardStorageClass
globalRRStorageClass = storageClass{
Parity: 2,
Scheme: "EC",
}
_, err = obj.PutObject(bucket, object6, mustGetHashReader(t, bytes.NewReader(data), int64(len(data)), "", ""), metadata6)
if err != nil {
t.Fatalf("Failed to putObject %v", err)
}
parts6, errs6 := readAllXLMetadata(xlDisks, bucket, object6)
// Object for test case 7 - Standard StorageClass defined as Parity 5, MetaData in PutObject requesting RRS Class
// Reset global storage class flags
resetGlobalStorageEnvs()
object7 := "object7"
metadata7 := make(map[string]string)
metadata7["x-amz-storage-class"] = reducedRedundancyStorageClass
globalStandardStorageClass = storageClass{
Parity: 5,
Scheme: "EC",
}
_, err = obj.PutObject(bucket, object7, mustGetHashReader(t, bytes.NewReader(data), int64(len(data)), "", ""), metadata7)
if err != nil {
t.Fatalf("Failed to putObject %v", err)
}
parts7, errs7 := readAllXLMetadata(xlDisks, bucket, object7)
tests := []struct {
name int
xl xlObjects
parts []xlMetaV1
errs []error
expectedReadQuorum int
expectedWriteQuorum int
expectedError error
}{
{1, *xl, parts1, errs1, 8, 9, nil},
{2, *xl, parts2, errs2, 14, 15, nil},
{3, *xl, parts3, errs3, 8, 9, nil},
{4, *xl, parts4, errs4, 10, 11, nil},
{5, *xl, parts5, errs5, 14, 15, nil},
{6, *xl, parts6, errs6, 8, 9, nil},
{7, *xl, parts7, errs7, 14, 15, nil},
}
for _, tt := range tests {
actualReadQuorum, actualWriteQuorum, err := objectQuorumFromMeta(tt.xl, tt.parts, tt.errs)
if tt.expectedError != nil && err == nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if tt.expectedError == nil && err != nil {
t.Errorf("Test %d, Expected %s, got %s", tt.name, tt.expectedError, err)
return
}
if tt.expectedReadQuorum != actualReadQuorum {
t.Errorf("Test %d, Expected Read Quorum %d, got %d", tt.name, tt.expectedReadQuorum, actualReadQuorum)
return
}
if tt.expectedWriteQuorum != actualWriteQuorum {
t.Errorf("Test %d, Expected Write Quorum %d, got %d", tt.name, tt.expectedWriteQuorum, actualWriteQuorum)
return
}
}
}


@ -489,6 +489,12 @@ func resetGlobalIsEnvs() {
globalIsEnvCreds = false
globalIsEnvBrowser = false
globalIsEnvRegion = false
globalIsStorageClass = false
}
func resetGlobalStorageEnvs() {
globalStandardStorageClass = storageClass{}
globalRRStorageClass = storageClass{}
}
// Resets all the globals used modified in tests.
@ -510,6 +516,8 @@ func resetTestGlobals() {
resetGlobalIsXL()
// Reset global isEnvCreds flag.
resetGlobalIsEnvs()
// Reset global storage class flags
resetGlobalStorageEnvs()
}
// Configure the server for the test run.
@ -1968,6 +1976,9 @@ type objAPITestType func(obj ObjectLayer, instanceType string, bucketName string
// Regular object test type.
type objTestType func(obj ObjectLayer, instanceType string, t TestErrHandler)
// Special test type for test with directories
type objTestTypeWithDirs func(obj ObjectLayer, instanceType string, dirs []string, t TestErrHandler)
// Special object test type for disk not found situations.
type objTestDiskNotFoundType func(obj ObjectLayer, instanceType string, dirs []string, t *testing.T)
@ -1999,6 +2010,31 @@ func ExecObjectLayerTest(t TestErrHandler, objTest objTestType) {
defer removeRoots(append(fsDirs, fsDir))
}
// ExecObjectLayerTestWithDirs - executes object layer tests.
// Creates single node and XL ObjectLayer instance and runs test for both the layers.
func ExecObjectLayerTestWithDirs(t TestErrHandler, objTest objTestTypeWithDirs) {
// initialize the server and obtain the credentials and root.
// credentials are necessary to sign the HTTP request.
rootPath, err := newTestConfig(globalMinioDefaultRegion)
if err != nil {
t.Fatal("Unexpected error", err)
}
defer os.RemoveAll(rootPath)
objLayer, fsDir, err := prepareFS()
if err != nil {
t.Fatalf("Initialization of object layer failed for single node setup: %s", err)
}
objLayer, fsDirs, err := prepareXL()
if err != nil {
t.Fatalf("Initialization of object layer failed for XL setup: %s", err)
}
// Executing the object layer tests for XL.
objTest(objLayer, XLTestStr, fsDirs, t)
defer removeRoots(append(fsDirs, fsDir))
}
// ExecObjectLayerDiskAlteredTest - executes object layer tests while altering
// disks in between tests. Creates XL ObjectLayer instance and runs test for XL layer.
func ExecObjectLayerDiskAlteredTest(t *testing.T, objTest objTestDiskNotFoundType) {


@ -64,7 +64,8 @@ func (xl xlObjects) MakeBucketWithLocation(bucket, location string) error {
// Wait for all make vol to finish.
wg.Wait()
err := reduceWriteQuorumErrs(dErrs, bucketOpIgnoredErrs, xl.writeQuorum)
writeQuorum := len(xl.storageDisks)/2 + 1
err := reduceWriteQuorumErrs(dErrs, bucketOpIgnoredErrs, writeQuorum)
if errors.Cause(err) == errXLWriteQuorum {
// Purge successfully created buckets if we don't have writeQuorum.
undoMakeBucket(xl.storageDisks, bucket)
@ -142,7 +143,8 @@ func (xl xlObjects) getBucketInfo(bucketName string) (bucketInfo BucketInfo, err
// reduce to one error based on read quorum.
// `nil` is deliberately passed for ignoredErrs
// because these errors were already ignored.
return BucketInfo{}, reduceReadQuorumErrs(bucketErrs, nil, xl.readQuorum)
readQuorum := len(xl.storageDisks) / 2
return BucketInfo{}, reduceReadQuorumErrs(bucketErrs, nil, readQuorum)
}
// GetBucketInfo - returns BucketInfo for a bucket.
@ -251,7 +253,8 @@ func (xl xlObjects) DeleteBucket(bucket string) error {
// Wait for all the delete vols to finish.
wg.Wait()
err := reduceWriteQuorumErrs(dErrs, bucketOpIgnoredErrs, xl.writeQuorum)
writeQuorum := len(xl.storageDisks)/2 + 1
err := reduceWriteQuorumErrs(dErrs, bucketOpIgnoredErrs, writeQuorum)
if errors.Cause(err) == errXLWriteQuorum {
xl.undoDeleteBucket(bucket)
}


@ -121,6 +121,29 @@ func listOnlineDisks(disks []StorageAPI, partsMetadata []xlMetaV1, errs []error)
return onlineDisks, modTime return onlineDisks, modTime
} }
// Returns one of the most recently updated xlMeta files and the count of xlMeta(s) carrying that latest update
func getLatestXLMeta(partsMetadata []xlMetaV1, errs []error) (xlMetaV1, int) {
// List all the file commit ids from parts metadata.
modTimes := listObjectModtimes(partsMetadata, errs)
// Count all latest updated xlMeta values
var count int
var latestXLMeta xlMetaV1
// Reduce the list of modTimes to a single common value - i.e. the last updated time
modTime, _ := commonTime(modTimes)
// Iterate through all the modTimes and count the xlMeta(s) with the latest time.
for index, t := range modTimes {
if t == modTime && partsMetadata[index].IsValid() {
latestXLMeta = partsMetadata[index]
count++
}
}
// Return one of the latest xlMetaData, and the count of the latest updated xlMeta files
return latestXLMeta, count
}
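// Example (illustrative): if 14 of 16 xl.json copies share the newest modTime and
// are valid, one of those 14 is returned together with count 14; the caller
// (objectQuorumFromMeta) then compares that count against the object's DataBlocks.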
// outDatedDisks - return disks which don't have the latest object (i.e xl.json).
// disks that are offline are not 'marked' outdated.
func outDatedDisks(disks, latestDisks []StorageAPI, errs []error, partsMetadata []xlMetaV1,
@ -184,7 +207,11 @@ func xlHealStat(xl xlObjects, partsMetadata []xlMetaV1, errs []error) HealObject
// Less than quorum erasure coded blocks of the object have the same create time.
// This object can't be healed with the information we have.
modTime, count := commonTime(listObjectModtimes(partsMetadata, errs))
if count < xl.readQuorum {
// get read quorum for this object
readQuorum, _, err := objectQuorumFromMeta(xl, partsMetadata, errs)
if count < readQuorum || err != nil {
return HealObjectInfo{
Status: quorumUnavailable,
MissingDataCount: 0,
@ -217,7 +244,7 @@ func xlHealStat(xl xlObjects, partsMetadata []xlMetaV1, errs []error) HealObject
disksMissing = true
fallthrough
case errFileNotFound:
if xlMeta.Erasure.Distribution[i]-1 < xl.dataBlocks {
if xlMeta.Erasure.Distribution[i]-1 < xlMeta.Erasure.DataBlocks {
missingDataCount++
} else {
missingParityCount++


@ -364,7 +364,8 @@ func TestDisksWithAllParts(t *testing.T) {
}
partsMetadata, errs := readAllXLMetadata(xlDisks, bucket, object)
if reducedErr := reduceReadQuorumErrs(errs, objectOpIgnoredErrs, xl.readQuorum); reducedErr != nil {
readQuorum := len(xl.storageDisks) / 2
if reducedErr := reduceReadQuorumErrs(errs, objectOpIgnoredErrs, readQuorum); reducedErr != nil {
t.Fatalf("Failed to read xl meta data %v", reducedErr)
}


@ -81,13 +81,16 @@ func (xl xlObjects) HealBucket(bucket string) error {
return err
}
// get write quorum for an object
writeQuorum := len(xl.storageDisks)/2 + 1
// Heal bucket.
if err := healBucket(xl.storageDisks, bucket, xl.writeQuorum); err != nil {
if err := healBucket(xl.storageDisks, bucket, writeQuorum); err != nil {
return err
}
// Proceed to heal bucket metadata.
return healBucketMetadata(xl.storageDisks, bucket, xl.readQuorum)
return healBucketMetadata(xl, bucket)
}
// Heal bucket - create buckets on disks where it does not exist.
@ -139,17 +142,11 @@ func healBucket(storageDisks []StorageAPI, bucket string, writeQuorum int) error
// Heals all the metadata associated for a given bucket, this function
// heals `policy.json`, `notification.xml` and `listeners.json`.
func healBucketMetadata(storageDisks []StorageAPI, bucket string, readQuorum int) error {
func healBucketMetadata(xlObj xlObjects, bucket string) error {
healBucketMetaFn := func(metaPath string) error { healBucketMetaFn := func(metaPath string) error {
metaLock := globalNSMutex.NewNSLock(minioMetaBucket, metaPath) if _, _, err := xlObj.HealObject(minioMetaBucket, metaPath); err != nil && !isErrObjectNotFound(err) {
if err := metaLock.GetRLock(globalHealingTimeout); err != nil {
return err return err
} }
defer metaLock.RUnlock()
// Heals the given file at metaPath.
if _, _, err := healObject(storageDisks, minioMetaBucket, metaPath, readQuorum); err != nil && !isErrObjectNotFound(err) {
return err
} // Success.
return nil return nil
} }
@ -308,9 +305,9 @@ func (xl xlObjects) ListBucketsHeal() ([]BucketInfo, error) {
// during startup i.e healing of buckets, bucket metadata (policy.json,
// notification.xml, listeners.json) etc. Currently this function
// supports quick healing of buckets, bucket metadata.
func quickHeal(storageDisks []StorageAPI, writeQuorum int, readQuorum int) error {
func quickHeal(xlObj xlObjects, writeQuorum int, readQuorum int) error {
// List all bucket name occurrence from all disks.
_, bucketOcc, err := listAllBuckets(storageDisks)
_, bucketOcc, err := listAllBuckets(xlObj.storageDisks)
if err != nil {
return err
}
@ -318,10 +315,10 @@ func quickHeal(storageDisks []StorageAPI, writeQuorum int, readQuorum int) error
// All bucket names and bucket metadata that should be healed.
for bucketName, occCount := range bucketOcc {
// Heal bucket only if healing is needed.
if occCount != len(storageDisks) {
if occCount != len(xlObj.storageDisks) {
// Heal bucket and then proceed to heal bucket metadata if any.
if err = healBucket(storageDisks, bucketName, writeQuorum); err == nil {
if err = healBucket(xlObj.storageDisks, bucketName, writeQuorum); err == nil {
if err = healBucketMetadata(storageDisks, bucketName, readQuorum); err == nil {
if err = healBucketMetadata(xlObj, bucketName); err == nil {
continue
}
return err
@ -535,6 +532,15 @@ func healObject(storageDisks []StorageAPI, bucket, object string, quorum int) (i
// and later the disk comes back up again, heal on the object
// should delete it.
func (xl xlObjects) HealObject(bucket, object string) (int, int, error) {
// Read metadata files from all the disks
partsMetadata, errs := readAllXLMetadata(xl.storageDisks, bucket, object)
// get read quorum for this object
readQuorum, _, err := objectQuorumFromMeta(xl, partsMetadata, errs)
if err != nil {
return 0, 0, err
}
// Lock the object before healing.
objectLock := globalNSMutex.NewNSLock(bucket, object)
if err := objectLock.GetRLock(globalHealingTimeout); err != nil {
@ -543,5 +549,5 @@ func (xl xlObjects) HealObject(bucket, object string) (int, int, error) {
defer objectLock.RUnlock()
// Heal the object.
return healObject(xl.storageDisks, bucket, object, xl.readQuorum)
return healObject(xl.storageDisks, bucket, object, readQuorum)
}


@ -333,8 +333,12 @@ func TestQuickHeal(t *testing.T) {
}
}
// figure out read and write quorum
readQuorum := len(xl.storageDisks) / 2
writeQuorum := len(xl.storageDisks)/2 + 1
// Heal the missing buckets.
if err = quickHeal(xl.storageDisks, xl.writeQuorum, xl.readQuorum); err != nil {
if err = quickHeal(*xl, writeQuorum, readQuorum); err != nil {
t.Fatal(err)
}
@ -351,7 +355,7 @@ func TestQuickHeal(t *testing.T) {
t.Fatal("storage disk is not *retryStorage type")
}
xl.storageDisks[0] = newNaughtyDisk(posixDisk, nil, errUnformattedDisk)
if err = quickHeal(xl.storageDisks, xl.writeQuorum, xl.readQuorum); err != errUnformattedDisk {
if err = quickHeal(*xl, writeQuorum, readQuorum); err != errUnformattedDisk {
t.Fatal(err)
}
@ -368,7 +372,7 @@ func TestQuickHeal(t *testing.T) {
}
xl = obj.(*xlObjects)
xl.storageDisks[0] = nil
if err = quickHeal(xl.storageDisks, xl.writeQuorum, xl.readQuorum); err != nil {
if err = quickHeal(*xl, writeQuorum, readQuorum); err != nil {
t.Fatal("Got an unexpected error: ", err)
}
@ -390,7 +394,7 @@ func TestQuickHeal(t *testing.T) {
t.Fatal("storage disk is not *retryStorage type")
}
xl.storageDisks[0] = newNaughtyDisk(posixDisk, nil, errDiskNotFound)
if err = quickHeal(xl.storageDisks, xl.writeQuorum, xl.readQuorum); err != nil {
if err = quickHeal(*xl, writeQuorum, readQuorum); err != nil {
t.Fatal("Got an unexpected error: ", err)
}
}
@ -533,7 +537,8 @@ func TestHealObjectXL(t *testing.T) {
// Try healing now, expect to receive errDiskNotFound.
_, _, err = obj.HealObject(bucket, object)
if errors.Cause(err) != errDiskNotFound {
// since majority of xl.jsons are not available, object quorum can't be read properly and error will be errXLReadQuorum
if errors.Cause(err) != errXLReadQuorum {
t.Errorf("Expected %v but received %v", errDiskNotFound, err)
}
}


@ -396,7 +396,8 @@ func (xl xlObjects) readXLMetaParts(bucket, object string) (xlMetaParts []object
}
// If all errors were ignored, reduce to maximal occurrence
// based on the read quorum.
return nil, reduceReadQuorumErrs(ignoredErrs, nil, xl.readQuorum)
readQuorum := len(xl.storageDisks) / 2
return nil, reduceReadQuorumErrs(ignoredErrs, nil, readQuorum)
}
// readXLMetaStat - return xlMetaV1.Stat and xlMetaV1.Meta from one of the disks picked at random.
@ -423,7 +424,8 @@ func (xl xlObjects) readXLMetaStat(bucket, object string) (xlStat statInfo, xlMe
}
// If all errors were ignored, reduce to maximal occurrence
// based on the read quorum.
return statInfo{}, nil, reduceReadQuorumErrs(ignoredErrs, nil, xl.readQuorum)
readQuorum := len(xl.storageDisks) / 2
return statInfo{}, nil, reduceReadQuorumErrs(ignoredErrs, nil, readQuorum)
}
// deleteXLMetadata - deletes `xl.json` on a single disk.
@ -513,7 +515,7 @@ func writeUniqueXLMetadata(disks []StorageAPI, bucket, prefix string, xlMetas []
}
// writeSameXLMetadata - write `xl.json` on all disks in order.
func writeSameXLMetadata(disks []StorageAPI, bucket, prefix string, xlMeta xlMetaV1, writeQuorum, readQuorum int) ([]StorageAPI, error) {
func writeSameXLMetadata(disks []StorageAPI, bucket, prefix string, xlMeta xlMetaV1, writeQuorum int) ([]StorageAPI, error) {
var wg = &sync.WaitGroup{}
var mErrs = make([]error, len(disks))


@ -32,7 +32,7 @@ import (
)
// updateUploadJSON - add or remove upload ID info in all `uploads.json`.
func (xl xlObjects) updateUploadJSON(bucket, object, uploadID string, initiated time.Time, isRemove bool) error {
func (xl xlObjects) updateUploadJSON(bucket, object, uploadID string, initiated time.Time, writeQuorum int, isRemove bool) error {
uploadsPath := path.Join(bucket, object, uploadsJSONFile)
tmpUploadsPath := mustGetUUID()
@ -95,7 +95,7 @@ func (xl xlObjects) updateUploadJSON(bucket, object, uploadID string, initiated
// Wait for all the writes to finish. // Wait for all the writes to finish.
wg.Wait() wg.Wait()
err := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, xl.writeQuorum) err := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, writeQuorum)
if errors.Cause(err) == errXLWriteQuorum { if errors.Cause(err) == errXLWriteQuorum {
// No quorum. Perform cleanup on the minority of disks // No quorum. Perform cleanup on the minority of disks
// on which the operation succeeded. // on which the operation succeeded.
@ -151,13 +151,13 @@ func (xl xlObjects) updateUploadJSON(bucket, object, uploadID string, initiated
} }
// addUploadID - add upload ID and its initiated time to 'uploads.json'. // addUploadID - add upload ID and its initiated time to 'uploads.json'.
func (xl xlObjects) addUploadID(bucket, object string, uploadID string, initiated time.Time) error { func (xl xlObjects) addUploadID(bucket, object string, uploadID string, initiated time.Time, writeQuorum int) error {
return xl.updateUploadJSON(bucket, object, uploadID, initiated, false) return xl.updateUploadJSON(bucket, object, uploadID, initiated, writeQuorum, false)
} }
// removeUploadID - remove upload ID in 'uploads.json'. // removeUploadID - remove upload ID in 'uploads.json'.
func (xl xlObjects) removeUploadID(bucket, object string, uploadID string) error { func (xl xlObjects) removeUploadID(bucket, object string, uploadID string, writeQuorum int) error {
return xl.updateUploadJSON(bucket, object, uploadID, time.Time{}, true) return xl.updateUploadJSON(bucket, object, uploadID, time.Time{}, writeQuorum, true)
} }
// Returns if the prefix is a multipart upload. // Returns if the prefix is a multipart upload.
@ -228,7 +228,8 @@ func (xl xlObjects) statPart(bucket, object, uploadID, partName string) (fileInf
} }
// If all errors were ignored, reduce to maximal occurrence // If all errors were ignored, reduce to maximal occurrence
// based on the read quorum. // based on the read quorum.
return FileInfo{}, reduceReadQuorumErrs(ignoredErrs, nil, xl.readQuorum) readQuorum := len(xl.storageDisks) / 2
return FileInfo{}, reduceReadQuorumErrs(ignoredErrs, nil, readQuorum)
} }
// commitXLMetadata - commit `xl.json` from source prefix to destination prefix in the given slice of disks. // commitXLMetadata - commit `xl.json` from source prefix to destination prefix in the given slice of disks.
@ -499,7 +500,15 @@ func (xl xlObjects) ListMultipartUploads(bucket, object, keyMarker, uploadIDMark
// disks. `uploads.json` carries metadata regarding on-going multipart // disks. `uploads.json` carries metadata regarding on-going multipart
// operation(s) on the object. // operation(s) on the object.
func (xl xlObjects) newMultipartUpload(bucket string, object string, meta map[string]string) (string, error) { func (xl xlObjects) newMultipartUpload(bucket string, object string, meta map[string]string) (string, error) {
xlMeta := newXLMetaV1(object, xl.dataBlocks, xl.parityBlocks)
dataBlocks, parityBlocks := getDrivesCount(meta[amzStorageClass], xl.storageDisks)
xlMeta := newXLMetaV1(object, dataBlocks, parityBlocks)
// we now know the number of blocks this object needs for data and parity.
// establish the writeQuorum using this data
writeQuorum := dataBlocks + 1
// If not set default to "application/octet-stream" // If not set default to "application/octet-stream"
if meta["content-type"] == "" { if meta["content-type"] == "" {
contentType := "application/octet-stream" contentType := "application/octet-stream"
@ -528,7 +537,7 @@ func (xl xlObjects) newMultipartUpload(bucket string, object string, meta map[st
tempUploadIDPath := uploadID tempUploadIDPath := uploadID
// Write updated `xl.json` to all disks. // Write updated `xl.json` to all disks.
disks, err := writeSameXLMetadata(xl.storageDisks, minioMetaTmpBucket, tempUploadIDPath, xlMeta, xl.writeQuorum, xl.readQuorum) disks, err := writeSameXLMetadata(xl.storageDisks, minioMetaTmpBucket, tempUploadIDPath, xlMeta, writeQuorum)
if err != nil { if err != nil {
return "", toObjectErr(err, minioMetaTmpBucket, tempUploadIDPath) return "", toObjectErr(err, minioMetaTmpBucket, tempUploadIDPath)
} }
@ -538,14 +547,14 @@ func (xl xlObjects) newMultipartUpload(bucket string, object string, meta map[st
defer xl.deleteObject(minioMetaTmpBucket, tempUploadIDPath) defer xl.deleteObject(minioMetaTmpBucket, tempUploadIDPath)
// Attempt to rename temp upload object to actual upload path object // Attempt to rename temp upload object to actual upload path object
_, rErr := renameObject(disks, minioMetaTmpBucket, tempUploadIDPath, minioMetaMultipartBucket, uploadIDPath, xl.writeQuorum) _, rErr := renameObject(disks, minioMetaTmpBucket, tempUploadIDPath, minioMetaMultipartBucket, uploadIDPath, writeQuorum)
if rErr != nil { if rErr != nil {
return "", toObjectErr(rErr, minioMetaMultipartBucket, uploadIDPath) return "", toObjectErr(rErr, minioMetaMultipartBucket, uploadIDPath)
} }
initiated := UTCNow() initiated := UTCNow()
// Create or update 'uploads.json' // Create or update 'uploads.json'
if err = xl.addUploadID(bucket, object, uploadID, initiated); err != nil { if err = xl.addUploadID(bucket, object, uploadID, initiated, writeQuorum); err != nil {
return "", err return "", err
} }
// Return success. // Return success.
@ -641,7 +650,14 @@ func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, d
// Read metadata associated with the object from all disks. // Read metadata associated with the object from all disks.
partsMetadata, errs = readAllXLMetadata(xl.storageDisks, minioMetaMultipartBucket, partsMetadata, errs = readAllXLMetadata(xl.storageDisks, minioMetaMultipartBucket,
uploadIDPath) uploadIDPath)
reducedErr := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, xl.writeQuorum)
// get Quorum for this object
_, writeQuorum, err := objectQuorumFromMeta(xl, partsMetadata, errs)
if err != nil {
return pi, toObjectErr(err, bucket, object)
}
reducedErr := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, writeQuorum)
if errors.Cause(reducedErr) == errXLWriteQuorum { if errors.Cause(reducedErr) == errXLWriteQuorum {
preUploadIDLock.RUnlock() preUploadIDLock.RUnlock()
return pi, toObjectErr(reducedErr, bucket, object) return pi, toObjectErr(reducedErr, bucket, object)
@ -669,7 +685,7 @@ func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, d
// Delete the temporary object part. If PutObjectPart succeeds there would be nothing to delete. // Delete the temporary object part. If PutObjectPart succeeds there would be nothing to delete.
defer xl.deleteObject(minioMetaTmpBucket, tmpPart) defer xl.deleteObject(minioMetaTmpBucket, tmpPart)
if data.Size() > 0 { if data.Size() > 0 {
if pErr := xl.prepareFile(minioMetaTmpBucket, tmpPartPath, data.Size(), onlineDisks, xlMeta.Erasure.BlockSize, xlMeta.Erasure.DataBlocks); err != nil { if pErr := xl.prepareFile(minioMetaTmpBucket, tmpPartPath, data.Size(), onlineDisks, xlMeta.Erasure.BlockSize, xlMeta.Erasure.DataBlocks, writeQuorum); err != nil {
return pi, toObjectErr(pErr, bucket, object) return pi, toObjectErr(pErr, bucket, object)
} }
@ -680,7 +696,7 @@ func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, d
return pi, toObjectErr(err, bucket, object) return pi, toObjectErr(err, bucket, object)
} }
buffer := make([]byte, xlMeta.Erasure.BlockSize, 2*xlMeta.Erasure.BlockSize) // alloc additional space for parity blocks created while erasure coding buffer := make([]byte, xlMeta.Erasure.BlockSize, 2*xlMeta.Erasure.BlockSize) // alloc additional space for parity blocks created while erasure coding
file, err := storage.CreateFile(data, minioMetaTmpBucket, tmpPartPath, buffer, DefaultBitrotAlgorithm, xl.writeQuorum) file, err := storage.CreateFile(data, minioMetaTmpBucket, tmpPartPath, buffer, DefaultBitrotAlgorithm, writeQuorum)
if err != nil { if err != nil {
return pi, toObjectErr(err, bucket, object) return pi, toObjectErr(err, bucket, object)
} }
@ -705,14 +721,14 @@ func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, d
// Rename temporary part file to its final location. // Rename temporary part file to its final location.
partPath := path.Join(uploadIDPath, partSuffix) partPath := path.Join(uploadIDPath, partSuffix)
onlineDisks, err = renamePart(onlineDisks, minioMetaTmpBucket, tmpPartPath, minioMetaMultipartBucket, partPath, xl.writeQuorum) onlineDisks, err = renamePart(onlineDisks, minioMetaTmpBucket, tmpPartPath, minioMetaMultipartBucket, partPath, writeQuorum)
if err != nil { if err != nil {
return pi, toObjectErr(err, minioMetaMultipartBucket, partPath) return pi, toObjectErr(err, minioMetaMultipartBucket, partPath)
} }
// Read metadata again because it might be updated with parallel upload of another part. // Read metadata again because it might be updated with parallel upload of another part.
partsMetadata, errs = readAllXLMetadata(onlineDisks, minioMetaMultipartBucket, uploadIDPath) partsMetadata, errs = readAllXLMetadata(onlineDisks, minioMetaMultipartBucket, uploadIDPath)
reducedErr = reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, xl.writeQuorum) reducedErr = reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, writeQuorum)
if errors.Cause(reducedErr) == errXLWriteQuorum { if errors.Cause(reducedErr) == errXLWriteQuorum {
return pi, toObjectErr(reducedErr, bucket, object) return pi, toObjectErr(reducedErr, bucket, object)
} }
@ -747,11 +763,11 @@ func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, d
tempXLMetaPath := newUUID tempXLMetaPath := newUUID
// Writes a unique `xl.json` each disk carrying new checksum related information. // Writes a unique `xl.json` each disk carrying new checksum related information.
if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempXLMetaPath, partsMetadata, xl.writeQuorum); err != nil { if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempXLMetaPath, partsMetadata, writeQuorum); err != nil {
return pi, toObjectErr(err, minioMetaTmpBucket, tempXLMetaPath) return pi, toObjectErr(err, minioMetaTmpBucket, tempXLMetaPath)
} }
if _, err = commitXLMetadata(onlineDisks, minioMetaTmpBucket, tempXLMetaPath, minioMetaMultipartBucket, uploadIDPath, xl.writeQuorum); err != nil { if _, err = commitXLMetadata(onlineDisks, minioMetaTmpBucket, tempXLMetaPath, minioMetaMultipartBucket, uploadIDPath, writeQuorum); err != nil {
return pi, toObjectErr(err, minioMetaMultipartBucket, uploadIDPath) return pi, toObjectErr(err, minioMetaMultipartBucket, uploadIDPath)
} }
@ -904,7 +920,14 @@ func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, upload
// Read metadata associated with the object from all disks. // Read metadata associated with the object from all disks.
partsMetadata, errs := readAllXLMetadata(xl.storageDisks, minioMetaMultipartBucket, uploadIDPath) partsMetadata, errs := readAllXLMetadata(xl.storageDisks, minioMetaMultipartBucket, uploadIDPath)
reducedErr := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, xl.writeQuorum)
// get Quorum for this object
_, writeQuorum, err := objectQuorumFromMeta(xl, partsMetadata, errs)
if err != nil {
return oi, toObjectErr(err, bucket, object)
}
reducedErr := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, writeQuorum)
if errors.Cause(reducedErr) == errXLWriteQuorum { if errors.Cause(reducedErr) == errXLWriteQuorum {
return oi, toObjectErr(reducedErr, bucket, object) return oi, toObjectErr(reducedErr, bucket, object)
} }
@ -992,12 +1015,12 @@ func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, upload
} }
// Write unique `xl.json` for each disk. // Write unique `xl.json` for each disk.
if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempUploadIDPath, partsMetadata, xl.writeQuorum); err != nil { if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempUploadIDPath, partsMetadata, writeQuorum); err != nil {
return oi, toObjectErr(err, minioMetaTmpBucket, tempUploadIDPath) return oi, toObjectErr(err, minioMetaTmpBucket, tempUploadIDPath)
} }
var rErr error var rErr error
onlineDisks, rErr = commitXLMetadata(onlineDisks, minioMetaTmpBucket, tempUploadIDPath, minioMetaMultipartBucket, uploadIDPath, xl.writeQuorum) onlineDisks, rErr = commitXLMetadata(onlineDisks, minioMetaTmpBucket, tempUploadIDPath, minioMetaMultipartBucket, uploadIDPath, writeQuorum)
if rErr != nil { if rErr != nil {
return oi, toObjectErr(rErr, minioMetaMultipartBucket, uploadIDPath) return oi, toObjectErr(rErr, minioMetaMultipartBucket, uploadIDPath)
} }
@ -1025,7 +1048,7 @@ func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, upload
// NOTE: Do not use online disks slice here. // NOTE: Do not use online disks slice here.
// The reason is that existing object should be purged // The reason is that existing object should be purged
// regardless of `xl.json` status and rolled back in case of errors. // regardless of `xl.json` status and rolled back in case of errors.
_, err = renameObject(xl.storageDisks, bucket, object, minioMetaTmpBucket, newUniqueID, xl.writeQuorum) _, err = renameObject(xl.storageDisks, bucket, object, minioMetaTmpBucket, newUniqueID, writeQuorum)
if err != nil { if err != nil {
return oi, toObjectErr(err, bucket, object) return oi, toObjectErr(err, bucket, object)
} }
@ -1045,7 +1068,7 @@ func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, upload
} }
// Rename the multipart object to final location. // Rename the multipart object to final location.
if _, err = renameObject(onlineDisks, minioMetaMultipartBucket, uploadIDPath, bucket, object, xl.writeQuorum); err != nil { if _, err = renameObject(onlineDisks, minioMetaMultipartBucket, uploadIDPath, bucket, object, writeQuorum); err != nil {
return oi, toObjectErr(err, bucket, object) return oi, toObjectErr(err, bucket, object)
} }
@ -1060,7 +1083,7 @@ func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, upload
defer objectMPartPathLock.Unlock() defer objectMPartPathLock.Unlock()
// remove entry from uploads.json with quorum // remove entry from uploads.json with quorum
if err = xl.removeUploadID(bucket, object, uploadID); err != nil { if err = xl.removeUploadID(bucket, object, uploadID, writeQuorum); err != nil {
return oi, toObjectErr(err, minioMetaMultipartBucket, path.Join(bucket, object)) return oi, toObjectErr(err, minioMetaMultipartBucket, path.Join(bucket, object))
} }
@ -1081,13 +1104,10 @@ func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, upload
} }
// Wrapper which removes all the uploaded parts. // Wrapper which removes all the uploaded parts.
func (xl xlObjects) cleanupUploadedParts(bucket, object, uploadID string) error { func (xl xlObjects) cleanupUploadedParts(uploadIDPath string, writeQuorum int) error {
var errs = make([]error, len(xl.storageDisks)) var errs = make([]error, len(xl.storageDisks))
var wg = &sync.WaitGroup{} var wg = &sync.WaitGroup{}
// Construct uploadIDPath.
uploadIDPath := path.Join(bucket, object, uploadID)
// Cleanup uploadID for all disks. // Cleanup uploadID for all disks.
for index, disk := range xl.storageDisks { for index, disk := range xl.storageDisks {
if disk == nil { if disk == nil {
@ -1108,7 +1128,7 @@ func (xl xlObjects) cleanupUploadedParts(bucket, object, uploadID string) error
// Wait for all the cleanups to finish. // Wait for all the cleanups to finish.
wg.Wait() wg.Wait()
return reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, xl.writeQuorum) return reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, writeQuorum)
} }
// abortMultipartUpload - wrapper for purging an ongoing multipart // abortMultipartUpload - wrapper for purging an ongoing multipart
@ -1116,8 +1136,20 @@ func (xl xlObjects) cleanupUploadedParts(bucket, object, uploadID string) error
// the directory at '.minio.sys/multipart/bucket/object/uploadID' holding // the directory at '.minio.sys/multipart/bucket/object/uploadID' holding
// all the upload parts. // all the upload parts.
func (xl xlObjects) abortMultipartUpload(bucket, object, uploadID string) (err error) { func (xl xlObjects) abortMultipartUpload(bucket, object, uploadID string) (err error) {
// Construct uploadIDPath.
uploadIDPath := path.Join(bucket, object, uploadID)
// Read metadata associated with the object from all disks.
partsMetadata, errs := readAllXLMetadata(xl.storageDisks, minioMetaMultipartBucket, uploadIDPath)
// get Quorum for this object
_, writeQuorum, err := objectQuorumFromMeta(xl, partsMetadata, errs)
if err != nil {
return toObjectErr(err, bucket, object)
}
// Cleanup all uploaded parts. // Cleanup all uploaded parts.
if err = xl.cleanupUploadedParts(bucket, object, uploadID); err != nil { if err = xl.cleanupUploadedParts(uploadIDPath, writeQuorum); err != nil {
return toObjectErr(err, bucket, object) return toObjectErr(err, bucket, object)
} }
@ -1131,7 +1163,7 @@ func (xl xlObjects) abortMultipartUpload(bucket, object, uploadID string) (err e
defer objectMPartPathLock.Unlock() defer objectMPartPathLock.Unlock()
// remove entry from uploads.json with quorum // remove entry from uploads.json with quorum
if err = xl.removeUploadID(bucket, object, uploadID); err != nil { if err = xl.removeUploadID(bucket, object, uploadID, writeQuorum); err != nil {
return toObjectErr(err, bucket, object) return toObjectErr(err, bucket, object)
} }
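The per-object quorum helper `objectQuorumFromMeta` is called throughout the multipart hunks above, but its definition is not part of this excerpt. Below is a minimal sketch of the behaviour those call sites rely on, assuming the surrounding package's types (`xlObjects`, `xlMetaV1`, `errXLReadQuorum`); the real helper may pick the reference `xl.json` differently.

```go
// objectQuorumFromMeta derives read/write quorums from the erasure layout
// recorded with the object itself, instead of the fixed per-set
// xl.readQuorum/xl.writeQuorum fields removed by this change. Sketch only.
func objectQuorumFromMeta(xl xlObjects, partsMetaData []xlMetaV1, errs []error) (readQuorum, writeQuorum int, err error) {
	for i, meta := range partsMetaData {
		// Skip disks whose xl.json could not be read or is not valid.
		if errs[i] != nil || !meta.IsValid() {
			continue
		}
		// Reads need at least DataBlocks disks; writes need one more so
		// that two overlapping writers cannot both reach quorum.
		return meta.Erasure.DataBlocks, meta.Erasure.DataBlocks + 1, nil
	}
	// No usable xl.json: surface a read quorum failure, which is what the
	// updated tests in this change expect.
	return 0, 0, errXLReadQuorum
}
```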


@ -142,19 +142,20 @@ func TestUpdateUploadJSON(t *testing.T) {
} }
testCases := []struct { testCases := []struct {
uploadID string uploadID string
initiated time.Time initiated time.Time
isRemove bool writeQuorum int
errVal error isRemove bool
errVal error
}{ }{
{"111abc", UTCNow(), false, nil}, {"111abc", UTCNow(), 9, false, nil},
{"222abc", UTCNow(), false, nil}, {"222abc", UTCNow(), 10, false, nil},
{"111abc", time.Time{}, true, nil}, {"111abc", time.Time{}, 11, true, nil},
} }
xl := obj.(*xlObjects) xl := obj.(*xlObjects)
for i, test := range testCases { for i, test := range testCases {
testErrVal := xl.updateUploadJSON(bucket, object, test.uploadID, test.initiated, test.isRemove) testErrVal := xl.updateUploadJSON(bucket, object, test.uploadID, test.initiated, test.writeQuorum, test.isRemove)
if testErrVal != test.errVal { if testErrVal != test.errVal {
t.Errorf("Test %d: Expected error value %v, but got %v", t.Errorf("Test %d: Expected error value %v, but got %v",
i+1, test.errVal, testErrVal) i+1, test.errVal, testErrVal)
@ -166,7 +167,7 @@ func TestUpdateUploadJSON(t *testing.T) {
xl.storageDisks[i] = newNaughtyDisk(xl.storageDisks[i].(*retryStorage), nil, errFaultyDisk) xl.storageDisks[i] = newNaughtyDisk(xl.storageDisks[i].(*retryStorage), nil, errFaultyDisk)
} }
testErrVal := xl.updateUploadJSON(bucket, object, "222abc", UTCNow(), false) testErrVal := xl.updateUploadJSON(bucket, object, "222abc", UTCNow(), 10, false)
if testErrVal == nil || testErrVal.Error() != errXLWriteQuorum.Error() { if testErrVal == nil || testErrVal.Error() != errXLWriteQuorum.Error() {
t.Errorf("Expected write quorum error, but got: %v", testErrVal) t.Errorf("Expected write quorum error, but got: %v", testErrVal)
} }


@ -34,7 +34,7 @@ import (
var objectOpIgnoredErrs = append(baseIgnoredErrs, errDiskAccessDenied) var objectOpIgnoredErrs = append(baseIgnoredErrs, errDiskAccessDenied)
// prepareFile hints the bottom layer to optimize the creation of a new object // prepareFile hints the bottom layer to optimize the creation of a new object
func (xl xlObjects) prepareFile(bucket, object string, size int64, onlineDisks []StorageAPI, blockSize int64, dataBlocks int) error { func (xl xlObjects) prepareFile(bucket, object string, size int64, onlineDisks []StorageAPI, blockSize int64, dataBlocks, writeQuorum int) error {
pErrs := make([]error, len(onlineDisks)) pErrs := make([]error, len(onlineDisks))
// Calculate the real size of the part in one disk. // Calculate the real size of the part in one disk.
actualSize := xl.sizeOnDisk(size, blockSize, dataBlocks) actualSize := xl.sizeOnDisk(size, blockSize, dataBlocks)
@ -49,7 +49,7 @@ func (xl xlObjects) prepareFile(bucket, object string, size int64, onlineDisks [
} }
} }
} }
return reduceWriteQuorumErrs(pErrs, objectOpIgnoredErrs, xl.writeQuorum) return reduceWriteQuorumErrs(pErrs, objectOpIgnoredErrs, writeQuorum)
} }
/// Object Operations /// Object Operations
@ -60,7 +60,14 @@ func (xl xlObjects) prepareFile(bucket, object string, size int64, onlineDisks [
func (xl xlObjects) CopyObject(srcBucket, srcObject, dstBucket, dstObject string, metadata map[string]string) (oi ObjectInfo, e error) { func (xl xlObjects) CopyObject(srcBucket, srcObject, dstBucket, dstObject string, metadata map[string]string) (oi ObjectInfo, e error) {
// Read metadata associated with the object from all disks. // Read metadata associated with the object from all disks.
metaArr, errs := readAllXLMetadata(xl.storageDisks, srcBucket, srcObject) metaArr, errs := readAllXLMetadata(xl.storageDisks, srcBucket, srcObject)
if reducedErr := reduceReadQuorumErrs(errs, objectOpIgnoredErrs, xl.readQuorum); reducedErr != nil {
// get Quorum for this object
readQuorum, writeQuorum, err := objectQuorumFromMeta(xl, metaArr, errs)
if err != nil {
return oi, toObjectErr(err, srcBucket, srcObject)
}
if reducedErr := reduceReadQuorumErrs(errs, objectOpIgnoredErrs, readQuorum); reducedErr != nil {
return oi, toObjectErr(reducedErr, srcBucket, srcObject) return oi, toObjectErr(reducedErr, srcBucket, srcObject)
} }
@ -92,11 +99,11 @@ func (xl xlObjects) CopyObject(srcBucket, srcObject, dstBucket, dstObject string
tempObj := mustGetUUID() tempObj := mustGetUUID()
// Write unique `xl.json` for each disk. // Write unique `xl.json` for each disk.
if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, xl.writeQuorum); err != nil { if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, writeQuorum); err != nil {
return oi, toObjectErr(err, srcBucket, srcObject) return oi, toObjectErr(err, srcBucket, srcObject)
} }
// Rename atomically `xl.json` from tmp location to destination for each disk. // Rename atomically `xl.json` from tmp location to destination for each disk.
if _, err = renameXLMetadata(onlineDisks, minioMetaTmpBucket, tempObj, srcBucket, srcObject, xl.writeQuorum); err != nil { if _, err = renameXLMetadata(onlineDisks, minioMetaTmpBucket, tempObj, srcBucket, srcObject, writeQuorum); err != nil {
return oi, toObjectErr(err, srcBucket, srcObject) return oi, toObjectErr(err, srcBucket, srcObject)
} }
return xlMeta.ToObjectInfo(srcBucket, srcObject), nil return xlMeta.ToObjectInfo(srcBucket, srcObject), nil
@ -154,7 +161,14 @@ func (xl xlObjects) GetObject(bucket, object string, startOffset int64, length i
// Read metadata associated with the object from all disks. // Read metadata associated with the object from all disks.
metaArr, errs := readAllXLMetadata(xl.storageDisks, bucket, object) metaArr, errs := readAllXLMetadata(xl.storageDisks, bucket, object)
if reducedErr := reduceReadQuorumErrs(errs, objectOpIgnoredErrs, xl.readQuorum); reducedErr != nil {
// get Quorum for this object
readQuorum, _, err := objectQuorumFromMeta(xl, metaArr, errs)
if err != nil {
return toObjectErr(err, bucket, object)
}
if reducedErr := reduceReadQuorumErrs(errs, objectOpIgnoredErrs, readQuorum); reducedErr != nil {
return toObjectErr(reducedErr, bucket, object) return toObjectErr(reducedErr, bucket, object)
} }
@ -368,7 +382,7 @@ func undoRename(disks []StorageAPI, srcBucket, srcEntry, dstBucket, dstEntry str
// rename - common function that renamePart and renameObject use to rename // rename - common function that renamePart and renameObject use to rename
// the respective underlying storage layer representations. // the respective underlying storage layer representations.
func rename(disks []StorageAPI, srcBucket, srcEntry, dstBucket, dstEntry string, isDir bool, quorum int) ([]StorageAPI, error) { func rename(disks []StorageAPI, srcBucket, srcEntry, dstBucket, dstEntry string, isDir bool, writeQuorum int) ([]StorageAPI, error) {
// Initialize sync waitgroup. // Initialize sync waitgroup.
var wg = &sync.WaitGroup{} var wg = &sync.WaitGroup{}
@ -399,9 +413,9 @@ func rename(disks []StorageAPI, srcBucket, srcEntry, dstBucket, dstEntry string,
// Wait for all renames to finish. // Wait for all renames to finish.
wg.Wait() wg.Wait()
// We can safely allow RenameFile errors up to len(xl.storageDisks) - xl.writeQuorum // We can safely allow RenameFile errors up to len(xl.storageDisks) - writeQuorum
// otherwise return failure. Cleanup successful renames. // otherwise return failure. Cleanup successful renames.
err := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, quorum) err := reduceWriteQuorumErrs(errs, objectOpIgnoredErrs, writeQuorum)
if errors.Cause(err) == errXLWriteQuorum { if errors.Cause(err) == errXLWriteQuorum {
// Undo all the partial rename operations. // Undo all the partial rename operations.
undoRename(disks, srcBucket, srcEntry, dstBucket, dstEntry, isDir, errs) undoRename(disks, srcBucket, srcEntry, dstBucket, dstEntry, isDir, errs)
@ -493,11 +507,17 @@ func (xl xlObjects) PutObject(bucket string, object string, data *hash.Reader, m
} }
} }
} }
// Get parity and data drive count based on storage class metadata
dataDrives, parityDrives := getDrivesCount(metadata[amzStorageClass], xl.storageDisks)
// we now know the number of blocks this object needs for data and parity.
// writeQuorum is dataBlocks + 1
writeQuorum := dataDrives + 1
// Initialize parts metadata // Initialize parts metadata
partsMetadata := make([]xlMetaV1, len(xl.storageDisks)) partsMetadata := make([]xlMetaV1, len(xl.storageDisks))
xlMeta := newXLMetaV1(object, xl.dataBlocks, xl.parityBlocks) xlMeta := newXLMetaV1(object, dataDrives, parityDrives)
// Initialize xl meta. // Initialize xl meta.
for index := range partsMetadata { for index := range partsMetadata {
@ -541,7 +561,7 @@ func (xl xlObjects) PutObject(bucket string, object string, data *hash.Reader, m
// This is only an optimization. // This is only an optimization.
var curPartReader io.Reader var curPartReader io.Reader
if curPartSize > 0 { if curPartSize > 0 {
pErr := xl.prepareFile(minioMetaTmpBucket, tempErasureObj, curPartSize, storage.disks, xlMeta.Erasure.BlockSize, xlMeta.Erasure.DataBlocks) pErr := xl.prepareFile(minioMetaTmpBucket, tempErasureObj, curPartSize, storage.disks, xlMeta.Erasure.BlockSize, xlMeta.Erasure.DataBlocks, writeQuorum)
if pErr != nil { if pErr != nil {
return ObjectInfo{}, toObjectErr(pErr, bucket, object) return ObjectInfo{}, toObjectErr(pErr, bucket, object)
} }
@ -554,7 +574,7 @@ func (xl xlObjects) PutObject(bucket string, object string, data *hash.Reader, m
} }
file, erasureErr := storage.CreateFile(curPartReader, minioMetaTmpBucket, file, erasureErr := storage.CreateFile(curPartReader, minioMetaTmpBucket,
tempErasureObj, buffer, DefaultBitrotAlgorithm, xl.writeQuorum) tempErasureObj, buffer, DefaultBitrotAlgorithm, writeQuorum)
if erasureErr != nil { if erasureErr != nil {
return ObjectInfo{}, toObjectErr(erasureErr, minioMetaTmpBucket, tempErasureObj) return ObjectInfo{}, toObjectErr(erasureErr, minioMetaTmpBucket, tempErasureObj)
} }
@ -602,7 +622,7 @@ func (xl xlObjects) PutObject(bucket string, object string, data *hash.Reader, m
// NOTE: Do not use online disks slice here. // NOTE: Do not use online disks slice here.
// The reason is that existing object should be purged // The reason is that existing object should be purged
// regardless of `xl.json` status and rolled back in case of errors. // regardless of `xl.json` status and rolled back in case of errors.
_, err = renameObject(xl.storageDisks, bucket, object, minioMetaTmpBucket, newUniqueID, xl.writeQuorum) _, err = renameObject(xl.storageDisks, bucket, object, minioMetaTmpBucket, newUniqueID, writeQuorum)
if err != nil { if err != nil {
return ObjectInfo{}, toObjectErr(err, bucket, object) return ObjectInfo{}, toObjectErr(err, bucket, object)
} }
@ -617,12 +637,12 @@ func (xl xlObjects) PutObject(bucket string, object string, data *hash.Reader, m
} }
// Write unique `xl.json` for each disk. // Write unique `xl.json` for each disk.
if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, xl.writeQuorum); err != nil { if onlineDisks, err = writeUniqueXLMetadata(onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, writeQuorum); err != nil {
return ObjectInfo{}, toObjectErr(err, bucket, object) return ObjectInfo{}, toObjectErr(err, bucket, object)
} }
// Rename the successfully written temporary object to final location. // Rename the successfully written temporary object to final location.
if _, err = renameObject(onlineDisks, minioMetaTmpBucket, tempObj, bucket, object, xl.writeQuorum); err != nil { if _, err = renameObject(onlineDisks, minioMetaTmpBucket, tempObj, bucket, object, writeQuorum); err != nil {
return ObjectInfo{}, toObjectErr(err, bucket, object) return ObjectInfo{}, toObjectErr(err, bucket, object)
} }
@ -659,6 +679,15 @@ func (xl xlObjects) deleteObject(bucket, object string) error {
// Initialize sync waitgroup. // Initialize sync waitgroup.
var wg = &sync.WaitGroup{} var wg = &sync.WaitGroup{}
// Read metadata associated with the object from all disks.
metaArr, errs := readAllXLMetadata(xl.storageDisks, bucket, object)
// get Quorum for this object
_, writeQuorum, err := objectQuorumFromMeta(xl, metaArr, errs)
if err != nil {
return err
}
// Initialize list of errors. // Initialize list of errors.
var dErrs = make([]error, len(xl.storageDisks)) var dErrs = make([]error, len(xl.storageDisks))
@ -680,7 +709,7 @@ func (xl xlObjects) deleteObject(bucket, object string) error {
// Wait for all routines to finish. // Wait for all routines to finish.
wg.Wait() wg.Wait()
return reduceWriteQuorumErrs(dErrs, objectOpIgnoredErrs, xl.writeQuorum) return reduceWriteQuorumErrs(dErrs, objectOpIgnoredErrs, writeQuorum)
} }
// DeleteObject - deletes an object, this call doesn't necessary reply // DeleteObject - deletes an object, this call doesn't necessary reply
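`getDrivesCount`, used above by `newMultipartUpload`, `PutObject` and `getStorageInfo`, maps a storage class value to data and parity drive counts. A rough sketch of that mapping follows, assuming the parity globals parsed from the storage class environment variables are named `globalStandardStorageClass` and `globalRRStorageClass`; the actual helper may differ in detail.

```go
// getDrivesCount returns data and parity drive counts for a given storage
// class and disk set. Sketch: defaults follow the documented behaviour of
// N/2 parity for the standard class and 2 for reduced redundancy.
func getDrivesCount(sc string, disks []StorageAPI) (data, parity int) {
	totalDisks := len(disks)
	parity = totalDisks / 2 // default for the standard class
	switch sc {
	case reducedRedundancyStorageClass:
		parity = 2 // documented RRS default
		if globalRRStorageClass.Scheme != "" {
			parity = globalRRStorageClass.Parity
		}
	default:
		if globalStandardStorageClass.Scheme != "" {
			parity = globalStandardStorageClass.Parity
		}
	}
	return totalDisks - parity, parity
}
```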


@ -154,8 +154,9 @@ func TestXLDeleteObjectDiskNotFound(t *testing.T) {
xl.storageDisks[8] = nil xl.storageDisks[8] = nil
err = obj.DeleteObject(bucket, object) err = obj.DeleteObject(bucket, object)
err = errors.Cause(err) err = errors.Cause(err)
if err != toObjectErr(errXLWriteQuorum, bucket, object) { // since majority of disks are not available, metaquorum is not achieved and hence errXLReadQuorum error
t.Errorf("Expected deleteObject to fail with %v, but failed with %v", toObjectErr(errXLWriteQuorum, bucket, object), err) if err != toObjectErr(errXLReadQuorum, bucket, object) {
t.Errorf("Expected deleteObject to fail with %v, but failed with %v", toObjectErr(errXLReadQuorum, bucket, object), err)
} }
// Cleanup backend directories // Cleanup backend directories
removeRoots(fsDirs) removeRoots(fsDirs)


@ -50,10 +50,6 @@ const (
type xlObjects struct { type xlObjects struct {
mutex *sync.Mutex mutex *sync.Mutex
storageDisks []StorageAPI // Collection of initialized backend disks. storageDisks []StorageAPI // Collection of initialized backend disks.
dataBlocks int // dataBlocks count caculated for erasure.
parityBlocks int // parityBlocks count calculated for erasure.
readQuorum int // readQuorum minimum required disks to read data.
writeQuorum int // writeQuorum minimum required disks to write data.
// ListObjects pool management. // ListObjects pool management.
listPool *treeWalkPool listPool *treeWalkPool
@ -92,6 +88,7 @@ func newXLObjects(storageDisks []StorageAPI) (ObjectLayer, error) {
return nil, errInvalidArgument return nil, errInvalidArgument
} }
// figure out readQuorum for erasure format.json
readQuorum := len(storageDisks) / 2 readQuorum := len(storageDisks) / 2
writeQuorum := len(storageDisks)/2 + 1 writeQuorum := len(storageDisks)/2 + 1
@ -101,9 +98,6 @@ func newXLObjects(storageDisks []StorageAPI) (ObjectLayer, error) {
return nil, fmt.Errorf("Unable to recognize backend format, %s", err) return nil, fmt.Errorf("Unable to recognize backend format, %s", err)
} }
// Calculate data and parity blocks.
dataBlocks, parityBlocks := len(newStorageDisks)/2, len(newStorageDisks)/2
// Initialize list pool. // Initialize list pool.
listPool := newTreeWalkPool(globalLookupTimeout) listPool := newTreeWalkPool(globalLookupTimeout)
@ -111,8 +105,6 @@ func newXLObjects(storageDisks []StorageAPI) (ObjectLayer, error) {
xl := &xlObjects{ xl := &xlObjects{
mutex: &sync.Mutex{}, mutex: &sync.Mutex{},
storageDisks: newStorageDisks, storageDisks: newStorageDisks,
dataBlocks: dataBlocks,
parityBlocks: parityBlocks,
listPool: listPool, listPool: listPool,
} }
@ -144,11 +136,6 @@ func newXLObjects(storageDisks []StorageAPI) (ObjectLayer, error) {
return nil, fmt.Errorf("Unable to initialize '.minio.sys' meta volume, %s", err) return nil, fmt.Errorf("Unable to initialize '.minio.sys' meta volume, %s", err)
} }
// Figure out read and write quorum based on number of storage disks.
// READ and WRITE quorum is always set to (N/2) number of disks.
xl.readQuorum = readQuorum
xl.writeQuorum = writeQuorum
// If the number of offline servers is equal to the readQuorum // If the number of offline servers is equal to the readQuorum
// (i.e. the number of online servers also equals the // (i.e. the number of online servers also equals the
// readQuorum), we cannot perform quick-heal (no // readQuorum), we cannot perform quick-heal (no
@ -160,7 +147,7 @@ func newXLObjects(storageDisks []StorageAPI) (ObjectLayer, error) {
} }
// Perform a quick heal on the buckets and bucket metadata for any discrepancies. // Perform a quick heal on the buckets and bucket metadata for any discrepancies.
if err = quickHeal(xl.storageDisks, xl.writeQuorum, xl.readQuorum); err != nil { if err = quickHeal(*xl, writeQuorum, readQuorum); err != nil {
return nil, err return nil, err
} }
@ -258,13 +245,18 @@ func getStorageInfo(disks []StorageAPI) StorageInfo {
storageInfo.Backend.Type = Erasure storageInfo.Backend.Type = Erasure
storageInfo.Backend.OnlineDisks = onlineDisks storageInfo.Backend.OnlineDisks = onlineDisks
storageInfo.Backend.OfflineDisks = offlineDisks storageInfo.Backend.OfflineDisks = offlineDisks
_, scParity := getDrivesCount(standardStorageClass, disks)
storageInfo.Backend.standardSCParity = scParity
_, rrSCparity := getDrivesCount(reducedRedundancyStorageClass, disks)
storageInfo.Backend.rrSCParity = rrSCparity
return storageInfo return storageInfo
} }
// StorageInfo - returns underlying storage statistics. // StorageInfo - returns underlying storage statistics.
func (xl xlObjects) StorageInfo() StorageInfo { func (xl xlObjects) StorageInfo() StorageInfo {
storageInfo := getStorageInfo(xl.storageDisks) storageInfo := getStorageInfo(xl.storageDisks)
storageInfo.Backend.ReadQuorum = xl.readQuorum
storageInfo.Backend.WriteQuorum = xl.writeQuorum
return storageInfo return storageInfo
} }


@ -0,0 +1,78 @@
# Minio Storage Class Quickstart Guide [![Slack](https://slack.minio.io/slack?type=svg)](https://slack.minio.io)
Minio server supports storage classes in erasure coding mode. This allows configurable data and parity disks per object.
## Overview
Minio supports two storage classes, Reduced Redundancy class and Standard class. These classes are defined using environment variables
set before starting the Minio server. Once the data and parity disks for each storage class are defined via environment variables,
you can set the storage class of an object through the request metadata field `x-amz-storage-class`. Minio server then honors the storage class by
saving the object with the specified number of data and parity disks.
### Values for standard storage class (SS)
Standard storage class implies more parity than the RRS class. So, SS parity disks should be
- Greater than or equal to 2, if RRS parity is not set.
- Greater than RRS parity, if it is set.
As parity blocks cannot outnumber data blocks, the standard storage class parity cannot be higher than N/2, where N is the total number of disks. The default parity for the standard storage class is `N/2`.
### Reduced redundancy storage class (RRS)
Reduced redundancy implies less parity than the SS class. So, RRS parity disks should be
- Less than N/2, if SS parity is not set.
- Less than SS parity, if it is set.
As parity below 2 is not recommended, the RRS storage class is not supported for a 4-disk erasure coding setup. The default parity for the reduced redundancy storage class is `2`.
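The two rule sets above are enforced when the server parses the storage class environment variables at startup. The sketch below condenses them into a single hypothetical helper; the name, signature and error messages are illustrative and not the server's actual validation code. A parity of 0 stands for "not set".

```go
package main

import "fmt"

// validateParityValues is a hypothetical, combined form of the startup
// checks described above; a parity value of 0 means "not set".
func validateParityValues(ssParity, rrsParity, totalDisks int) error {
	if ssParity > totalDisks/2 {
		return fmt.Errorf("standard parity %d exceeds N/2 (%d)", ssParity, totalDisks/2)
	}
	if ssParity != 0 {
		if ssParity < 2 {
			return fmt.Errorf("standard parity should be at least 2, got %d", ssParity)
		}
		if rrsParity != 0 && ssParity <= rrsParity {
			return fmt.Errorf("standard parity %d should be greater than RRS parity %d", ssParity, rrsParity)
		}
	}
	if rrsParity != 0 {
		if rrsParity < 2 {
			return fmt.Errorf("RRS parity should be at least 2, got %d", rrsParity)
		}
		if ssParity == 0 && rrsParity >= totalDisks/2 {
			return fmt.Errorf("RRS parity %d should be less than N/2 (%d)", rrsParity, totalDisks/2)
		}
	}
	return nil
}

func main() {
	// 8 disks, standard parity 3, RRS parity 2: valid under both rule sets.
	fmt.Println(validateParityValues(3, 2, 8)) // <nil>
	// RRS on a 4-disk setup: parity 2 is not below N/2=2, so it is rejected.
	fmt.Println(validateParityValues(0, 2, 4)) // error
}
```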
## Get started with Storage Class
### Set storage class
The format to set storage class environment variables is as follows:
`MINIO_STORAGE_CLASS_RRS=EC:parity`
`MINIO_STORAGE_CLASS_STANDARD=EC:parity`
For example, to set RRS parity to 2 and SS parity to 3 in an 8-disk erasure code setup:
```sh
export MINIO_STORAGE_CLASS_RRS=EC:2
export MINIO_STORAGE_CLASS_STANDARD=EC:3
```
If the storage class environment variables are not set before starting the Minio server, and a subsequent PutObject request carries the `x-amz-storage-class` metadata field
with the value `MINIO_STORAGE_CLASS_RRS` or `MINIO_STORAGE_CLASS_STANDARD`, the Minio server falls back to the default parity values.
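At startup the server turns each `EC:parity` string into a scheme and a parity count. A minimal sketch of such a parser is shown below; the helper name and struct fields are assumptions for illustration and may not match the server's internal code exactly.

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// storageClass mirrors the EC:parity format; the struct and field names
// here are assumptions for illustration.
type storageClass struct {
	Scheme string
	Parity int
}

// parseStorageClass splits a value such as "EC:2" into scheme and parity.
func parseStorageClass(value string) (storageClass, error) {
	parts := strings.Split(value, ":")
	if len(parts) != 2 || parts[0] != "EC" {
		return storageClass{}, fmt.Errorf("malformed storage class value %q, expected EC:<parity>", value)
	}
	parity, err := strconv.Atoi(parts[1])
	if err != nil {
		return storageClass{}, fmt.Errorf("parity in %q is not a number: %v", value, err)
	}
	return storageClass{Scheme: parts[0], Parity: parity}, nil
}

func main() {
	fmt.Println(parseStorageClass("EC:2")) // {EC 2} <nil>
}
```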
### Set metadata
In the example below, `minio-go` is used to set the storage class to `MINIO_STORAGE_CLASS_RRS`. This means the object will be split across 6 data disks and 2 parity disks (as per the storage class parity set in the previous step).
```go
package main

import (
	"log"
	"os"

	minio "github.com/minio/minio-go"
)

func main() {
	s3Client, err := minio.New("s3.amazonaws.com", "YOUR-ACCESSKEYID", "YOUR-SECRETACCESSKEY", true)
	if err != nil {
		log.Fatalln(err)
	}

	object, err := os.Open("my-testfile")
	if err != nil {
		log.Fatalln(err)
	}
	defer object.Close()

	objectStat, err := object.Stat()
	if err != nil {
		log.Fatalln(err)
	}

	// Upload the object with the reduced redundancy storage class.
	n, err := s3Client.PutObject("my-bucketname", "my-objectname", object, objectStat.Size(),
		minio.PutObjectOptions{ContentType: "application/octet-stream", StorageClass: "MINIO_STORAGE_CLASS_RRS"})
	if err != nil {
		log.Fatalln(err)
	}
	log.Println("Uploaded", "my-objectname", "of size:", n, "successfully.")
}
```