mirror of
https://github.com/minio/minio.git
synced 2025-11-13 07:11:44 -05:00
XL: Relax write quorum further to N/2 + 1. (#2018)
This changes behavior in some parts of the code as well address it. Fixes #2016
This commit is contained in:
committed by
Anand Babu (AB) Periasamy
parent
d484157d67
commit
ae936a0147
@@ -19,82 +19,21 @@ package main
|
||||
import (
|
||||
"encoding/json"
|
||||
"path"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// A uploadInfo represents the s3 compatible spec.
|
||||
type uploadInfo struct {
|
||||
UploadID string `json:"uploadId"` // UploadID for the active multipart upload.
|
||||
Deleted bool `json:"deleted"` // Currently unused, for future use.
|
||||
Initiated time.Time `json:"initiated"` // Indicates when the uploadID was initiated.
|
||||
}
|
||||
|
||||
// A uploadsV1 represents `uploads.json` metadata header.
|
||||
type uploadsV1 struct {
|
||||
Version string `json:"version"` // Version of the current `uploads.json`
|
||||
Format string `json:"format"` // Format of the current `uploads.json`
|
||||
Uploads []uploadInfo `json:"uploadIds"` // Captures all the upload ids for a given object.
|
||||
}
|
||||
|
||||
// byInitiatedTime is a collection satisfying sort.Interface.
|
||||
type byInitiatedTime []uploadInfo
|
||||
|
||||
func (t byInitiatedTime) Len() int { return len(t) }
|
||||
func (t byInitiatedTime) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
|
||||
func (t byInitiatedTime) Less(i, j int) bool {
|
||||
return t[i].Initiated.Before(t[j].Initiated)
|
||||
}
|
||||
|
||||
// AddUploadID - adds a new upload id in order of its initiated time.
|
||||
func (u *uploadsV1) AddUploadID(uploadID string, initiated time.Time) {
|
||||
u.Uploads = append(u.Uploads, uploadInfo{
|
||||
UploadID: uploadID,
|
||||
Initiated: initiated,
|
||||
})
|
||||
sort.Sort(byInitiatedTime(u.Uploads))
|
||||
}
|
||||
|
||||
// Index - returns the index of matching the upload id.
|
||||
func (u uploadsV1) Index(uploadID string) int {
|
||||
for i, u := range u.Uploads {
|
||||
if u.UploadID == uploadID {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// readUploadsJSON - get all the saved uploads JSON.
|
||||
func readUploadsJSON(bucket, object string, disk StorageAPI) (uploadIDs uploadsV1, err error) {
|
||||
uploadJSONPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile)
|
||||
// Reads entire `uploads.json`.
|
||||
buf, err := disk.ReadAll(minioMetaBucket, uploadJSONPath)
|
||||
if err != nil {
|
||||
return uploadsV1{}, err
|
||||
}
|
||||
|
||||
// Decode `uploads.json`.
|
||||
if err = json.Unmarshal(buf, &uploadIDs); err != nil {
|
||||
return uploadsV1{}, err
|
||||
}
|
||||
|
||||
// Success.
|
||||
return uploadIDs, nil
|
||||
}
|
||||
|
||||
// updateUploadsJSON - update `uploads.json` with new uploadsJSON for all disks.
|
||||
func updateUploadsJSON(bucket, object string, uploadsJSON uploadsV1, storageDisks ...StorageAPI) error {
|
||||
func (xl xlObjects) updateUploadsJSON(bucket, object string, uploadsJSON uploadsV1) (err error) {
|
||||
uploadsPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile)
|
||||
uniqueID := getUUID()
|
||||
tmpUploadsPath := path.Join(tmpMetaPrefix, uniqueID)
|
||||
var errs = make([]error, len(storageDisks))
|
||||
var errs = make([]error, len(xl.storageDisks))
|
||||
var wg = &sync.WaitGroup{}
|
||||
|
||||
// Update `uploads.json` for all the disks.
|
||||
for index, disk := range storageDisks {
|
||||
for index, disk := range xl.storageDisks {
|
||||
if disk == nil {
|
||||
errs[index] = errDiskNotFound
|
||||
continue
|
||||
@@ -122,25 +61,10 @@ func updateUploadsJSON(bucket, object string, uploadsJSON uploadsV1, storageDisk
|
||||
// Wait for all the routines to finish updating `uploads.json`
|
||||
wg.Wait()
|
||||
|
||||
// For only single disk return first error.
|
||||
if len(storageDisks) == 1 {
|
||||
return errs[0]
|
||||
} // else count all the errors for quorum validation.
|
||||
var errCount = 0
|
||||
// Return for first error.
|
||||
for _, err := range errs {
|
||||
if err != nil {
|
||||
errCount++
|
||||
}
|
||||
}
|
||||
// Count all the errors and validate if we have write quorum.
|
||||
if errCount > len(storageDisks)-len(storageDisks)/2+3 {
|
||||
// Validate if we have read quorum return success.
|
||||
if errCount > len(storageDisks)-len(storageDisks)/2+1 {
|
||||
return nil
|
||||
}
|
||||
if !isQuorum(errs, xl.writeQuorum) {
|
||||
// Rename `uploads.json` left over back to tmp location.
|
||||
for index, disk := range storageDisks {
|
||||
for index, disk := range xl.storageDisks {
|
||||
if disk == nil {
|
||||
continue
|
||||
}
|
||||
@@ -160,25 +84,17 @@ func updateUploadsJSON(bucket, object string, uploadsJSON uploadsV1, storageDisk
|
||||
return nil
|
||||
}
|
||||
|
||||
// newUploadsV1 - initialize new uploads v1.
|
||||
func newUploadsV1(format string) uploadsV1 {
|
||||
uploadIDs := uploadsV1{}
|
||||
uploadIDs.Version = "1"
|
||||
uploadIDs.Format = format
|
||||
return uploadIDs
|
||||
}
|
||||
|
||||
// writeUploadJSON - create `uploads.json` or update it with new uploadID.
|
||||
func writeUploadJSON(bucket, object, uploadID string, initiated time.Time, storageDisks ...StorageAPI) (err error) {
|
||||
func (xl xlObjects) writeUploadJSON(bucket, object, uploadID string, initiated time.Time) (err error) {
|
||||
uploadsPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile)
|
||||
uniqueID := getUUID()
|
||||
tmpUploadsPath := path.Join(tmpMetaPrefix, uniqueID)
|
||||
|
||||
var errs = make([]error, len(storageDisks))
|
||||
var errs = make([]error, len(xl.storageDisks))
|
||||
var wg = &sync.WaitGroup{}
|
||||
|
||||
var uploadsJSON uploadsV1
|
||||
for _, disk := range storageDisks {
|
||||
for _, disk := range xl.storageDisks {
|
||||
if disk == nil {
|
||||
continue
|
||||
}
|
||||
@@ -190,19 +106,14 @@ func writeUploadJSON(bucket, object, uploadID string, initiated time.Time, stora
|
||||
if err != errFileNotFound {
|
||||
return err
|
||||
}
|
||||
if len(storageDisks) == 1 {
|
||||
// Set uploads format to `fs` for single disk.
|
||||
uploadsJSON = newUploadsV1("fs")
|
||||
} else {
|
||||
// Set uploads format to `xl` otherwise.
|
||||
uploadsJSON = newUploadsV1("xl")
|
||||
}
|
||||
// Set uploads format to `xl` otherwise.
|
||||
uploadsJSON = newUploadsV1("xl")
|
||||
}
|
||||
// Add a new upload id.
|
||||
uploadsJSON.AddUploadID(uploadID, initiated)
|
||||
|
||||
// Update `uploads.json` on all disks.
|
||||
for index, disk := range storageDisks {
|
||||
for index, disk := range xl.storageDisks {
|
||||
if disk == nil {
|
||||
errs[index] = errDiskNotFound
|
||||
continue
|
||||
@@ -234,28 +145,13 @@ func writeUploadJSON(bucket, object, uploadID string, initiated time.Time, stora
|
||||
}(index, disk)
|
||||
}
|
||||
|
||||
// Wait for all the writes to finish.
|
||||
// Wait here for all the writes to finish.
|
||||
wg.Wait()
|
||||
|
||||
// For only single disk return first error.
|
||||
if len(storageDisks) == 1 {
|
||||
return errs[0]
|
||||
} // else count all the errors for quorum validation.
|
||||
var errCount = 0
|
||||
// Return for first error.
|
||||
for _, err := range errs {
|
||||
if err != nil {
|
||||
errCount++
|
||||
}
|
||||
}
|
||||
// Count all the errors and validate if we have write quorum.
|
||||
if errCount > len(storageDisks)-len(storageDisks)/2+3 {
|
||||
// Validate if we have read quorum return success.
|
||||
if errCount > len(storageDisks)-len(storageDisks)/2+1 {
|
||||
return nil
|
||||
}
|
||||
if !isQuorum(errs, xl.writeQuorum) {
|
||||
// Rename `uploads.json` left over back to tmp location.
|
||||
for index, disk := range storageDisks {
|
||||
for index, disk := range xl.storageDisks {
|
||||
if disk == nil {
|
||||
continue
|
||||
}
|
||||
@@ -275,79 +171,6 @@ func writeUploadJSON(bucket, object, uploadID string, initiated time.Time, stora
|
||||
return nil
|
||||
}
|
||||
|
||||
// Wrapper which removes all the uploaded parts.
|
||||
func cleanupUploadedParts(bucket, object, uploadID string, storageDisks ...StorageAPI) error {
|
||||
var errs = make([]error, len(storageDisks))
|
||||
var wg = &sync.WaitGroup{}
|
||||
|
||||
// Construct uploadIDPath.
|
||||
uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
|
||||
|
||||
// Cleanup uploadID for all disks.
|
||||
for index, disk := range storageDisks {
|
||||
if disk == nil {
|
||||
errs[index] = errDiskNotFound
|
||||
continue
|
||||
}
|
||||
wg.Add(1)
|
||||
// Cleanup each uploadID in a routine.
|
||||
go func(index int, disk StorageAPI) {
|
||||
defer wg.Done()
|
||||
err := cleanupDir(disk, minioMetaBucket, uploadIDPath)
|
||||
if err != nil {
|
||||
errs[index] = err
|
||||
return
|
||||
}
|
||||
errs[index] = nil
|
||||
}(index, disk)
|
||||
}
|
||||
|
||||
// Wait for all the cleanups to finish.
|
||||
wg.Wait()
|
||||
|
||||
// Return first error.
|
||||
for _, err := range errs {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// listMultipartUploadIDs - list all the upload ids from a marker up to 'count'.
|
||||
func listMultipartUploadIDs(bucketName, objectName, uploadIDMarker string, count int, disk StorageAPI) ([]uploadMetadata, bool, error) {
|
||||
var uploads []uploadMetadata
|
||||
// Read `uploads.json`.
|
||||
uploadsJSON, err := readUploadsJSON(bucketName, objectName, disk)
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
index := 0
|
||||
if uploadIDMarker != "" {
|
||||
for ; index < len(uploadsJSON.Uploads); index++ {
|
||||
if uploadsJSON.Uploads[index].UploadID == uploadIDMarker {
|
||||
// Skip the uploadID as it would already be listed in previous listing.
|
||||
index++
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
for index < len(uploadsJSON.Uploads) {
|
||||
uploads = append(uploads, uploadMetadata{
|
||||
Object: objectName,
|
||||
UploadID: uploadsJSON.Uploads[index].UploadID,
|
||||
Initiated: uploadsJSON.Uploads[index].Initiated,
|
||||
})
|
||||
count--
|
||||
index++
|
||||
if count == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
end := (index == len(uploadsJSON.Uploads))
|
||||
return uploads, end, nil
|
||||
}
|
||||
|
||||
// Returns if the prefix is a multipart upload.
|
||||
func (xl xlObjects) isMultipartUpload(bucket, prefix string) bool {
|
||||
for _, disk := range xl.getLoadBalancedQuorumDisks() {
|
||||
@@ -476,11 +299,6 @@ func (xl xlObjects) commitXLMetadata(srcPrefix, dstPrefix string) error {
|
||||
|
||||
// Do we have write quorum?.
|
||||
if !isQuorum(mErrs, xl.writeQuorum) {
|
||||
// Do we have read quorum?.
|
||||
if isQuorum(mErrs, xl.readQuorum) {
|
||||
// Return success on read quorum.
|
||||
return nil
|
||||
}
|
||||
return errXLWriteQuorum
|
||||
}
|
||||
// For all other errors return.
|
||||
|
||||
Reference in New Issue
Block a user