mirror of
https://github.com/minio/minio.git
synced 2025-04-06 04:40:38 -04:00
xl: add quorum support for read file and name space locking. (#1333)
This commit is contained in:
parent
a98a7fb1ad
commit
ada0f82b9a
61
xl-v1-namespace.go
Normal file
61
xl-v1-namespace.go
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
/*
|
||||||
|
* Minio Cloud Storage, (C) 2016 Minio, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import "sync"
|
||||||
|
|
||||||
|
// nameSpaceParam identifies one name space entry by volume and path. It is a
// comparable value type so that it can serve as a map key.
type nameSpaceParam struct {
	volume, path string
}
|
||||||
|
|
||||||
|
type nameSpaceLock struct {
|
||||||
|
rwMutex *sync.RWMutex
|
||||||
|
count uint
|
||||||
|
}
|
||||||
|
|
||||||
|
func (nsLock nameSpaceLock) InUse() bool {
|
||||||
|
return nsLock.count != 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (nsLock nameSpaceLock) Lock() {
|
||||||
|
nsLock.Lock()
|
||||||
|
nsLock.count++
|
||||||
|
}
|
||||||
|
|
||||||
|
func (nsLock nameSpaceLock) Unlock() {
|
||||||
|
nsLock.Unlock()
|
||||||
|
if nsLock.count != 0 {
|
||||||
|
nsLock.count--
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (nsLock nameSpaceLock) RLock() {
|
||||||
|
nsLock.RLock()
|
||||||
|
nsLock.count++
|
||||||
|
}
|
||||||
|
|
||||||
|
func (nsLock nameSpaceLock) RUnlock() {
|
||||||
|
nsLock.RUnlock()
|
||||||
|
if nsLock.count != 0 {
|
||||||
|
nsLock.count--
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newNameSpaceLock() nameSpaceLock {
|
||||||
|
return nameSpaceLock{rwMutex: &sync.RWMutex{}, count: 0}
|
||||||
|
}
|
@ -17,10 +17,12 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
slashpath "path"
|
slashpath "path"
|
||||||
|
"strconv"
|
||||||
)
|
)
|
||||||
|
|
||||||
// checkBlockSize return the size of a single block.
|
// checkBlockSize return the size of a single block.
|
||||||
@ -42,6 +44,85 @@ func getEncodedBlockLen(inputLen, dataBlocks int) (curBlockSize int) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (xl XL) getMetaDataFileVersions(volume, path string) (diskVersionMap map[StorageAPI]int64) {
|
||||||
|
metadataFilePath := slashpath.Join(path, metadataFile)
|
||||||
|
// set offset to 0 to read entire file
|
||||||
|
offset := int64(0)
|
||||||
|
metadata := make(map[string]string)
|
||||||
|
|
||||||
|
// read meta data from all disks
|
||||||
|
for _, disk := range xl.storageDisks {
|
||||||
|
diskVersionMap[disk] = -1
|
||||||
|
|
||||||
|
if metadataReader, err := disk.ReadFile(volume, metadataFilePath, offset); err != nil {
|
||||||
|
// error reading meta data file
|
||||||
|
// TODO: log it
|
||||||
|
continue
|
||||||
|
} else if err := json.NewDecoder(metadataReader).Decode(&metadata); err != nil {
|
||||||
|
// error in parsing json
|
||||||
|
// TODO: log it
|
||||||
|
continue
|
||||||
|
} else if _, ok := metadata["file.version"]; !ok {
|
||||||
|
// missing "file.version" is completely valid
|
||||||
|
diskVersionMap[disk] = 0
|
||||||
|
continue
|
||||||
|
} else if fileVersion, err := strconv.ParseInt(metadata["file.version"], 10, 64); err != nil {
|
||||||
|
// version is not a number
|
||||||
|
// TODO: log it
|
||||||
|
continue
|
||||||
|
} else {
|
||||||
|
diskVersionMap[disk] = fileVersion
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
type quorumDisk struct {
|
||||||
|
disk StorageAPI
|
||||||
|
index int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (xl XL) getReadFileQuorumDisks(volume, path string) (quorumDisks []quorumDisk) {
|
||||||
|
diskVersionMap := xl.getMetaDataFileVersions(volume, path)
|
||||||
|
higherVersion := int64(0)
|
||||||
|
i := 0
|
||||||
|
for disk, version := range diskVersionMap {
|
||||||
|
if version > higherVersion {
|
||||||
|
higherVersion = version
|
||||||
|
quorumDisks = []quorumDisk{quorumDisk{disk, i}}
|
||||||
|
} else if version == higherVersion {
|
||||||
|
quorumDisks = append(quorumDisks, quorumDisk{disk, i})
|
||||||
|
}
|
||||||
|
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (xl XL) getFileSize(volume, path string, disk StorageAPI) (size int64, err error) {
|
||||||
|
metadataFilePath := slashpath.Join(path, metadataFile)
|
||||||
|
// set offset to 0 to read entire file
|
||||||
|
offset := int64(0)
|
||||||
|
metadata := make(map[string]string)
|
||||||
|
|
||||||
|
metadataReader, err := disk.ReadFile(volume, metadataFilePath, offset)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = json.NewDecoder(metadataReader).Decode(&metadata); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, ok := metadata["file.size"]; !ok {
|
||||||
|
return 0, errors.New("missing 'file.size' in meta data")
|
||||||
|
}
|
||||||
|
|
||||||
|
return strconv.ParseInt(metadata["file.size"], 10, 64)
|
||||||
|
}
|
||||||
|
|
||||||
// ReadFile - read file
|
// ReadFile - read file
|
||||||
func (xl XL) ReadFile(volume, path string, offset int64) (io.ReadCloser, error) {
|
func (xl XL) ReadFile(volume, path string, offset int64) (io.ReadCloser, error) {
|
||||||
// Input validation.
|
// Input validation.
|
||||||
@ -52,38 +133,48 @@ func (xl XL) ReadFile(volume, path string, offset int64) (io.ReadCloser, error)
|
|||||||
return nil, errInvalidArgument
|
return nil, errInvalidArgument
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize all readers.
|
xl.lockNameSpace(volume, path, true)
|
||||||
var readers = make([]io.ReadCloser, len(xl.storageDisks))
|
defer xl.unlockNameSpace(volume, path, true)
|
||||||
|
|
||||||
// Extract metadata.
|
// check read quorum
|
||||||
metadata, err := xl.extractMetadata(volume, path)
|
quorumDisks := xl.getReadFileQuorumDisks(volume, path)
|
||||||
|
if len(quorumDisks) < xl.readQuorum {
|
||||||
|
return nil, errors.New("I/O error. do not meet read quorum")
|
||||||
|
}
|
||||||
|
|
||||||
|
// get file size
|
||||||
|
size, err := xl.getFileSize(volume, path, quorumDisks[0].disk)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
totalBlocks := xl.DataBlocks + xl.ParityBlocks // Total blocks.
|
||||||
|
|
||||||
// Loop through and verify if all metadata files are in-tact.
|
readers := []io.ReadCloser{}
|
||||||
for index, disk := range xl.storageDisks {
|
readFileError := 0
|
||||||
offset := int64(0)
|
i := 0
|
||||||
erasurePart := slashpath.Join(path, fmt.Sprintf("part.%d", index))
|
for _, quorumDisk := range quorumDisks {
|
||||||
|
erasurePart := slashpath.Join(path, fmt.Sprintf("part.%d", quorumDisk.index))
|
||||||
var erasuredPartReader io.ReadCloser
|
var erasuredPartReader io.ReadCloser
|
||||||
erasuredPartReader, err = disk.ReadFile(volume, erasurePart, offset)
|
if erasuredPartReader, err = quorumDisk.disk.ReadFile(volume, erasurePart, offset); err != nil {
|
||||||
if err != nil {
|
// we can safely allow ReadFile errors up to len(quorumDisks) - xl.readQuorum
|
||||||
// One of parts not found, we need to re-construct.
|
// otherwise return failure
|
||||||
if err == errFileNotFound {
|
if readFileError < len(quorumDisks)-xl.readQuorum {
|
||||||
readers[index] = nil
|
readFileError++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// For all other errors return to the caller.
|
|
||||||
|
// TODO: handle currently available io.Reader in readers variable
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
readers[index] = erasuredPartReader
|
|
||||||
|
readers[i] = erasuredPartReader
|
||||||
|
i++
|
||||||
}
|
}
|
||||||
totalBlocks := xl.DataBlocks + xl.ParityBlocks // Total blocks.
|
|
||||||
|
|
||||||
// Initialize pipe.
|
// Initialize pipe.
|
||||||
pipeReader, pipeWriter := io.Pipe()
|
pipeReader, pipeWriter := io.Pipe()
|
||||||
go func() {
|
go func() {
|
||||||
var totalLeft = metadata.Size
|
var totalLeft = size
|
||||||
// Read until the totalLeft.
|
// Read until the totalLeft.
|
||||||
for totalLeft > 0 {
|
for totalLeft > 0 {
|
||||||
// Figure out the right blockSize as it was encoded before.
|
// Figure out the right blockSize as it was encoded before.
|
||||||
|
78
xl-v1.go
78
xl-v1.go
@ -26,19 +26,12 @@ import (
|
|||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/klauspost/reedsolomon"
|
"github.com/klauspost/reedsolomon"
|
||||||
)
|
)
|
||||||
|
|
||||||
// XL layer structure.
|
|
||||||
type XL struct {
|
|
||||||
ReedSolomon reedsolomon.Encoder // Erasure encoder/decoder.
|
|
||||||
DataBlocks int
|
|
||||||
ParityBlocks int
|
|
||||||
storageDisks []StorageAPI
|
|
||||||
}
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// Part metadata file.
|
// Part metadata file.
|
||||||
metadataFile = "part.json"
|
metadataFile = "part.json"
|
||||||
@ -46,6 +39,55 @@ const (
|
|||||||
maxErasureBlocks = 16
|
maxErasureBlocks = 16
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// XL layer structure.
|
||||||
|
type XL struct {
|
||||||
|
ReedSolomon reedsolomon.Encoder // Erasure encoder/decoder.
|
||||||
|
DataBlocks int
|
||||||
|
ParityBlocks int
|
||||||
|
storageDisks []StorageAPI
|
||||||
|
nameSpaceLockMap map[nameSpaceParam]nameSpaceLock
|
||||||
|
nameSpaceLockMapMutex *sync.Mutex
|
||||||
|
readQuorum int
|
||||||
|
writeQuorum int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (xl XL) lockNameSpace(volume, path string, readOnly bool) {
|
||||||
|
xl.nameSpaceLockMapMutex.Lock()
|
||||||
|
defer xl.nameSpaceLockMapMutex.Unlock()
|
||||||
|
|
||||||
|
param := nameSpaceParam{volume, path}
|
||||||
|
nsLock, found := xl.nameSpaceLockMap[param]
|
||||||
|
if !found {
|
||||||
|
nsLock = newNameSpaceLock()
|
||||||
|
}
|
||||||
|
|
||||||
|
if readOnly {
|
||||||
|
nsLock.RLock()
|
||||||
|
} else {
|
||||||
|
nsLock.Lock()
|
||||||
|
}
|
||||||
|
|
||||||
|
xl.nameSpaceLockMap[param] = nsLock
|
||||||
|
}
|
||||||
|
|
||||||
|
func (xl XL) unlockNameSpace(volume, path string, readOnly bool) {
|
||||||
|
xl.nameSpaceLockMapMutex.Lock()
|
||||||
|
defer xl.nameSpaceLockMapMutex.Unlock()
|
||||||
|
|
||||||
|
param := nameSpaceParam{volume, path}
|
||||||
|
if nsLock, found := xl.nameSpaceLockMap[param]; found {
|
||||||
|
if readOnly {
|
||||||
|
nsLock.RUnlock()
|
||||||
|
} else {
|
||||||
|
nsLock.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
if nsLock.InUse() {
|
||||||
|
xl.nameSpaceLockMap[param] = nsLock
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// newXL instantiate a new XL.
|
// newXL instantiate a new XL.
|
||||||
func newXL(disks ...string) (StorageAPI, error) {
|
func newXL(disks ...string) (StorageAPI, error) {
|
||||||
// Initialize XL.
|
// Initialize XL.
|
||||||
@ -94,6 +136,14 @@ func newXL(disks ...string) (StorageAPI, error) {
|
|||||||
// Save all the initialized storage disks.
|
// Save all the initialized storage disks.
|
||||||
xl.storageDisks = storageDisks
|
xl.storageDisks = storageDisks
|
||||||
|
|
||||||
|
xl.nameSpaceLockMap = make(map[nameSpaceParam]nameSpaceLock)
|
||||||
|
xl.nameSpaceLockMapMutex = &sync.Mutex{}
|
||||||
|
xl.readQuorum = len(xl.storageDisks) / 2
|
||||||
|
xl.writeQuorum = xl.readQuorum + 3
|
||||||
|
if xl.writeQuorum > len(xl.storageDisks) {
|
||||||
|
xl.writeQuorum = len(xl.storageDisks)
|
||||||
|
}
|
||||||
|
|
||||||
// Return successfully initialized.
|
// Return successfully initialized.
|
||||||
return xl, nil
|
return xl, nil
|
||||||
}
|
}
|
||||||
@ -187,6 +237,7 @@ type fileMetadata struct {
|
|||||||
Block512Sum string
|
Block512Sum string
|
||||||
DataBlocks int
|
DataBlocks int
|
||||||
ParityBlocks int
|
ParityBlocks int
|
||||||
|
fileVersion int64
|
||||||
}
|
}
|
||||||
|
|
||||||
// extractMetadata - extract file metadata.
|
// extractMetadata - extract file metadata.
|
||||||
@ -221,6 +272,16 @@ func (xl XL) extractMetadata(volume, path string) (fileMetadata, error) {
|
|||||||
return fileMetadata{}, err
|
return fileMetadata{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verify if file.version is parsable.
|
||||||
|
var fileVersion int64
|
||||||
|
// missing file.version is valid
|
||||||
|
if _, ok := metadata["file.version"]; ok {
|
||||||
|
fileVersion, err = strconv.ParseInt(metadata["file.version"], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return fileMetadata{}, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Verify if block size is parsable.
|
// Verify if block size is parsable.
|
||||||
var blockSize int64
|
var blockSize int64
|
||||||
blockSize, err = strconv.ParseInt(metadata["file.xl.blockSize"], 10, 64)
|
blockSize, err = strconv.ParseInt(metadata["file.xl.blockSize"], 10, 64)
|
||||||
@ -254,6 +315,7 @@ func (xl XL) extractMetadata(volume, path string) (fileMetadata, error) {
|
|||||||
Block512Sum: sha512Sum,
|
Block512Sum: sha512Sum,
|
||||||
DataBlocks: dataBlocks,
|
DataBlocks: dataBlocks,
|
||||||
ParityBlocks: parityBlocks,
|
ParityBlocks: parityBlocks,
|
||||||
|
fileVersion: fileVersion,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user