From ada0f82b9a60defbacd781f72c0c8b6b2630733d Mon Sep 17 00:00:00 2001
From: Bala FA
Date: Tue, 19 Apr 2016 22:53:20 +0530
Subject: [PATCH] xl: add quorum support for read file and name space locking. (#1333)

---
 xl-v1-namespace.go |  77 ++++++++++++++++++++++++++++
 xl-v1-readfile.go  | 125 +++++++++++++++++++++++++++++++++++++++------
 xl-v1.go           |  78 +++++++++++++++++++++++++---
 3 files changed, 255 insertions(+), 25 deletions(-)
 create mode 100644 xl-v1-namespace.go

diff --git a/xl-v1-namespace.go b/xl-v1-namespace.go
new file mode 100644
index 000000000..f1adfe3a9
--- /dev/null
+++ b/xl-v1-namespace.go
@@ -0,0 +1,77 @@
+/*
+ * Minio Cloud Storage, (C) 2016 Minio, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main
+
+import "sync"
+
+// nameSpaceParam identifies a name space entry by volume and path.
+type nameSpaceParam struct {
+	volume string
+	path   string
+}
+
+// nameSpaceLock pairs a read/write mutex with a count of current holders.
+// The mutex is held by pointer, so copies of a nameSpaceLock share one
+// mutex while each copy carries its own count; code that works on a copy
+// must store the updated copy back.  count is a plain field with no
+// synchronization of its own.
+type nameSpaceLock struct {
+	rwMutex *sync.RWMutex
+	count   uint
+}
+
+// InUse reports whether the holder count is non-zero.
+func (nsLock *nameSpaceLock) InUse() bool {
+	return nsLock.count != 0
+}
+
+// Lock takes the write lock and increments the holder count.
+func (nsLock *nameSpaceLock) Lock() {
+	// Lock the underlying mutex; nsLock.Lock() here would recurse forever.
+	nsLock.rwMutex.Lock()
+	nsLock.count++
+}
+
+// Unlock releases the write lock and decrements the holder count, never
+// letting the unsigned count wrap below zero.
+func (nsLock *nameSpaceLock) Unlock() {
+	nsLock.rwMutex.Unlock()
+	if nsLock.count != 0 {
+		nsLock.count--
+	}
+}
+
+// RLock takes the read lock and increments the holder count.
+func (nsLock *nameSpaceLock) RLock() {
+	nsLock.rwMutex.RLock()
+	nsLock.count++
+}
+
+// RUnlock releases the read lock and decrements the holder count, never
+// letting the unsigned count wrap below zero.
+func (nsLock *nameSpaceLock) RUnlock() {
+	nsLock.rwMutex.RUnlock()
+	if nsLock.count != 0 {
+		nsLock.count--
+	}
+}
+
+// newNameSpaceLock returns a nameSpaceLock with a fresh mutex and a zero
+// holder count.
+func newNameSpaceLock() nameSpaceLock {
+	return nameSpaceLock{rwMutex: &sync.RWMutex{}, count: 0}
+}
diff --git a/xl-v1-readfile.go b/xl-v1-readfile.go
index ca8e169f5..54a611db1 100644
--- a/xl-v1-readfile.go
+++ b/xl-v1-readfile.go
@@ -17,10 +17,12 @@
 package main
 
 import (
+	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	slashpath "path"
+	"strconv"
 )
 
 // checkBlockSize return the size of a single block.
@@ -42,6 +44,127 @@ func getEncodedBlockLen(inputLen, dataBlocks int) (curBlockSize int) {
 	return
 }
 
+// getMetaDataFileVersions reads the metadata file stored under path in
+// volume from every storage disk and returns, per disk, the value of its
+// "file.version" entry.  A disk maps to -1 when its metadata cannot be read
+// or decoded or when the version is not a number, and to 0 when the
+// metadata has no "file.version" entry.
+func (xl XL) getMetaDataFileVersions(volume, path string) (diskVersionMap map[StorageAPI]int64) {
+	metadataFilePath := slashpath.Join(path, metadataFile)
+	// set offset to 0 to read entire file
+	offset := int64(0)
+
+	// A named map result starts out nil and writing to it panics, so
+	// allocate it before the loop.
+	diskVersionMap = make(map[StorageAPI]int64, len(xl.storageDisks))
+
+	// read meta data from all disks
+	for _, disk := range xl.storageDisks {
+		diskVersionMap[disk] = -1
+
+		metadataReader, err := disk.ReadFile(volume, metadataFilePath, offset)
+		if err != nil {
+			// error reading meta data file
+			// TODO: log it
+			continue
+		}
+
+		// Decode into a fresh map so that entries decoded from an earlier
+		// disk cannot show up in this disk's result.
+		metadata := make(map[string]string)
+		err = json.NewDecoder(metadataReader).Decode(&metadata)
+		metadataReader.Close()
+		if err != nil {
+			// error in parsing json
+			// TODO: log it
+			continue
+		}
+
+		version, ok := metadata["file.version"]
+		if !ok {
+			// missing "file.version" is completely valid
+			diskVersionMap[disk] = 0
+			continue
+		}
+
+		fileVersion, err := strconv.ParseInt(version, 10, 64)
+		if err != nil {
+			// version is not a number
+			// TODO: log it
+			continue
+		}
+
+		diskVersionMap[disk] = fileVersion
+	}
+
+	return diskVersionMap
+}
+
+// quorumDisk is a storage disk together with its position in
+// XL.storageDisks.
+type quorumDisk struct {
+	disk  StorageAPI
+	index int
+}
+
+// getReadFileQuorumDisks returns the disks whose metadata for volume/path
+// carries the highest "file.version" found, each paired with its index in
+// xl.storageDisks.  Disks whose version is negative (metadata unreadable)
+// are never returned.
+func (xl XL) getReadFileQuorumDisks(volume, path string) (quorumDisks []quorumDisk) {
+	diskVersionMap := xl.getMetaDataFileVersions(volume, path)
+	higherVersion := int64(0)
+
+	// Walk the disk slice rather than the map: map iteration order is
+	// random, so a counter kept while ranging over the map would not be the
+	// disk's index.
+	for i, disk := range xl.storageDisks {
+		version, ok := diskVersionMap[disk]
+		if !ok {
+			continue
+		}
+
+		switch {
+		case version > higherVersion:
+			higherVersion = version
+			quorumDisks = []quorumDisk{{disk: disk, index: i}}
+		case version == higherVersion:
+			quorumDisks = append(quorumDisks, quorumDisk{disk: disk, index: i})
+		}
+	}
+
+	return quorumDisks
+}
+
+// getFileSize reads the metadata file stored under path in volume from disk
+// and returns its "file.size" entry parsed as a base-10 integer.  It returns
+// an error when the metadata cannot be read or decoded, when "file.size" is
+// missing, or when it is not a number.
+func (xl XL) getFileSize(volume, path string, disk StorageAPI) (size int64, err error) {
+	metadataFilePath := slashpath.Join(path, metadataFile)
+	// set offset to 0 to read entire file
+	offset := int64(0)
+	metadata := make(map[string]string)
+
+	metadataReader, err := disk.ReadFile(volume, metadataFilePath, offset)
+	if err != nil {
+		return 0, err
+	}
+	// Release the handle on every return path.
+	defer metadataReader.Close()
+
+	if err = json.NewDecoder(metadataReader).Decode(&metadata); err != nil {
+		return 0, err
+	}
+
+	fileSize, ok := metadata["file.size"]
+	if !ok {
+		return 0, errors.New("missing 'file.size' in meta data")
+	}
+
+	return strconv.ParseInt(fileSize, 10, 64)
+}
+
 // ReadFile - read file
 func (xl XL) ReadFile(volume, path string, offset int64) (io.ReadCloser, error) {
 	// Input validation.
@@ -52,38 +175,59 @@ func (xl XL) ReadFile(volume, path string, offset int64) (io.ReadCloser, error)
 		return nil, errInvalidArgument
 	}
 
-	// Initialize all readers.
-	var readers = make([]io.ReadCloser, len(xl.storageDisks))
+	// NOTE(review): the deferred unlock runs when ReadFile returns, while the
+	// goroutine started below keeps running after that; confirm the read
+	// lock is not meant to cover the streaming itself.
+	xl.lockNameSpace(volume, path, true)
+	defer xl.unlockNameSpace(volume, path, true)
 
-	// Extract metadata.
-	metadata, err := xl.extractMetadata(volume, path)
+	// check read quorum
+	quorumDisks := xl.getReadFileQuorumDisks(volume, path)
+	// The emptiness check protects quorumDisks[0] below when readQuorum is 0.
+	if len(quorumDisks) == 0 || len(quorumDisks) < xl.readQuorum {
+		return nil, errors.New("I/O error. do not meet read quorum")
+	}
+
+	// get file size
+	size, err := xl.getFileSize(volume, path, quorumDisks[0].disk)
 	if err != nil {
 		return nil, err
 	}
+	totalBlocks := xl.DataBlocks + xl.ParityBlocks // Total blocks.
 
-	// Loop through and verify if all metadata files are in-tact.
-	for index, disk := range xl.storageDisks {
-		offset := int64(0)
-		erasurePart := slashpath.Join(path, fmt.Sprintf("part.%d", index))
+	// One slot per storage disk, addressed by disk index as in the code this
+	// replaces; slots of disks that are skipped or fail to open stay nil.
+	readers := make([]io.ReadCloser, len(xl.storageDisks))
+	readFileError := 0
+	for _, qDisk := range quorumDisks {
+		// NOTE(review): the code this replaces read every part from offset 0;
+		// the caller's offset is passed through here; confirm that is intended.
+		erasurePart := slashpath.Join(path, fmt.Sprintf("part.%d", qDisk.index))
 		var erasuredPartReader io.ReadCloser
-		erasuredPartReader, err = disk.ReadFile(volume, erasurePart, offset)
-		if err != nil {
-			// One of parts not found, we need to re-construct.
-			if err == errFileNotFound {
-				readers[index] = nil
+		if erasuredPartReader, err = qDisk.disk.ReadFile(volume, erasurePart, offset); err != nil {
+			// we can safely allow ReadFile errors up to len(quorumDisks) - xl.readQuorum
+			// otherwise return failure
+			if readFileError < len(quorumDisks)-xl.readQuorum {
+				readFileError++
 				continue
 			}
-			// For all other errors return to the caller.
+
+			// Close the readers opened so far so that they are not leaked.
+			for _, reader := range readers {
+				if reader != nil {
+					reader.Close()
+				}
+			}
 			return nil, err
 		}
-		readers[index] = erasuredPartReader
+
+		readers[qDisk.index] = erasuredPartReader
 	}
-	totalBlocks := xl.DataBlocks + xl.ParityBlocks // Total blocks.
 
 	// Initialize pipe.
 	pipeReader, pipeWriter := io.Pipe()
 	go func() {
-		var totalLeft = metadata.Size
+		var totalLeft = size
 		// Read until the totalLeft.
 		for totalLeft > 0 {
 			// Figure out the right blockSize as it was encoded before.
diff --git a/xl-v1.go b/xl-v1.go
index 3e01c3e3d..3025f6fb5 100644
--- a/xl-v1.go
+++ b/xl-v1.go
@@ -26,19 +26,12 @@ import (
 	"sort"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/klauspost/reedsolomon"
 )
 
-// XL layer structure.
-type XL struct {
-	ReedSolomon  reedsolomon.Encoder // Erasure encoder/decoder.
-	DataBlocks   int
-	ParityBlocks int
-	storageDisks []StorageAPI
-}
-
 const (
 	// Part metadata file.
 	metadataFile = "part.json"
@@ -46,6 +39,84 @@ const (
 	maxErasureBlocks = 16
 )
 
+// XL layer structure.
+//
+// nameSpaceLockMap holds the lock of every volume/path that is currently
+// locked or being waited on.  nameSpaceLockMapMutex guards that map and the
+// count of each entry.  readQuorum is the minimum number of agreeing disks
+// ReadFile requires.
+type XL struct {
+	ReedSolomon           reedsolomon.Encoder // Erasure encoder/decoder.
+	DataBlocks            int
+	ParityBlocks          int
+	storageDisks          []StorageAPI
+	nameSpaceLockMap      map[nameSpaceParam]nameSpaceLock
+	nameSpaceLockMapMutex *sync.Mutex
+	readQuorum            int
+	writeQuorum           int
+}
+
+// lockNameSpace blocks until the lock for volume/path is acquired: a shared
+// read lock when readOnly is true, an exclusive write lock otherwise.  Every
+// call must be paired with an unlockNameSpace call using the same arguments.
+func (xl XL) lockNameSpace(volume, path string, readOnly bool) {
+	param := nameSpaceParam{volume, path}
+
+	// Register interest while holding the map mutex, but release it before
+	// blocking on the entry's lock.  Waiting with the map mutex held would
+	// stop the current holder from ever getting into unlockNameSpace.
+	xl.nameSpaceLockMapMutex.Lock()
+	nsLock, found := xl.nameSpaceLockMap[param]
+	if !found {
+		nsLock = newNameSpaceLock()
+	}
+	// count covers holders and waiters, so the entry cannot be dropped
+	// while a goroutine is still waiting on it.
+	nsLock.count++
+	xl.nameSpaceLockMap[param] = nsLock
+	xl.nameSpaceLockMapMutex.Unlock()
+
+	// nsLock is a copy of the map entry but shares its mutex through the
+	// rwMutex pointer.
+	if readOnly {
+		nsLock.rwMutex.RLock()
+	} else {
+		nsLock.rwMutex.Lock()
+	}
+}
+
+// unlockNameSpace releases a lock taken by lockNameSpace with the same
+// arguments and removes the map entry once nothing holds or waits on it.
+// It does nothing when volume/path has no entry.
+func (xl XL) unlockNameSpace(volume, path string, readOnly bool) {
+	param := nameSpaceParam{volume, path}
+
+	xl.nameSpaceLockMapMutex.Lock()
+	defer xl.nameSpaceLockMapMutex.Unlock()
+
+	nsLock, found := xl.nameSpaceLockMap[param]
+	if !found {
+		return
+	}
+
+	// Releasing never blocks, so it is safe under the map mutex.
+	if readOnly {
+		nsLock.rwMutex.RUnlock()
+	} else {
+		nsLock.rwMutex.Unlock()
+	}
+
+	if nsLock.count > 0 {
+		nsLock.count--
+	}
+	if nsLock.count == 0 {
+		// Drop idle entries so the map does not grow without bound.
+		delete(xl.nameSpaceLockMap, param)
+		return
+	}
+	xl.nameSpaceLockMap[param] = nsLock
+}
+
 // newXL instantiate a new XL.
 func newXL(disks ...string) (StorageAPI, error) {
 	// Initialize XL.
@@ -94,6 +165,14 @@ func newXL(disks ...string) (StorageAPI, error) {
 	// Save all the initialized storage disks.
 	xl.storageDisks = storageDisks
 
+	xl.nameSpaceLockMap = make(map[nameSpaceParam]nameSpaceLock)
+	xl.nameSpaceLockMapMutex = &sync.Mutex{}
+	xl.readQuorum = len(xl.storageDisks) / 2
+	xl.writeQuorum = xl.readQuorum + 3
+	if xl.writeQuorum > len(xl.storageDisks) {
+		xl.writeQuorum = len(xl.storageDisks)
+	}
+
 	// Return successfully initialized.
 	return xl, nil
 }
@@ -187,6 +266,7 @@ type fileMetadata struct {
 	Block512Sum  string
 	DataBlocks   int
 	ParityBlocks int
+	fileVersion  int64
 }
 
 // extractMetadata - extract file metadata.
@@ -221,6 +301,16 @@ func (xl XL) extractMetadata(volume, path string) (fileMetadata, error) {
 		return fileMetadata{}, err
 	}
 
+	// Verify if file.version is parsable.
+	var fileVersion int64
+	// missing file.version is valid
+	if _, ok := metadata["file.version"]; ok {
+		fileVersion, err = strconv.ParseInt(metadata["file.version"], 10, 64)
+		if err != nil {
+			return fileMetadata{}, err
+		}
+	}
+
 	// Verify if block size is parsable.
 	var blockSize int64
 	blockSize, err = strconv.ParseInt(metadata["file.xl.blockSize"], 10, 64)
@@ -254,6 +344,7 @@ func (xl XL) extractMetadata(volume, path string) (fileMetadata, error) {
 		Block512Sum:  sha512Sum,
 		DataBlocks:   dataBlocks,
 		ParityBlocks: parityBlocks,
+		fileVersion:  fileVersion,
 	}, nil
 }
 