xl: Fix ReadFile to keep the order always for reading the data back. (#1339)

Also fixes a stackoverflow bug in namespace locking.
Harshavardhana, 2016-04-19 21:58:11 -07:00, committed by Harshavardhana
parent c7bf471c9e
commit 141a44bfbf
4 changed files with 72 additions and 64 deletions
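The heart of the ReadFile fix is ordering: each erasure-coded part must land in the reader slot that matches its disk index, with nil marking parts that could not be read, so the decoder knows exactly which shards are missing. A minimal standalone sketch of that idea (hypothetical part contents, not the MinIO storage API):

package main

import (
	"fmt"
	"io"
	"strings"
)

func main() {
	// Four disks hold four erasure-coded parts. Disk 2 has failed, so its
	// slot stays nil; appending readers in arrival order instead would
	// shift every later part and corrupt the reconstruction.
	readers := make([]io.ReadCloser, 4)
	readers[0] = io.NopCloser(strings.NewReader("part-0"))
	readers[1] = io.NopCloser(strings.NewReader("part-1"))
	readers[3] = io.NopCloser(strings.NewReader("part-3"))

	for index, reader := range readers {
		if reader == nil {
			fmt.Printf("part %d: missing, must be reconstructed\n", index)
			continue
		}
		data, _ := io.ReadAll(reader)
		fmt.Printf("part %d: %s\n", index, data)
		reader.Close()
	}
}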

@@ -297,6 +297,9 @@ func (o objectAPI) NewMultipartUpload(bucket, object string) (string, *probe.Err
 			// uploadIDPath doesn't exist, so create empty file to reserve the name
 			var w io.WriteCloser
 			if w, e = o.storage.CreateFile(minioMetaVolume, uploadIDPath); e == nil {
+				// Just write some data for erasure code, rather than zero bytes.
+				w.Write([]byte(uploadID))
+				// Close the writer.
 				if e = w.Close(); e != nil {
 					return "", probe.NewError(e)
 				}

@@ -55,6 +55,9 @@ func (xl XL) writeErasure(volume, path string, reader *io.PipeReader) {
 	var sha512Writers = make([]hash.Hash, len(xl.storageDisks))
 	var metadataWriters = make([]io.WriteCloser, len(xl.storageDisks))
 
+	// Save additional erasureMetadata.
+	modTime := time.Now().UTC()
+
 	// Initialize storage disks, get all the writers and corresponding
 	// metadata writers.
 	for index, disk := range xl.storageDisks {
@@ -131,9 +134,6 @@ func (xl XL) writeErasure(volume, path string, reader *io.PipeReader) {
 		}
 	}
 
-	// Save additional erasureMetadata.
-	modTime := time.Now().UTC()
-
 	// Initialize metadata map, save all erasure related metadata.
 	metadata := make(map[string]string)
 	metadata["version"] = minioVersion

@@ -27,39 +27,38 @@ type nameSpaceParam struct {
 // nameSpaceLock - provides primitives for locking critical namespace regions.
 type nameSpaceLock struct {
 	rwMutex *sync.RWMutex
-	rcount  uint
-	wcount  uint
+	count   uint
 }
 
 func (nsLock *nameSpaceLock) InUse() bool {
-	return nsLock.rcount != 0 || nsLock.wcount != 0
+	return nsLock.count != 0
 }
 
 // Lock acquires write lock and increments the namespace counter.
 func (nsLock *nameSpaceLock) Lock() {
-	nsLock.Lock()
-	nsLock.wcount++
+	nsLock.rwMutex.Lock()
+	nsLock.count++
 }
 
 // Unlock releases write lock and decrements the namespace counter.
 func (nsLock *nameSpaceLock) Unlock() {
-	nsLock.Unlock()
-	if nsLock.wcount != 0 {
-		nsLock.wcount--
+	nsLock.rwMutex.Unlock()
+	if nsLock.count != 0 {
+		nsLock.count--
 	}
 }
 
 // RLock acquires read lock and increments the namespace counter.
 func (nsLock *nameSpaceLock) RLock() {
-	nsLock.RLock()
-	nsLock.rcount++
+	nsLock.rwMutex.RLock()
+	nsLock.count++
 }
 
 // RUnlock release read lock and decrements the namespace counter.
 func (nsLock *nameSpaceLock) RUnlock() {
-	nsLock.RUnlock()
-	if nsLock.rcount != 0 {
-		nsLock.rcount--
+	nsLock.rwMutex.RUnlock()
+	if nsLock.count != 0 {
+		nsLock.count--
 	}
 }
@@ -67,7 +66,6 @@ func (nsLock *nameSpaceLock) RUnlock() {
 func newNSLock() *nameSpaceLock {
 	return &nameSpaceLock{
 		rwMutex: &sync.RWMutex{},
-		rcount:  0,
-		wcount:  0,
+		count:   0,
 	}
 }
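The hunk above is the stack-overflow fix named in the commit message: the old methods called themselves (nsLock.Lock(), nsLock.RLock(), and so on) instead of the wrapped sync.RWMutex, so the very first caller recursed until the stack blew up. A minimal standalone sketch of that pattern, using a simplified type rather than the full nameSpaceLock:

package main

import "sync"

// nsLock is a simplified stand-in for nameSpaceLock.
type nsLock struct {
	rwMutex *sync.RWMutex
	count   uint
}

// Buggy shape (pre-fix): the method forwards to itself instead of the
// embedded mutex, so every call recurses until the stack overflows.
//
//	func (n *nsLock) Lock() { n.Lock(); n.count++ }

// Fixed shape: delegate to the wrapped RWMutex, then track usage.
func (n *nsLock) Lock() {
	n.rwMutex.Lock()
	n.count++
}

func (n *nsLock) Unlock() {
	n.rwMutex.Unlock()
	if n.count != 0 {
		n.count--
	}
}

// InUse reports whether the usage counter is non-zero.
func (n *nsLock) InUse() bool { return n.count != 0 }

func main() {
	l := &nsLock{rwMutex: &sync.RWMutex{}}
	l.Lock()
	l.Unlock()
	_ = l.InUse()
}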

@@ -44,38 +44,37 @@ func getEncodedBlockLen(inputLen, dataBlocks int) (curBlockSize int)
 	return
 }
 
-func (xl XL) getMetaDataFileVersions(volume, path string) (diskVersionMap map[StorageAPI]int64) {
+func (xl XL) getMetaFileVersionMap(volume, path string) (diskFileVersionMap map[int]int64) {
 	metadataFilePath := slashpath.Join(path, metadataFile)
-	// set offset to 0 to read entire file
+	// Set offset to 0 to read entire file.
 	offset := int64(0)
 	metadata := make(map[string]string)
-	// read meta data from all disks
-	for _, disk := range xl.storageDisks {
-		diskVersionMap[disk] = -1
-		if metadataReader, err := disk.ReadFile(volume, metadataFilePath, offset); err != nil {
-			// error reading meta data file
-			// TODO: log it
+
+	// Allocate disk index format map - do not use maps directly without allocating.
+	diskFileVersionMap = make(map[int]int64)
+
+	// TODO - all errors should be logged here.
+
+	// Read meta data from all disks
+	for index, disk := range xl.storageDisks {
+		diskFileVersionMap[index] = -1
+		metadataReader, err := disk.ReadFile(volume, metadataFilePath, offset)
+		if err != nil {
 			continue
-		} else if err := json.NewDecoder(metadataReader).Decode(&metadata); err != nil {
-			// error in parsing json
-			// TODO: log it
+		} else if err = json.NewDecoder(metadataReader).Decode(&metadata); err != nil {
 			continue
 		} else if _, ok := metadata["file.version"]; !ok {
-			// missing "file.version" is completely valid
-			diskVersionMap[disk] = 0
-			continue
-		} else if fileVersion, err := strconv.ParseInt(metadata["file.version"], 10, 64); err != nil {
-			// version is not a number
-			// TODO: log it
-			continue
-		} else {
-			diskVersionMap[disk] = fileVersion
+			diskFileVersionMap[index] = 0
 		}
+		// Convert string to integer.
+		fileVersion, err := strconv.ParseInt(metadata["file.version"], 10, 64)
+		if err != nil {
+			continue
+		}
+		diskFileVersionMap[index] = fileVersion
 	}
-	return
+	return diskFileVersionMap
 }
 
 type quorumDisk struct {
@@ -83,24 +82,28 @@ type quorumDisk struct {
 	index int
 }
 
+// getReadFileQuorumDisks - get the current quorum disks.
 func (xl XL) getReadFileQuorumDisks(volume, path string) (quorumDisks []quorumDisk) {
-	diskVersionMap := xl.getMetaDataFileVersions(volume, path)
+	diskVersionMap := xl.getMetaFileVersionMap(volume, path)
 	higherVersion := int64(0)
-	i := 0
-	for disk, version := range diskVersionMap {
-		if version > higherVersion {
-			higherVersion = version
-			quorumDisks = []quorumDisk{{disk, i}}
-		} else if version == higherVersion {
-			quorumDisks = append(quorumDisks, quorumDisk{disk, i})
+	for diskIndex, formatVersion := range diskVersionMap {
+		if formatVersion > higherVersion {
+			higherVersion = formatVersion
+			quorumDisks = []quorumDisk{{
+				disk:  xl.storageDisks[diskIndex],
+				index: diskIndex,
+			}}
+		} else if formatVersion == higherVersion {
+			quorumDisks = append(quorumDisks, quorumDisk{
+				disk:  xl.storageDisks[diskIndex],
+				index: diskIndex,
+			})
 		}
-		i++
 	}
 	return
 }
 
+// getFileSize - extract file size from metadata.
 func (xl XL) getFileSize(volume, path string, disk StorageAPI) (size int64, err error) {
 	metadataFilePath := slashpath.Join(path, metadataFile)
 	// set offset to 0 to read entire file
@@ -133,10 +136,10 @@ func (xl XL) ReadFile(volume, path string, offset int64) (io.ReadCloser, error)
 		return nil, errInvalidArgument
 	}
 
-	// Acquire a read lock. - TODO - disable this due to stack overflow bug.
-	// readLock := true
-	// xl.lockNS(volume, path, readLock)
-	// defer xl.unlockNS(volume, path, readLock)
+	// Acquire a read lock.
+	readLock := true
+	xl.lockNS(volume, path, readLock)
+	defer xl.unlockNS(volume, path, readLock)
 
 	// Check read quorum.
 	quorumDisks := xl.getReadFileQuorumDisks(volume, path)
@@ -151,26 +154,30 @@ func (xl XL) ReadFile(volume, path string, offset int64) (io.ReadCloser, error)
 	}
 
 	totalBlocks := xl.DataBlocks + xl.ParityBlocks // Total blocks.
-	readers := []io.ReadCloser{}
+	readers := make([]io.ReadCloser, len(quorumDisks))
 	readFileError := 0
-	i := 0
 	for _, quorumDisk := range quorumDisks {
 		erasurePart := slashpath.Join(path, fmt.Sprintf("part.%d", quorumDisk.index))
 		var erasuredPartReader io.ReadCloser
 		if erasuredPartReader, err = quorumDisk.disk.ReadFile(volume, erasurePart, offset); err != nil {
-			// we can safely allow ReadFile errors up to len(quorumDisks) - xl.readQuorum
+			// We can safely allow ReadFile errors up to len(quorumDisks) - xl.readQuorum
 			// otherwise return failure
 			if readFileError < len(quorumDisks)-xl.readQuorum {
+				// Set the reader to 'nil' to be able to reconstruct later.
+				readers[quorumDisk.index] = nil
 				readFileError++
 				continue
 			}
-			// TODO: handle currently available io.Reader in readers variable
-			return nil, err
+			// Control reaches here we do not have quorum
+			// anymore. Close all the readers.
+			for _, reader := range readers {
+				if reader != nil {
+					reader.Close()
+				}
+			}
+			return nil, errReadQuorum
 		}
-		readers[i] = erasuredPartReader
-		i++
+		readers[quorumDisk.index] = erasuredPartReader
 	}
 
 	// Initialize pipe.
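getReadFileQuorumDisks above keys the version map by disk index so the selected quorum carries each disk's position along with its metadata version. A minimal sketch of that selection step, using plain ints in place of StorageAPI disks (simplified struct, not the MinIO type):

package main

import "fmt"

// quorumDisk pairs a disk's index with the metadata version found on it
// (simplified: the real struct carries the StorageAPI handle instead).
type quorumDisk struct {
	index   int
	version int64
}

// pickQuorum keeps every disk whose metadata version equals the highest
// version seen, preserving the disk indices for later part.N reads.
func pickQuorum(versions map[int]int64) (quorum []quorumDisk) {
	higher := int64(0)
	for index, version := range versions {
		if version > higher {
			higher = version
			quorum = []quorumDisk{{index, version}}
		} else if version == higher {
			quorum = append(quorum, quorumDisk{index, version})
		}
	}
	return quorum
}

func main() {
	// Disk 2 is unreadable (-1) and disk 1 is stale; only disks 0 and 3
	// form the read quorum, and their indices say which parts to open.
	fmt.Println(pickQuorum(map[int]int64{0: 3, 1: 2, 2: -1, 3: 3}))
}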