2016-03-27 21:52:38 -07:00
|
|
|
/*
|
|
|
|
* Minio Cloud Storage, (C) 2016 Minio, Inc.
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2016-08-18 16:23:42 -07:00
|
|
|
package cmd
|
2016-03-27 21:52:38 -07:00
|
|
|
|
2016-05-31 20:23:31 -07:00
|
|
|
import (
|
2016-06-20 02:05:26 +05:30
|
|
|
"io"
|
2016-06-01 16:43:31 -07:00
|
|
|
|
2016-07-26 02:47:01 +05:30
|
|
|
"github.com/minio/minio/pkg/bpool"
|
2016-05-31 20:23:31 -07:00
|
|
|
)
|
2016-03-27 21:52:38 -07:00
|
|
|
|
2017-08-14 18:08:42 -07:00
|
|
|
// ReadFile reads as much data as requested from the file under the given volume and path and writes the data to the provided writer.
|
|
|
|
// The algorithm and the keys/checksums are used to verify the integrity of the given file. ReadFile will read data from the given offset
|
|
|
|
// up to the given length. If parts of the file are corrupted ReadFile tries to reconstruct the data.
|
|
|
|
func (s ErasureStorage) ReadFile(writer io.Writer, volume, path string, offset, length int64, totalLength int64, checksums [][]byte, algorithm BitrotAlgorithm, blocksize int64, pool *bpool.BytePool) (f ErasureFileInfo, err error) {
|
|
|
|
if offset < 0 || length < 0 {
|
|
|
|
return f, traceError(errUnexpected)
|
2016-06-24 02:06:23 -07:00
|
|
|
}
|
2017-08-14 18:08:42 -07:00
|
|
|
if offset+length > totalLength {
|
|
|
|
return f, traceError(errUnexpected)
|
|
|
|
}
|
|
|
|
if !algorithm.Available() {
|
|
|
|
return f, traceError(errBitrotHashAlgoInvalid)
|
2016-06-24 02:06:23 -07:00
|
|
|
}
|
|
|
|
|
2017-08-14 18:08:42 -07:00
|
|
|
f.Checksums = make([][]byte, len(s.disks))
|
|
|
|
verifiers := make([]*BitrotVerifier, len(s.disks))
|
|
|
|
for i, disk := range s.disks {
|
|
|
|
if disk == OfflineDisk {
|
2016-06-24 18:00:34 -07:00
|
|
|
continue
|
|
|
|
}
|
2017-08-14 18:08:42 -07:00
|
|
|
verifiers[i] = NewBitrotVerifier(algorithm, checksums[i])
|
2016-06-24 18:00:34 -07:00
|
|
|
}
|
2017-08-14 18:08:42 -07:00
|
|
|
errChans := make([]chan error, len(s.disks))
|
|
|
|
for i := range errChans {
|
|
|
|
errChans[i] = make(chan error, 1)
|
2016-06-24 18:00:34 -07:00
|
|
|
}
|
2017-08-14 18:08:42 -07:00
|
|
|
lastBlock := totalLength / blocksize
|
|
|
|
startOffset := offset % blocksize
|
|
|
|
chunksize := getChunkSize(blocksize, s.dataBlocks)
|
2016-06-24 18:00:34 -07:00
|
|
|
|
2017-08-14 18:08:42 -07:00
|
|
|
blocks := make([][]byte, len(s.disks))
|
|
|
|
for off := offset / blocksize; length > 0; off++ {
|
|
|
|
blockOffset := off * chunksize
|
|
|
|
pool.Reset()
|
|
|
|
|
|
|
|
if currentBlock := (offset + f.Size) / blocksize; currentBlock == lastBlock {
|
|
|
|
blocksize = totalLength % blocksize
|
|
|
|
chunksize = getChunkSize(blocksize, s.dataBlocks)
|
2016-06-24 18:00:34 -07:00
|
|
|
}
|
2017-08-14 18:08:42 -07:00
|
|
|
err = s.readConcurrent(volume, path, blockOffset, chunksize, blocks, verifiers, errChans, pool)
|
|
|
|
if err != nil {
|
|
|
|
return f, traceError(errXLReadQuorum)
|
2016-06-24 18:00:34 -07:00
|
|
|
}
|
2017-08-14 18:08:42 -07:00
|
|
|
|
|
|
|
writeLength := blocksize - startOffset
|
|
|
|
if length < writeLength {
|
|
|
|
writeLength = length
|
2016-06-24 18:00:34 -07:00
|
|
|
}
|
2017-08-14 18:08:42 -07:00
|
|
|
n, err := writeDataBlocks(writer, blocks, s.dataBlocks, startOffset, writeLength)
|
|
|
|
if err != nil {
|
|
|
|
return f, err
|
|
|
|
}
|
|
|
|
startOffset = 0
|
2017-08-18 11:45:16 -07:00
|
|
|
f.Size += n
|
|
|
|
length -= n
|
2016-06-24 18:00:34 -07:00
|
|
|
}
|
|
|
|
|
2017-08-14 18:08:42 -07:00
|
|
|
f.Algorithm = algorithm
|
|
|
|
for i, disk := range s.disks {
|
|
|
|
if disk == OfflineDisk {
|
2016-06-28 01:54:55 +05:30
|
|
|
continue
|
|
|
|
}
|
2017-08-14 18:08:42 -07:00
|
|
|
f.Checksums[i] = verifiers[i].Sum(nil)
|
2016-06-28 01:54:55 +05:30
|
|
|
}
|
2017-08-14 18:08:42 -07:00
|
|
|
return f, nil
|
2016-06-28 01:54:55 +05:30
|
|
|
}
|
|
|
|
|
2017-08-14 18:08:42 -07:00
|
|
|
// erasureCountMissingBlocks returns how many of the first limit entries of
// blocks are nil. An empty but non-nil block is not counted as missing.
//
// limit is clamped to the range [0, len(blocks)], so a negative or oversized
// limit does not cause a slice-bounds panic; entries beyond len(blocks) are
// not counted.
func erasureCountMissingBlocks(blocks [][]byte, limit int) int {
	if limit > len(blocks) {
		limit = len(blocks)
	}
	if limit < 0 {
		limit = 0
	}
	missing := 0
	for _, block := range blocks[:limit] {
		if block == nil {
			missing++
		}
	}
	return missing
}
|
2016-06-21 10:10:10 +05:30
|
|
|
|
2017-08-14 18:08:42 -07:00
|
|
|
// readConcurrent reads all requested data concurrently from the disks into blocks. It returns an error if
|
|
|
|
// too many disks failed while reading.
|
|
|
|
func (s *ErasureStorage) readConcurrent(volume, path string, offset int64, length int64, blocks [][]byte, verifiers []*BitrotVerifier, errChans []chan error, pool *bpool.BytePool) (err error) {
|
|
|
|
errs := make([]error, len(s.disks))
|
|
|
|
for i := range blocks {
|
|
|
|
blocks[i], err = pool.Get()
|
|
|
|
if err != nil {
|
|
|
|
return traceErrorf("failed to get new buffer from pool: %v", err)
|
2016-06-20 02:05:26 +05:30
|
|
|
}
|
2017-08-14 18:08:42 -07:00
|
|
|
blocks[i] = blocks[i][:length]
|
|
|
|
}
|
2016-05-29 15:38:14 -07:00
|
|
|
|
2017-08-14 18:08:42 -07:00
|
|
|
erasureReadBlocksConcurrent(s.disks[:s.dataBlocks], volume, path, offset, blocks[:s.dataBlocks], verifiers[:s.dataBlocks], errs[:s.dataBlocks], errChans[:s.dataBlocks])
|
|
|
|
missingDataBlocks := erasureCountMissingBlocks(blocks, s.dataBlocks)
|
|
|
|
mustReconstruct := missingDataBlocks > 0
|
|
|
|
if mustReconstruct {
|
|
|
|
requiredReads := s.dataBlocks + missingDataBlocks
|
|
|
|
if requiredReads > s.dataBlocks+s.parityBlocks {
|
|
|
|
return errXLReadQuorum
|
2016-06-20 02:05:26 +05:30
|
|
|
}
|
2017-08-14 18:08:42 -07:00
|
|
|
erasureReadBlocksConcurrent(s.disks[s.dataBlocks:requiredReads], volume, path, offset, blocks[s.dataBlocks:requiredReads], verifiers[s.dataBlocks:requiredReads], errs[s.dataBlocks:requiredReads], errChans[s.dataBlocks:requiredReads])
|
|
|
|
if erasureCountMissingBlocks(blocks, requiredReads) > 0 {
|
|
|
|
erasureReadBlocksConcurrent(s.disks[requiredReads:], volume, path, offset, blocks[requiredReads:], verifiers[requiredReads:], errs[requiredReads:], errChans[requiredReads:])
|
2016-06-20 02:05:26 +05:30
|
|
|
}
|
2017-08-14 18:08:42 -07:00
|
|
|
}
|
|
|
|
if err = reduceReadQuorumErrs(errs, []error{}, s.dataBlocks); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if mustReconstruct {
|
|
|
|
if err = s.ErasureDecodeDataBlocks(blocks); err != nil {
|
|
|
|
return err
|
2016-07-20 14:00:30 +05:30
|
|
|
}
|
2016-05-31 20:23:31 -07:00
|
|
|
}
|
2017-08-14 18:08:42 -07:00
|
|
|
return nil
|
2016-03-27 21:52:38 -07:00
|
|
|
}
|
2016-06-02 01:49:46 -07:00
|
|
|
|
2017-08-14 18:08:42 -07:00
|
|
|
// erasureReadBlocksConcurrent reads all data from each disk to each data block in parallel.
|
|
|
|
// Therefore disks, blocks, verifiers errors and locks must have the same length.
|
|
|
|
func erasureReadBlocksConcurrent(disks []StorageAPI, volume, path string, offset int64, blocks [][]byte, verifiers []*BitrotVerifier, errors []error, errChans []chan error) {
|
|
|
|
for i := range errChans {
|
|
|
|
go erasureReadFromFile(disks[i], volume, path, offset, blocks[i], verifiers[i], errChans[i])
|
|
|
|
}
|
|
|
|
for i := range errChans {
|
|
|
|
errors[i] = <-errChans[i] // blocks until the go routine 'i' is done - no data race
|
|
|
|
if errors[i] != nil {
|
|
|
|
disks[i] = OfflineDisk
|
|
|
|
blocks[i] = nil
|
|
|
|
}
|
2016-06-02 01:49:46 -07:00
|
|
|
}
|
2017-08-11 18:24:48 -07:00
|
|
|
}
|
2016-06-24 02:06:23 -07:00
|
|
|
|
2017-08-14 18:08:42 -07:00
|
|
|
// erasureReadFromFile reads data from the disk to buffer in parallel.
|
|
|
|
// It sends the returned error through the error channel.
|
|
|
|
func erasureReadFromFile(disk StorageAPI, volume, path string, offset int64, buffer []byte, verifier *BitrotVerifier, errChan chan<- error) {
|
|
|
|
if disk == OfflineDisk {
|
|
|
|
errChan <- traceError(errDiskNotFound)
|
|
|
|
return
|
2016-06-02 01:49:46 -07:00
|
|
|
}
|
2017-08-14 18:08:42 -07:00
|
|
|
var err error
|
|
|
|
if !verifier.IsVerified() {
|
|
|
|
_, err = disk.ReadFileWithVerify(volume, path, offset, buffer, verifier)
|
|
|
|
} else {
|
|
|
|
_, err = disk.ReadFile(volume, path, offset, buffer)
|
|
|
|
}
|
|
|
|
errChan <- err
|
2016-06-02 01:49:46 -07:00
|
|
|
}
|