Mirror of https://github.com/minio/minio.git (synced 2025-04-12 15:32:07 -04:00)
Refactor xl.GetObject and erasureReadFile. (#2211)
* XL: Refactor xl.GetObject and erasureReadFile. erasureReadFile() is now responsible only for erasure coding; it takes ordered disks and a checkSum slice.
* Move getOrderedPartsMetadata and getOrderedDisks to xl-v1-utils.go.
* Review fixes.
parent 2d38046a5a
commit 8cc163e51a
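The reordering referred to in the commit message works like this: Erasure.Distribution records, for each disk slot, the 1-based index of the erasure block it stores, and the getOrdered* helpers invert that mapping so that data disks come first. A minimal, self-contained sketch of that idea (not MinIO code; strings stand in for StorageAPI and the distribution values are invented):

```go
package main

import "fmt"

// orderByDistribution mirrors the idea behind getOrderedDisks/getOrderedPartsMetadata:
// distribution[i] is the 1-based erasure block index stored on slot i, so the element
// at slot i moves to position distribution[i]-1.
func orderByDistribution(distribution []int, disks []string) []string {
	ordered := make([]string, len(disks))
	for i := range disks {
		ordered[distribution[i]-1] = disks[i]
	}
	return ordered
}

func main() {
	disks := []string{"disk-a", "disk-b", "disk-c", "disk-d"}
	distribution := []int{2, 4, 1, 3} // disk-a holds block 2, disk-b holds block 4, ...
	fmt.Println(orderByDistribution(distribution, disks))
	// Output: [disk-c disk-a disk-d disk-b]
}
```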
@@ -65,38 +65,6 @@ func isSuccessDataBlocks(enBlocks [][]byte, dataBlocks int) bool {
     return successDataBlocksCount >= dataBlocks
 }
 
-// Return ordered partsMetadata depending on distribution.
-func getOrderedPartsMetadata(distribution []int, partsMetadata []xlMetaV1) (orderedPartsMetadata []xlMetaV1) {
-    orderedPartsMetadata = make([]xlMetaV1, len(partsMetadata))
-    for index := range partsMetadata {
-        blockIndex := distribution[index]
-        orderedPartsMetadata[blockIndex-1] = partsMetadata[index]
-    }
-    return orderedPartsMetadata
-}
-
-// getOrderedDisks - get ordered disks from erasure distribution.
-// returns ordered slice of disks from their actual distribution.
-func getOrderedDisks(distribution []int, disks []StorageAPI) (orderedDisks []StorageAPI) {
-    orderedDisks = make([]StorageAPI, len(disks))
-    // From disks gets ordered disks.
-    for index := range disks {
-        blockIndex := distribution[index]
-        orderedDisks[blockIndex-1] = disks[index]
-    }
-    return orderedDisks
-}
-
-// Return ordered CheckSums depending on the distribution.
-func getOrderedCheckSums(distribution []int, blockCheckSums []checkSumInfo) (orderedBlockCheckSums []checkSumInfo) {
-    orderedBlockCheckSums = make([]checkSumInfo, len(blockCheckSums))
-    for index := range blockCheckSums {
-        blockIndex := distribution[index]
-        orderedBlockCheckSums[blockIndex-1] = blockCheckSums[index]
-    }
-    return orderedBlockCheckSums
-}
-
 // Return readable disks slice from which we can read parallelly.
 func getReadDisks(orderedDisks []StorageAPI, index int, dataBlocks int) (readDisks []StorageAPI, nextIndex int, err error) {
     readDisks = make([]StorageAPI, len(orderedDisks))
@@ -192,27 +160,16 @@ func parallelRead(volume, path string, readDisks []StorageAPI, orderedDisks []St
 // are decoded into a data block. Data block is trimmed for given offset and length,
 // then written to given writer. This function also supports bit-rot detection by
 // verifying checksum of individual block's checksum.
-func erasureReadFile(writer io.Writer, disks []StorageAPI, volume string, path string, partName string, eInfos []erasureInfo, offset int64, length int64, totalLength int64) (int64, error) {
+func erasureReadFile(writer io.Writer, disks []StorageAPI, volume string, path string, offset int64, length int64, totalLength int64, blockSize int64, dataBlocks int, parityBlocks int, checkSums []string) (int64, error) {
     // Offset and length cannot be negative.
     if offset < 0 || length < 0 {
         return 0, errUnexpected
     }
-
-    // Pick one erasure info.
-    eInfo := pickValidErasureInfo(eInfos)
-
-    // Gather previously calculated block checksums.
-    blockCheckSums := metaPartBlockChecksums(disks, eInfos, partName)
-
-    // []orderedDisks will have first eInfo.DataBlocks disks as data
-    // disks and rest will be parity.
-    orderedDisks := getOrderedDisks(eInfo.Distribution, disks)
-    orderedBlockCheckSums := getOrderedCheckSums(eInfo.Distribution, blockCheckSums)
 
     // bitRotVerify verifies if the file on a particular disk doesn't have bitrot
     // by verifying the hash of the contents of the file.
     bitRotVerify := func() func(diskIndex int) bool {
-        verified := make([]bool, len(orderedDisks))
+        verified := make([]bool, len(disks))
         // Return closure so that we have reference to []verified and
         // not recalculate the hash on it every time the function is
         // called for the same disk.
@@ -222,7 +179,7 @@ func erasureReadFile(writer io.Writer, disks []StorageAPI, volume string, path s
                 return true
             }
             // Is this a valid block?
-            isValid := isValidBlock(orderedDisks[diskIndex], volume, path, orderedBlockCheckSums[diskIndex])
+            isValid := isValidBlock(disks[diskIndex], volume, path, checkSums[diskIndex])
             verified[diskIndex] = isValid
             return isValid
         }
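The closure above caches successful bit-rot checks so a disk's contents are hashed at most once per erasureReadFile call. A stripped-down, runnable sketch of the same pattern (illustrative names, not the MinIO source):

```go
package main

import "fmt"

func main() {
	// Stand-in for reading a block from a disk and hashing it.
	expensiveCheck := func(i int) bool {
		fmt.Println("hashing disk", i)
		return i != 2 // pretend disk 2 has bit rot
	}

	numDisks := 4
	verified := make([]bool, numDisks)
	// Remember disks that verified successfully; failed disks are re-checked.
	verify := func(i int) bool {
		if verified[i] {
			return true // verified earlier, skip the expensive hash
		}
		ok := expensiveCheck(i)
		verified[i] = ok
		return ok
	}

	fmt.Println(verify(0), verify(0)) // second call hits the cache
	fmt.Println(verify(2))            // failure is not cached
}
```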
@@ -235,32 +192,32 @@ func erasureReadFile(writer io.Writer, disks []StorageAPI, volume string, path s
     // chunkSize is calculated such that chunkSize*DataBlocks accommodates BlockSize bytes.
     // So chunkSize*DataBlocks can be slightly larger than BlockSize if BlockSize is not divisible by
     // DataBlocks. The extra space will have 0-padding.
-    chunkSize := getEncodedBlockLen(eInfo.BlockSize, eInfo.DataBlocks)
+    chunkSize := getEncodedBlockLen(blockSize, dataBlocks)
 
     // Get start and end block, also bytes to be skipped based on the input offset.
-    startBlock, endBlock, bytesToSkip := getBlockInfo(offset, totalLength, eInfo.BlockSize)
+    startBlock, endBlock, bytesToSkip := getBlockInfo(offset, totalLength, blockSize)
 
     // For each block, read chunk from each disk. If we are able to read all the data disks then we don't
     // need to read parity disks. If one of the data disk is missing we need to read DataBlocks+1 number
     // of disks. Once read, we Reconstruct() missing data if needed and write it to the given writer.
     for block := startBlock; bytesWritten < length; block++ {
         // Each element of enBlocks holds curChunkSize'd amount of data read from its corresponding disk.
-        enBlocks := make([][]byte, len(orderedDisks))
+        enBlocks := make([][]byte, len(disks))
 
         // enBlocks data can have 0-padding hence we need to figure the exact number
         // of bytes we want to read from enBlocks.
-        blockSize := eInfo.BlockSize
+        blockSize := blockSize
 
         // curChunkSize is chunkSize until end block.
         curChunkSize := chunkSize
 
         // We have endBlock, verify if we need to have padding.
-        if block == endBlock && (totalLength%eInfo.BlockSize != 0) {
+        if block == endBlock && (totalLength%blockSize != 0) {
             // If this is the last block and size of the block is < BlockSize.
-            curChunkSize = getEncodedBlockLen(totalLength%eInfo.BlockSize, eInfo.DataBlocks)
+            curChunkSize = getEncodedBlockLen(totalLength%blockSize, dataBlocks)
 
             // For the last block, the block size can be less than BlockSize.
-            blockSize = totalLength % eInfo.BlockSize
+            blockSize = totalLength % blockSize
         }
 
         // Block offset.
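A quick worked example of the padding comment above, assuming getEncodedBlockLen is a plain ceiling division (its body is not part of this diff):

```go
package main

import "fmt"

// encodedBlockLen is an assumed stand-in for getEncodedBlockLen: the erasure
// block is split evenly across the data disks, rounding up.
func encodedBlockLen(blockSize, dataBlocks int64) int64 {
	return (blockSize + dataBlocks - 1) / dataBlocks
}

func main() {
	blockSize := int64(10 * 1024 * 1024) // 10 MiB erasure block
	dataBlocks := int64(6)
	chunkSize := encodedBlockLen(blockSize, dataBlocks)
	fmt.Println(chunkSize)                        // 1747627 bytes per data disk
	fmt.Println(chunkSize*dataBlocks - blockSize) // 2 bytes of 0-padding
}
```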
@@ -277,25 +234,29 @@ func erasureReadFile(writer io.Writer, disks []StorageAPI, volume string, path s
             // readDisks - disks from which we need to read in parallel.
             var readDisks []StorageAPI
             var err error
-            readDisks, nextIndex, err = getReadDisks(orderedDisks, nextIndex, eInfo.DataBlocks)
+            // get readable disks slice from which we can read parallelly.
+            readDisks, nextIndex, err = getReadDisks(disks, nextIndex, dataBlocks)
             if err != nil {
                 return bytesWritten, err
             }
-            parallelRead(volume, path, readDisks, orderedDisks, enBlocks, blockOffset, curChunkSize, bitRotVerify)
-            if isSuccessDecodeBlocks(enBlocks, eInfo.DataBlocks) {
+            // Issue a parallel read across the disks specified in readDisks.
+            parallelRead(volume, path, readDisks, disks, enBlocks, blockOffset, curChunkSize, bitRotVerify)
+            if isSuccessDecodeBlocks(enBlocks, dataBlocks) {
                 // If enough blocks are available to do rs.Reconstruct()
                 break
             }
            if nextIndex == len(orderedDisks) {
+            if nextIndex == len(disks) {
                 // No more disks to read from.
                 return bytesWritten, errXLReadQuorum
             }
+            // We do not have enough data blocks to reconstruct the data
+            // hence continue the for-loop till we have enough data blocks.
         }
 
         // If we have all the data blocks no need to decode, continue to write.
-        if !isSuccessDataBlocks(enBlocks, eInfo.DataBlocks) {
+        if !isSuccessDataBlocks(enBlocks, dataBlocks) {
             // Reconstruct the missing data blocks.
-            if err := decodeData(enBlocks, eInfo.DataBlocks, eInfo.ParityBlocks); err != nil {
+            if err := decodeData(enBlocks, dataBlocks, parityBlocks); err != nil {
                 return bytesWritten, err
             }
         }
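The loop above pulls in disks incrementally: it first reads the data disks, then one extra disk per failure, until decoding is possible or disks run out. A deliberately simplified, sequential sketch of that retry strategy (hypothetical names; the real code reads each batch in parallel via getReadDisks/parallelRead):

```go
package main

import "fmt"

// readUntilDecodable keeps pulling in additional disks until dataBlocks chunks
// have been read successfully. Reads are sequential here purely for brevity.
func readUntilDecodable(numDisks, dataBlocks int, read func(i int) bool) bool {
	success := 0
	for i := 0; i < numDisks; i++ {
		if read(i) {
			success++
		}
		if success >= dataBlocks {
			return true // enough chunks to Reconstruct() and write out
		}
	}
	return false // read quorum cannot be met (errXLReadQuorum in the real code)
}

func main() {
	failed := map[int]bool{1: true, 3: true} // pretend two disks are unreadable
	ok := readUntilDecodable(8, 4, func(i int) bool { return !failed[i] })
	fmt.Println(ok) // true: 4 good chunks found after reading 6 disks
}
```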
@@ -314,7 +275,7 @@ func erasureReadFile(writer io.Writer, disks []StorageAPI, volume string, path s
         }
 
         // Write data blocks.
-        n, err := writeDataBlocks(writer, enBlocks, eInfo.DataBlocks, outOffset, outSize)
+        n, err := writeDataBlocks(writer, enBlocks, dataBlocks, outOffset, outSize)
         if err != nil {
             return bytesWritten, err
         }
@@ -337,34 +298,21 @@ func (e erasureInfo) PartObjectChecksum(partName string) checkSumInfo {
     return checkSumInfo{}
 }
 
-// xlMetaPartBlockChecksums - get block checksums for a given part.
-func metaPartBlockChecksums(disks []StorageAPI, eInfos []erasureInfo, partName string) (blockCheckSums []checkSumInfo) {
-    for index := range disks {
-        if eInfos[index].IsValid() {
-            // Save the read checksums for a given part.
-            blockCheckSums = append(blockCheckSums, eInfos[index].PartObjectChecksum(partName))
-        } else {
-            blockCheckSums = append(blockCheckSums, checkSumInfo{})
-        }
-    }
-    return blockCheckSums
-}
-
 // isValidBlock - calculates the checksum hash for the block and
 // validates if its correct returns true for valid cases, false otherwise.
-func isValidBlock(disk StorageAPI, volume, path string, blockCheckSum checkSumInfo) (ok bool) {
+func isValidBlock(disk StorageAPI, volume, path string, checksum string) (ok bool) {
     // Disk is not available, not a valid block.
     if disk == nil {
         return false
     }
     // Read everything for a given block and calculate hash.
-    hashWriter := newHash(blockCheckSum.Algorithm)
+    hashWriter := newHash("blake2b")
     hashBytes, err := hashSum(disk, volume, path, hashWriter)
     if err != nil {
         errorIf(err, "Unable to calculate checksum %s/%s", volume, path)
         return false
     }
-    return hex.EncodeToString(hashBytes) == blockCheckSum.Hash
+    return hex.EncodeToString(hashBytes) == checksum
 }
 
 // decodeData - decode encoded blocks.
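With this change isValidBlock compares a hex-encoded checksum string and hard-codes the algorithm name passed to newHash. A standalone sketch of that kind of check, assuming BLAKE2b-512 via golang.org/x/crypto/blake2b and reading a local file instead of going through StorageAPI (names are illustrative):

```go
package main

import (
	"encoding/hex"
	"fmt"
	"io"
	"os"

	"golang.org/x/crypto/blake2b"
)

// verifyBlockChecksum hashes a block file and compares it to the stored hex
// checksum, roughly what isValidBlock does via hashSum.
func verifyBlockChecksum(path, expectedHex string) (bool, error) {
	f, err := os.Open(path)
	if err != nil {
		return false, err
	}
	defer f.Close()

	h, err := blake2b.New512(nil) // unkeyed BLAKE2b-512
	if err != nil {
		return false, err
	}
	if _, err := io.Copy(h, f); err != nil {
		return false, err
	}
	return hex.EncodeToString(h.Sum(nil)) == expectedHex, nil
}

func main() {
	// "part.1" and the checksum are placeholders for a real part file and its
	// stored checksum.
	ok, err := verifyBlockChecksum("part.1", "deadbeef")
	fmt.Println(ok, err)
}
```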
@@ -86,6 +86,8 @@ func (xl xlObjects) GetObject(bucket, object string, startOffset int64, length i
             break
         }
     }
+    onlineDisks = getOrderedDisks(xlMeta.Erasure.Distribution, onlineDisks)
+    metaArr = getOrderedPartsMetadata(xlMeta.Erasure.Distribution, metaArr)
 
     // Reply back invalid range if the input offset and length fall out of range.
     if startOffset > xlMeta.Stat.Size || length > xlMeta.Stat.Size {
@@ -109,12 +111,6 @@ func (xl xlObjects) GetObject(bucket, object string, startOffset int64, length i
         return toObjectErr(err, bucket, object)
     }
 
-    // Collect all the previous erasure infos across the disk.
-    var eInfos []erasureInfo
-    for index := range onlineDisks {
-        eInfos = append(eInfos, metaArr[index].Erasure)
-    }
-
     // Save the writer.
     mw := writer
 
@@ -173,8 +169,17 @@ func (xl xlObjects) GetObject(bucket, object string, startOffset int64, length i
             readSize = length - totalBytesRead
         }
 
+        // Get the checksums of the current part.
+        checkSums := make([]string, len(onlineDisks))
+        for index := range onlineDisks {
+            checkSums[index], _, err = metaArr[index].GetCheckSum(partName)
+            if err != nil {
+                return toObjectErr(err, bucket, object)
+            }
+        }
+
         // Start reading the part name.
-        n, err := erasureReadFile(mw, onlineDisks, bucket, pathJoin(object, partName), partName, eInfos, partOffset, readSize, partSize)
+        n, err := erasureReadFile(mw, onlineDisks, bucket, pathJoin(object, partName), partOffset, readSize, partSize, xlMeta.Erasure.BlockSize, xlMeta.Erasure.DataBlocks, xlMeta.Erasure.ParityBlocks, checkSums)
         if err != nil {
             return toObjectErr(err, bucket, object)
         }
@@ -142,3 +142,25 @@ func unionChecksumInfos(cur []checkSumInfo, updated []checkSumInfo, curPartName
     }
     return finalChecksums
 }
+
+// Return ordered partsMetadata depending on distribution.
+func getOrderedPartsMetadata(distribution []int, partsMetadata []xlMetaV1) (orderedPartsMetadata []xlMetaV1) {
+    orderedPartsMetadata = make([]xlMetaV1, len(partsMetadata))
+    for index := range partsMetadata {
+        blockIndex := distribution[index]
+        orderedPartsMetadata[blockIndex-1] = partsMetadata[index]
+    }
+    return orderedPartsMetadata
+}
+
+// getOrderedDisks - get ordered disks from erasure distribution.
+// returns ordered slice of disks from their actual distribution.
+func getOrderedDisks(distribution []int, disks []StorageAPI) (orderedDisks []StorageAPI) {
+    orderedDisks = make([]StorageAPI, len(disks))
+    // From disks gets ordered disks.
+    for index := range disks {
+        blockIndex := distribution[index]
+        orderedDisks[blockIndex-1] = disks[index]
+    }
+    return orderedDisks
+}