mirror of
https://github.com/minio/minio.git
synced 2025-01-14 16:25:01 -05:00
8975da4e84
This is an enhancement to the XL/distributed-XL mode; FS mode is unaffected. The ReadFileWithVerify storage-layer call is similar to ReadFile, with the additional functionality of performing bit-rot checking. It accepts additional parameters: the hashing algorithm to use and the expected hex-encoded hash string. This patch provides a significant performance improvement because it: 1. combines the step of reading the file (during erasure-decoding/reconstruction) with bit-rot verification; 2. limits the number of file reads; and 3. avoids transferring the file over the network for bit-rot verification. The ReadFile API is implemented as ReadFileWithVerify with empty hashing arguments. Credits to AB and Harsha for the algorithmic improvement. Fixes #4236.
223 lines
6.2 KiB
Go
223 lines
6.2 KiB
Go
/*
|
|
* Minio Cloud Storage, (C) 2016 Minio, Inc.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package cmd
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"hash"
|
|
"io"
|
|
"sync"
|
|
|
|
"github.com/klauspost/reedsolomon"
|
|
"github.com/minio/sha256-simd"
|
|
"golang.org/x/crypto/blake2b"
|
|
)
|
|
|
|
// newHashWriters - inititialize a slice of hashes for the disk count.
|
|
func newHashWriters(diskCount int, algo HashAlgo) []hash.Hash {
|
|
hashWriters := make([]hash.Hash, diskCount)
|
|
for index := range hashWriters {
|
|
hashWriters[index] = newHash(algo)
|
|
}
|
|
return hashWriters
|
|
}
|
|
|
|
// newHash - gives you a newly allocated hash depending on the input algorithm.
|
|
func newHash(algo HashAlgo) (h hash.Hash) {
|
|
switch algo {
|
|
case HashSha256:
|
|
// sha256 checksum specially on ARM64 platforms or whenever
|
|
// requested as dictated by `xl.json` entry.
|
|
h = sha256.New()
|
|
case HashBlake2b:
|
|
// ignore the error, because New512 without a key never fails
|
|
// New512 only returns a non-nil error, if the length of the passed
|
|
// key > 64 bytes - but we use blake2b as hash function (no key)
|
|
h, _ = blake2b.New512(nil)
|
|
// Add new hashes here.
|
|
default:
|
|
// Default to blake2b.
|
|
// ignore the error, because New512 without a key never fails
|
|
// New512 only returns a non-nil error, if the length of the passed
|
|
// key > 64 bytes - but we use blake2b as hash function (no key)
|
|
h, _ = blake2b.New512(nil)
|
|
}
|
|
return h
|
|
}
|
|
|
|
// Hash buffer pool is a pool of reusable
|
|
// buffers used while checksumming a stream.
|
|
var hashBufferPool = sync.Pool{
|
|
New: func() interface{} {
|
|
b := make([]byte, readSizeV1)
|
|
return &b
|
|
},
|
|
}
|
|
|
|
// hashSum calculates the hash of the entire path and returns.
|
|
func hashSum(disk StorageAPI, volume, path string, writer hash.Hash) ([]byte, error) {
|
|
// Fetch a new staging buffer from the pool.
|
|
bufp := hashBufferPool.Get().(*[]byte)
|
|
defer hashBufferPool.Put(bufp)
|
|
|
|
// Copy entire buffer to writer.
|
|
if err := copyBuffer(writer, disk, volume, path, *bufp); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Return the final hash sum.
|
|
return writer.Sum(nil), nil
|
|
}
|
|
|
|
// getDataBlockLen - returns the combined length, in bytes, of the first
// dataBlocks entries of enBlocks (the data part of the encoded blocks).
func getDataBlockLen(enBlocks [][]byte, dataBlocks int) int {
	data := enBlocks[:dataBlocks]
	total := 0
	for i := range data {
		total += len(data[i])
	}
	return total
}
|
|
|
|
// Writes all the data blocks from encoded blocks until requested
|
|
// outSize length. Provides a way to skip bytes until the offset.
|
|
func writeDataBlocks(dst io.Writer, enBlocks [][]byte, dataBlocks int, offset int64, length int64) (int64, error) {
|
|
// Offset and out size cannot be negative.
|
|
if offset < 0 || length < 0 {
|
|
return 0, traceError(errUnexpected)
|
|
}
|
|
|
|
// Do we have enough blocks?
|
|
if len(enBlocks) < dataBlocks {
|
|
return 0, traceError(reedsolomon.ErrTooFewShards)
|
|
}
|
|
|
|
// Do we have enough data?
|
|
if int64(getDataBlockLen(enBlocks, dataBlocks)) < length {
|
|
return 0, traceError(reedsolomon.ErrShortData)
|
|
}
|
|
|
|
// Counter to decrement total left to write.
|
|
write := length
|
|
|
|
// Counter to increment total written.
|
|
var totalWritten int64
|
|
|
|
// Write all data blocks to dst.
|
|
for _, block := range enBlocks[:dataBlocks] {
|
|
// Skip blocks until we have reached our offset.
|
|
if offset >= int64(len(block)) {
|
|
// Decrement offset.
|
|
offset -= int64(len(block))
|
|
continue
|
|
} else {
|
|
// Skip until offset.
|
|
block = block[offset:]
|
|
|
|
// Reset the offset for next iteration to read everything
|
|
// from subsequent blocks.
|
|
offset = 0
|
|
}
|
|
// We have written all the blocks, write the last remaining block.
|
|
if write < int64(len(block)) {
|
|
n, err := io.Copy(dst, bytes.NewReader(block[:write]))
|
|
if err != nil {
|
|
return 0, traceError(err)
|
|
}
|
|
totalWritten += n
|
|
break
|
|
}
|
|
// Copy the block.
|
|
n, err := io.Copy(dst, bytes.NewReader(block))
|
|
if err != nil {
|
|
return 0, traceError(err)
|
|
}
|
|
|
|
// Decrement output size.
|
|
write -= n
|
|
|
|
// Increment written.
|
|
totalWritten += n
|
|
}
|
|
|
|
// Success.
|
|
return totalWritten, nil
|
|
}
|
|
|
|
// getChunkSize returns the per-data-block chunk size for a block of
// blockSize bytes: blockSize divided by dataBlocks, rounded up.
//
// Rounding up guarantees that chunkSize*dataBlocks can hold blockSize
// bytes. When blockSize is not a multiple of dataBlocks the product is
// slightly larger than blockSize and the surplus is 0-padding.
func getChunkSize(blockSize int64, dataBlocks int) int64 {
	shards := int64(dataBlocks)
	return (blockSize + shards - 1) / shards
}
|
|
|
|
// copyBuffer - copies from disk, volume, path to input writer until either EOF
|
|
// is reached at volume, path or an error occurs. A success copyBuffer returns
|
|
// err == nil, not err == EOF. Because copyBuffer is defined to read from path
|
|
// until EOF. It does not treat an EOF from ReadFile an error to be reported.
|
|
// Additionally copyBuffer stages through the provided buffer; otherwise if it
|
|
// has zero length, returns error.
|
|
func copyBuffer(writer io.Writer, disk StorageAPI, volume string, path string, buf []byte) error {
|
|
// Error condition of zero length buffer.
|
|
if buf != nil && len(buf) == 0 {
|
|
return errors.New("empty buffer in readBuffer")
|
|
}
|
|
|
|
// Starting offset for Reading the file.
|
|
var startOffset int64
|
|
|
|
// Read until io.EOF.
|
|
for {
|
|
n, err := disk.ReadFile(volume, path, startOffset, buf)
|
|
if n > 0 {
|
|
m, wErr := writer.Write(buf[:n])
|
|
if wErr != nil {
|
|
return wErr
|
|
}
|
|
if int64(m) != n {
|
|
return io.ErrShortWrite
|
|
}
|
|
}
|
|
if err != nil {
|
|
if err == io.EOF || err == io.ErrUnexpectedEOF {
|
|
break
|
|
}
|
|
return err
|
|
}
|
|
// Progress the offset.
|
|
startOffset += n
|
|
}
|
|
|
|
// Success.
|
|
return nil
|
|
}
|
|
|
|
// bitRotVerifier - type representing bit-rot verification process for
// a single under-lying object (currently whole files)
type bitRotVerifier struct {
	// has the bit-rot verification been done?
	isVerified bool
	// was bit-rot detected in the data? Going by the field name, true
	// presumably means corruption was found (the earlier wording, "is the
	// data free of bit-rot?", read as the opposite) - TODO confirm against
	// the code that sets this field.
	hasBitRot bool
	// hashing algorithm
	algo HashAlgo
	// hex-encoded expected raw-hash value
	checkSum string
}
|