Add new ReadFileWithVerify storage-layer API (#4349)

This is an enhancement to the XL/distributed-XL mode. FS mode is
unaffected.

The ReadFileWithVerify storage-layer call is similar to ReadFile with
the additional functionality of performing bit-rot checking. It
accepts additional parameters for a hashing algorithm to use and the
expected hex-encoded hash string.

This patch provides significant performance improvement because:

1. combines the step of reading the file (during
erasure-decoding/reconstruction) with bit-rot verification;

2. limits the number of file-reads; and

3. avoids transferring the file over the network for bit-rot
verification.

ReadFile API is implemented as ReadFileWithVerify with empty hashing
arguments.

Credits to AB and Harsha for the algorithmic improvement.

Fixes #4236.
This commit is contained in:
Aditya Manthramurthy
2017-05-16 14:21:52 -07:00
committed by Harshavardhana
parent cae4683971
commit 8975da4e84
20 changed files with 471 additions and 88 deletions

View File

@@ -29,7 +29,7 @@ import (
)
// newHashWriters - inititialize a slice of hashes for the disk count.
func newHashWriters(diskCount int, algo string) []hash.Hash {
func newHashWriters(diskCount int, algo HashAlgo) []hash.Hash {
hashWriters := make([]hash.Hash, diskCount)
for index := range hashWriters {
hashWriters[index] = newHash(algo)
@@ -38,13 +38,13 @@ func newHashWriters(diskCount int, algo string) []hash.Hash {
}
// newHash - gives you a newly allocated hash depending on the input algorithm.
func newHash(algo string) (h hash.Hash) {
func newHash(algo HashAlgo) (h hash.Hash) {
switch algo {
case sha256Algo:
case HashSha256:
// sha256 checksum specially on ARM64 platforms or whenever
// requested as dictated by `xl.json` entry.
h = sha256.New()
case blake2bAlgo:
case HashBlake2b:
// ignore the error, because New512 without a key never fails
// New512 only returns a non-nil error, if the length of the passed
// key > 64 bytes - but we use blake2b as hash function (no key)
@@ -71,7 +71,7 @@ var hashBufferPool = sync.Pool{
// hashSum calculates the hash of the entire path and returns.
func hashSum(disk StorageAPI, volume, path string, writer hash.Hash) ([]byte, error) {
// Fetch staging a new staging buffer from the pool.
// Fetch a new staging buffer from the pool.
bufp := hashBufferPool.Get().(*[]byte)
defer hashBufferPool.Put(bufp)
@@ -207,3 +207,16 @@ func copyBuffer(writer io.Writer, disk StorageAPI, volume string, path string, b
// Success.
return nil
}
// bitRotVerifier - type representing bit-rot verification process for
// a single under-lying object (currently whole files)
type bitRotVerifier struct {
// has the bit-rot verification been done?
isVerified bool
// is the data free of bit-rot?
hasBitRot bool
// hashing algorithm
algo HashAlgo
// hex-encoded expected raw-hash value
checkSum string
}