mirror of
https://github.com/minio/minio.git
synced 2025-11-22 18:47:43 -05:00
Add new ReadFileWithVerify storage-layer API (#4349)
This is an enhancement to the XL/distributed-XL mode. FS mode is unaffected. The ReadFileWithVerify storage-layer call is similar to ReadFile with the additional functionality of performing bit-rot checking. It accepts additional parameters for a hashing algorithm to use and the expected hex-encoded hash string. This patch provides significant performance improvement because: 1. combines the step of reading the file (during erasure-decoding/reconstruction) with bit-rot verification; 2. limits the number of file-reads; and 3. avoids transferring the file over the network for bit-rot verification. ReadFile API is implemented as ReadFileWithVerify with empty hashing arguments. Credits to AB and Harsha for the algorithmic improvement. Fixes #4236.
This commit is contained in:
committed by
Harshavardhana
parent
cae4683971
commit
8975da4e84
86
cmd/posix.go
86
cmd/posix.go
@@ -18,6 +18,8 @@ package cmd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/hex"
|
||||
"hash"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
@@ -512,11 +514,30 @@ func (s *posix) ReadAll(volume, path string) (buf []byte, err error) {
|
||||
// number of bytes copied. The error is EOF only if no bytes were
|
||||
// read. On return, n == len(buf) if and only if err == nil. n == 0
|
||||
// for io.EOF.
|
||||
//
|
||||
// If an EOF happens after reading some but not all the bytes,
|
||||
// ReadFull returns ErrUnexpectedEOF.
|
||||
// Additionally ReadFile also starts reading from an offset.
|
||||
// ReadFile symantics are same as io.ReadFull
|
||||
func (s *posix) ReadFile(volume string, path string, offset int64, buf []byte) (n int64, err error) {
|
||||
//
|
||||
// Additionally ReadFile also starts reading from an offset. ReadFile
|
||||
// semantics are same as io.ReadFull.
|
||||
func (s *posix) ReadFile(volume, path string, offset int64, buf []byte) (n int64, err error) {
|
||||
|
||||
return s.ReadFileWithVerify(volume, path, offset, buf, "", "")
|
||||
}
|
||||
|
||||
// ReadFileWithVerify is the same as ReadFile but with hashsum
|
||||
// verification: the operation will fail if the hash verification
|
||||
// fails.
|
||||
//
|
||||
// The `expectedHash` is the expected hex-encoded hash string for
|
||||
// verification. With an empty expected hash string, hash verification
|
||||
// is skipped. An empty HashAlgo defaults to `blake2b`.
|
||||
//
|
||||
// The function takes care to minimize the number of disk read
|
||||
// operations.
|
||||
func (s *posix) ReadFileWithVerify(volume, path string, offset int64, buf []byte,
|
||||
algo HashAlgo, expectedHash string) (n int64, err error) {
|
||||
|
||||
defer func() {
|
||||
if err == syscall.EIO {
|
||||
atomic.AddInt32(&s.ioErrCount, 1)
|
||||
@@ -571,19 +592,66 @@ func (s *posix) ReadFile(volume string, path string, offset int64, buf []byte) (
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Verify if its not a regular file, since subsequent Seek is undefined.
|
||||
// Verify it is a regular file, otherwise subsequent Seek is
|
||||
// undefined.
|
||||
if !st.Mode().IsRegular() {
|
||||
return 0, errIsNotRegular
|
||||
}
|
||||
|
||||
// Seek to requested offset.
|
||||
_, err = file.Seek(offset, os.SEEK_SET)
|
||||
if err != nil {
|
||||
// If expected hash string is empty hash verification is
|
||||
// skipped.
|
||||
needToHash := expectedHash != ""
|
||||
var hasher hash.Hash
|
||||
|
||||
if needToHash {
|
||||
// If the hashing algo is invalid, return an error.
|
||||
if !isValidHashAlgo(algo) {
|
||||
return 0, errBitrotHashAlgoInvalid
|
||||
}
|
||||
|
||||
// Compute hash of object from start to the byte at
|
||||
// (offset - 1), and as a result of this read, seek to
|
||||
// `offset`.
|
||||
hasher = newHash(algo)
|
||||
if offset > 0 {
|
||||
_, err = io.CopyN(hasher, file, offset)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Seek to requested offset.
|
||||
_, err = file.Seek(offset, os.SEEK_SET)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
// Read until buffer is full.
|
||||
m, err := io.ReadFull(file, buf)
|
||||
if err == io.EOF {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Read full until buffer.
|
||||
m, err := io.ReadFull(file, buf)
|
||||
if needToHash {
|
||||
// Continue computing hash with buf.
|
||||
_, err = hasher.Write(buf)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Continue computing hash until end of file.
|
||||
_, err = io.Copy(hasher, file)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Verify the computed hash.
|
||||
computedHash := hex.EncodeToString(hasher.Sum(nil))
|
||||
if computedHash != expectedHash {
|
||||
return 0, hashMismatchError{expectedHash, computedHash}
|
||||
}
|
||||
}
|
||||
|
||||
// Success.
|
||||
return int64(m), err
|
||||
|
||||
Reference in New Issue
Block a user