From 58d90ed73c011468b712faa53fa12142fa5b6e0d Mon Sep 17 00:00:00 2001 From: Krishna Srinivas <634494+krishnasrinivas@users.noreply.github.com> Date: Mon, 8 Jul 2019 13:51:18 -0700 Subject: [PATCH] Avoid network transfer for bitrot verification during healing (#7375) --- cmd/bitrot-streaming.go | 2 +- cmd/bitrot.go | 30 ----------- cmd/naughty-disk_test.go | 7 +++ cmd/posix.go | 103 +++++++++++++++++++++++++++++++++++- cmd/posix_test.go | 101 +++++++++++++++++++++++++++++++++-- cmd/server-main.go | 2 + cmd/storage-errors.go | 15 +++--- cmd/storage-interface.go | 1 + cmd/storage-rest-client.go | 36 ++++++++++++- cmd/storage-rest-common.go | 3 +- cmd/storage-rest-server.go | 61 +++++++++++++++++++++ cmd/xl-v1-healing-common.go | 3 +- cmd/xl-v1-healing.go | 4 +- 13 files changed, 317 insertions(+), 51 deletions(-) diff --git a/cmd/bitrot-streaming.go b/cmd/bitrot-streaming.go index 2dc7b707c..8faa45da2 100644 --- a/cmd/bitrot-streaming.go +++ b/cmd/bitrot-streaming.go @@ -131,7 +131,7 @@ func (b *streamingBitrotReader) ReadAt(buf []byte, offset int64) (int, error) { b.h.Write(buf) if !bytes.Equal(b.h.Sum(nil), b.hashBytes) { - err = hashMismatchError{hex.EncodeToString(b.hashBytes), hex.EncodeToString(b.h.Sum(nil))} + err = HashMismatchError{hex.EncodeToString(b.hashBytes), hex.EncodeToString(b.h.Sum(nil))} logger.LogIf(context.Background(), err) return 0, err } diff --git a/cmd/bitrot.go b/cmd/bitrot.go index 14a7bde4a..42b07e2e0 100644 --- a/cmd/bitrot.go +++ b/cmd/bitrot.go @@ -155,33 +155,3 @@ func bitrotWriterSum(w io.Writer) []byte { } return nil } - -// Verify if a file has bitrot error. -func bitrotCheckFile(disk StorageAPI, volume string, filePath string, tillOffset int64, algo BitrotAlgorithm, sum []byte, shardSize int64) (err error) { - if algo != HighwayHash256S { - buf := []byte{} - // For whole-file bitrot we don't need to read the entire file as the bitrot verify happens on the server side even if we read 0-bytes. - _, err = disk.ReadFile(volume, filePath, 0, buf, NewBitrotVerifier(algo, sum)) - return err - } - buf := make([]byte, shardSize) - r := newStreamingBitrotReader(disk, volume, filePath, tillOffset, algo, shardSize) - defer closeBitrotReaders([]io.ReaderAt{r}) - var offset int64 - for { - if offset == tillOffset { - break - } - var n int - tmpBuf := buf - if int64(len(tmpBuf)) > (tillOffset - offset) { - tmpBuf = tmpBuf[:(tillOffset - offset)] - } - n, err = r.ReadAt(tmpBuf, offset) - if err != nil { - return err - } - offset += int64(n) - } - return nil -} diff --git a/cmd/naughty-disk_test.go b/cmd/naughty-disk_test.go index 2a6f1f1cc..cf1725ac3 100644 --- a/cmd/naughty-disk_test.go +++ b/cmd/naughty-disk_test.go @@ -195,3 +195,10 @@ func (d *naughtyDisk) ReadAll(volume string, path string) (buf []byte, err error } return d.disk.ReadAll(volume, path) } + +func (d *naughtyDisk) VerifyFile(volume, path string, algo BitrotAlgorithm, sum []byte, shardSize int64) error { + if err := d.calcError(); err != nil { + return err + } + return d.disk.VerifyFile(volume, path, algo, sum, shardSize) +} diff --git a/cmd/posix.go b/cmd/posix.go index 78ae3afce..8975ae7d3 100644 --- a/cmd/posix.go +++ b/cmd/posix.go @@ -958,7 +958,7 @@ func (s *posix) ReadFile(volume, path string, offset int64, buffer []byte, verif } if !bytes.Equal(h.Sum(nil), verifier.sum) { - return 0, hashMismatchError{hex.EncodeToString(verifier.sum), hex.EncodeToString(h.Sum(nil))} + return 0, HashMismatchError{hex.EncodeToString(verifier.sum), hex.EncodeToString(h.Sum(nil))} } return int64(len(buffer)), nil @@ -1525,3 +1525,104 @@ func (s *posix) RenameFile(srcVolume, srcPath, dstVolume, dstPath string) (err e return nil } + +func (s *posix) VerifyFile(volume, path string, algo BitrotAlgorithm, sum []byte, shardSize int64) (err error) { + defer func() { + if err == errFaultyDisk { + atomic.AddInt32(&s.ioErrCount, 1) + } + }() + + if atomic.LoadInt32(&s.ioErrCount) > maxAllowedIOError { + return errFaultyDisk + } + + if err = s.checkDiskFound(); err != nil { + return err + } + + volumeDir, err := s.getVolDir(volume) + if err != nil { + return err + } + // Stat a volume entry. + _, err = os.Stat((volumeDir)) + if err != nil { + if os.IsNotExist(err) { + return errVolumeNotFound + } + return err + } + + // Validate effective path length before reading. + filePath := pathJoin(volumeDir, path) + if err = checkPathLength((filePath)); err != nil { + return err + } + + // Open the file for reading. + file, err := os.Open((filePath)) + if err != nil { + switch { + case os.IsNotExist(err): + return errFileNotFound + case os.IsPermission(err): + return errFileAccessDenied + case isSysErrNotDir(err): + return errFileAccessDenied + case isSysErrIO(err): + return errFaultyDisk + default: + return err + } + } + + // Close the file descriptor. + defer file.Close() + + if algo != HighwayHash256S { + bufp := s.pool.Get().(*[]byte) + defer s.pool.Put(bufp) + + h := algo.New() + if _, err = io.CopyBuffer(h, file, *bufp); err != nil { + return err + } + if !bytes.Equal(h.Sum(nil), sum) { + return HashMismatchError{hex.EncodeToString(sum), hex.EncodeToString(h.Sum(nil))} + } + return nil + } + + buf := make([]byte, shardSize) + h := algo.New() + hashBuf := make([]byte, h.Size()) + fi, err := file.Stat() + if err != nil { + return err + } + size := fi.Size() + for { + if size == 0 { + return nil + } + h.Reset() + n, err := file.Read(hashBuf) + if err != nil { + return err + } + size -= int64(n) + if size < int64(len(buf)) { + buf = buf[:size] + } + n, err = file.Read(buf) + if err != nil { + return err + } + size -= int64(n) + h.Write(buf) + if !bytes.Equal(h.Sum(nil), hashBuf) { + return HashMismatchError{hex.EncodeToString(hashBuf), hex.EncodeToString(h.Sum(nil))} + } + } +} diff --git a/cmd/posix_test.go b/cmd/posix_test.go index 89933a4ec..16148faa5 100644 --- a/cmd/posix_test.go +++ b/cmd/posix_test.go @@ -1251,13 +1251,13 @@ var posixReadFileWithVerifyTests = []struct { {file: "myobject", offset: 25, length: 74, algorithm: SHA256, expError: nil}, // 1 {file: "myobject", offset: 29, length: 70, algorithm: SHA256, expError: nil}, // 2 {file: "myobject", offset: 100, length: 0, algorithm: SHA256, expError: nil}, // 3 - {file: "myobject", offset: 1, length: 120, algorithm: SHA256, expError: hashMismatchError{}}, // 4 + {file: "myobject", offset: 1, length: 120, algorithm: SHA256, expError: HashMismatchError{}}, // 4 {file: "myobject", offset: 3, length: 1100, algorithm: SHA256, expError: nil}, // 5 - {file: "myobject", offset: 2, length: 100, algorithm: SHA256, expError: hashMismatchError{}}, // 6 + {file: "myobject", offset: 2, length: 100, algorithm: SHA256, expError: HashMismatchError{}}, // 6 {file: "myobject", offset: 1000, length: 1001, algorithm: SHA256, expError: nil}, // 7 - {file: "myobject", offset: 0, length: 100, algorithm: BLAKE2b512, expError: hashMismatchError{}}, // 8 + {file: "myobject", offset: 0, length: 100, algorithm: BLAKE2b512, expError: HashMismatchError{}}, // 8 {file: "myobject", offset: 25, length: 74, algorithm: BLAKE2b512, expError: nil}, // 9 - {file: "myobject", offset: 29, length: 70, algorithm: BLAKE2b512, expError: hashMismatchError{}}, // 10 + {file: "myobject", offset: 29, length: 70, algorithm: BLAKE2b512, expError: HashMismatchError{}}, // 10 {file: "myobject", offset: 100, length: 0, algorithm: BLAKE2b512, expError: nil}, // 11 {file: "myobject", offset: 1, length: 120, algorithm: BLAKE2b512, expError: nil}, // 12 {file: "myobject", offset: 3, length: 1100, algorithm: BLAKE2b512, expError: nil}, // 13 @@ -1296,7 +1296,7 @@ func TestPosixReadFileWithVerify(t *testing.T) { if test.expError != nil { expected := h.Sum(nil) h.Write([]byte{0}) - test.expError = hashMismatchError{hex.EncodeToString(h.Sum(nil)), hex.EncodeToString(expected)} + test.expError = HashMismatchError{hex.EncodeToString(h.Sum(nil)), hex.EncodeToString(expected)} } buffer := make([]byte, test.length) @@ -1763,6 +1763,97 @@ func TestPosixStatFile(t *testing.T) { } } +// Test posix.VerifyFile() +func TestPosixVerifyFile(t *testing.T) { + // We test 4 cases: + // 1) Whole-file bitrot check on proper file + // 2) Whole-file bitrot check on corrupted file + // 3) Streaming bitrot check on proper file + // 4) Streaming bitrot check on corrupted file + + // create posix test setup + posixStorage, path, err := newPosixTestSetup() + if err != nil { + t.Fatalf("Unable to create posix test setup, %s", err) + } + defer os.RemoveAll(path) + + volName := "testvol" + fileName := "testfile" + if err := posixStorage.MakeVol(volName); err != nil { + t.Fatal(err) + } + + // 1) Whole-file bitrot check on proper file + size := int64(4*1024*1024 + 100*1024) // 4.1 MB + data := make([]byte, size) + if _, err := rand.Read(data); err != nil { + t.Fatal(err) + } + algo := HighwayHash256 + h := algo.New() + h.Write(data) + hashBytes := h.Sum(nil) + if err := posixStorage.WriteAll(volName, fileName, bytes.NewBuffer(data)); err != nil { + t.Fatal(err) + } + if err := posixStorage.VerifyFile(volName, fileName, algo, hashBytes, 0); err != nil { + t.Fatal(err) + } + + // 2) Whole-file bitrot check on corrupted file + if err := posixStorage.AppendFile(volName, fileName, []byte("a")); err != nil { + t.Fatal(err) + } + if err := posixStorage.VerifyFile(volName, fileName, algo, hashBytes, 0); err == nil { + t.Fatal("expected to fail bitrot check") + } + + if err := posixStorage.DeleteFile(volName, fileName); err != nil { + t.Fatal(err) + } + + // 3) Streaming bitrot check on proper file + algo = HighwayHash256S + shardSize := int64(1024 * 1024) + shard := make([]byte, shardSize) + w := newStreamingBitrotWriter(posixStorage, volName, fileName, size, algo, shardSize) + reader := bytes.NewReader(data) + for { + // Using io.CopyBuffer instead of this loop will not work for us as io.CopyBuffer + // will use bytes.Buffer.ReadFrom() which will not do shardSize'ed writes causing error. + n, err := reader.Read(shard) + w.Write(shard[:n]) + if err == nil { + continue + } + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + } + w.Close() + if err := posixStorage.VerifyFile(volName, fileName, algo, nil, shardSize); err != nil { + t.Fatal(err) + } + + // 4) Streaming bitrot check on corrupted file + filePath := pathJoin(posixStorage.String(), volName, fileName) + f, err := os.OpenFile(filePath, os.O_WRONLY, 0644) + if err != nil { + t.Fatal(err) + } + if _, err := f.WriteString("a"); err != nil { + t.Fatal(err) + } + f.Close() + if err := posixStorage.VerifyFile(volName, fileName, algo, nil, shardSize); err == nil { + t.Fatal("expected to fail bitrot check") + } +} + // Checks for restrictions for min total disk space and inodes. func TestCheckDiskTotalMin(t *testing.T) { testCases := []struct { diff --git a/cmd/server-main.go b/cmd/server-main.go index 1989025da..1afe16a3e 100644 --- a/cmd/server-main.go +++ b/cmd/server-main.go @@ -18,6 +18,7 @@ package cmd import ( "context" + "encoding/gob" "errors" "fmt" "net/http" @@ -36,6 +37,7 @@ import ( func init() { logger.Init(GOPATH, GOROOT) logger.RegisterUIError(fmtError) + gob.Register(HashMismatchError{}) } // ServerFlags - server command specific flags diff --git a/cmd/storage-errors.go b/cmd/storage-errors.go index 3537af961..ced8a63e4 100644 --- a/cmd/storage-errors.go +++ b/cmd/storage-errors.go @@ -91,18 +91,17 @@ var errLessData = errors.New("less data available than what was requested") // errMoreData = returned when more data was sent by the caller than what it was supposed to. var errMoreData = errors.New("more data was sent than what was advertised") -// hashMisMatchError - represents a bit-rot hash verification failure -// error. -type hashMismatchError struct { - expected string - computed string +// HashMismatchError represents a bit-rot hash verification failure error. +type HashMismatchError struct { + Expected string + Computed string } -// error method for the hashMismatchError -func (h hashMismatchError) Error() string { +// Error method for the hashMismatchError +func (h HashMismatchError) Error() string { return fmt.Sprintf( "Bitrot verification mismatch - expected %v, received %v", - h.expected, h.computed) + h.Expected, h.Computed) } // Collection of basic errors. diff --git a/cmd/storage-interface.go b/cmd/storage-interface.go index 74197f3de..4fbdca226 100644 --- a/cmd/storage-interface.go +++ b/cmd/storage-interface.go @@ -52,6 +52,7 @@ type StorageAPI interface { StatFile(volume string, path string) (file FileInfo, err error) DeleteFile(volume string, path string) (err error) DeleteFileBulk(volume string, paths []string) (errs []error, err error) + VerifyFile(volume, path string, algo BitrotAlgorithm, sum []byte, shardSize int64) error // Write all data, syncs the data to disk. WriteAll(volume string, path string, reader io.Reader) (err error) diff --git a/cmd/storage-rest-client.go b/cmd/storage-rest-client.go index 46ed4fe49..11079bf7e 100644 --- a/cmd/storage-rest-client.go +++ b/cmd/storage-rest-client.go @@ -17,6 +17,7 @@ package cmd import ( + "bufio" "bytes" "crypto/tls" "io" @@ -105,7 +106,7 @@ func toStorageErr(err error) error { fmt.Sscanf(err.Error(), "Bitrot verification mismatch - expected %s received %s", &expected, &received) // Go's Sscanf %s scans "," that comes after the expected hash, hence remove it. Providing "," in the format string does not help. expected = strings.TrimSuffix(expected, ",") - bitrotErr := hashMismatchError{expected, received} + bitrotErr := HashMismatchError{expected, received} return bitrotErr } return err @@ -433,6 +434,39 @@ func (client *storageRESTClient) getInstanceID() (err error) { return nil } +func (client *storageRESTClient) VerifyFile(volume, path string, algo BitrotAlgorithm, sum []byte, shardSize int64) error { + values := make(url.Values) + values.Set(storageRESTVolume, volume) + values.Set(storageRESTFilePath, path) + values.Set(storageRESTBitrotAlgo, algo.String()) + values.Set(storageRESTLength, strconv.Itoa(int(shardSize))) + if len(sum) != 0 { + values.Set(storageRESTBitrotHash, hex.EncodeToString(sum)) + } + respBody, err := client.call(storageRESTMethodVerifyFile, values, nil, -1) + defer http.DrainBody(respBody) + if err != nil { + return err + } + reader := bufio.NewReader(respBody) + for { + b, err := reader.ReadByte() + if err != nil { + return err + } + if b != ' ' { + reader.UnreadByte() + break + } + } + verifyResp := &VerifyFileResp{} + err = gob.NewDecoder(reader).Decode(verifyResp) + if err != nil { + return err + } + return toStorageErr(verifyResp.Err) +} + // Close - marks the client as closed. func (client *storageRESTClient) Close() error { client.connected = false diff --git a/cmd/storage-rest-common.go b/cmd/storage-rest-common.go index ca64d4a93..5f687f606 100644 --- a/cmd/storage-rest-common.go +++ b/cmd/storage-rest-common.go @@ -16,7 +16,7 @@ package cmd -const storageRESTVersion = "v6" +const storageRESTVersion = "v7" const storageRESTPath = minioReservedBucketPath + "/storage/" + storageRESTVersion + "/" const ( @@ -38,6 +38,7 @@ const ( storageRESTMethodDeleteFile = "deletefile" storageRESTMethodDeleteFileBulk = "deletefilebulk" storageRESTMethodRenameFile = "renamefile" + storageRESTMethodVerifyFile = "verifyfile" storageRESTMethodGetInstanceID = "getinstanceid" ) diff --git a/cmd/storage-rest-server.go b/cmd/storage-rest-server.go index 2c61b050b..fd29e0a02 100644 --- a/cmd/storage-rest-server.go +++ b/cmd/storage-rest-server.go @@ -487,6 +487,65 @@ func (s *storageRESTServer) RenameFileHandler(w http.ResponseWriter, r *http.Req } } +// Send whitespace to the client to avoid timeouts as bitrot verification can take time on spinning/slow disks. +func sendWhiteSpaceVerifyFile(w http.ResponseWriter) <-chan struct{} { + doneCh := make(chan struct{}) + go func() { + ticker := time.NewTicker(time.Second * 10) + for { + select { + case <-ticker.C: + w.Write([]byte(" ")) + w.(http.Flusher).Flush() + case doneCh <- struct{}{}: + ticker.Stop() + return + } + } + + }() + return doneCh +} + +// VerifyFileResp - VerifyFile()'s response. +type VerifyFileResp struct { + Err error +} + +// VerifyFile - Verify the file for bitrot errors. +func (s *storageRESTServer) VerifyFile(w http.ResponseWriter, r *http.Request) { + if !s.IsValid(w, r) { + return + } + vars := mux.Vars(r) + volume := vars[storageRESTVolume] + filePath := vars[storageRESTFilePath] + shardSize, err := strconv.Atoi(vars[storageRESTLength]) + if err != nil { + s.writeErrorResponse(w, err) + return + } + hashStr := vars[storageRESTBitrotHash] + var hash []byte + if hashStr != "" { + hash, err = hex.DecodeString(hashStr) + if err != nil { + s.writeErrorResponse(w, err) + return + } + } + algoStr := vars[storageRESTBitrotAlgo] + if algoStr == "" { + s.writeErrorResponse(w, errInvalidArgument) + return + } + algo := BitrotAlgorithmFromString(algoStr) + doneCh := sendWhiteSpaceVerifyFile(w) + err = s.storage.VerifyFile(volume, filePath, algo, hash, int64(shardSize)) + <-doneCh + gob.NewEncoder(w).Encode(VerifyFileResp{err}) +} + // registerStorageRPCRouter - register storage rpc router. func registerStorageRESTHandlers(router *mux.Router, endpoints EndpointList) { for _, endpoint := range endpoints { @@ -534,6 +593,8 @@ func registerStorageRESTHandlers(router *mux.Router, endpoints EndpointList) { subrouter.Methods(http.MethodPost).Path("/" + storageRESTMethodRenameFile).HandlerFunc(httpTraceHdrs(server.RenameFileHandler)). Queries(restQueries(storageRESTSrcVolume, storageRESTSrcPath, storageRESTDstVolume, storageRESTDstPath)...) + subrouter.Methods(http.MethodPost).Path("/" + storageRESTMethodVerifyFile).HandlerFunc(httpTraceHdrs(server.VerifyFile)). + Queries(restQueries(storageRESTVolume, storageRESTFilePath, storageRESTBitrotAlgo, storageRESTLength)...) subrouter.Methods(http.MethodPost).Path("/" + storageRESTMethodGetInstanceID).HandlerFunc(httpTraceAll(server.GetInstanceID)) } diff --git a/cmd/xl-v1-healing-common.go b/cmd/xl-v1-healing-common.go index 19dffe1ef..7a1470621 100644 --- a/cmd/xl-v1-healing-common.go +++ b/cmd/xl-v1-healing-common.go @@ -183,8 +183,7 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad // it needs healing too. for _, part := range partsMetadata[i].Parts { checksumInfo := erasureInfo.GetChecksumInfo(part.Name) - tillOffset := erasure.ShardFileTillOffset(0, part.Size, part.Size) - err = bitrotCheckFile(onlineDisk, bucket, pathJoin(object, part.Name), tillOffset, checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize()) + err = onlineDisk.VerifyFile(bucket, pathJoin(object, part.Name), checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize()) if err != nil { isCorrupt := strings.HasPrefix(err.Error(), "Bitrot verification mismatch - expected ") if !isCorrupt && err != errFileNotFound && err != errVolumeNotFound { diff --git a/cmd/xl-v1-healing.go b/cmd/xl-v1-healing.go index f99dddbc9..622761e70 100644 --- a/cmd/xl-v1-healing.go +++ b/cmd/xl-v1-healing.go @@ -195,7 +195,7 @@ func shouldHealObjectOnDisk(xlErr, dataErr error, meta xlMetaV1, quorumModTime t if dataErr == errFileNotFound { return true } - if _, ok := dataErr.(hashMismatchError); ok { + if _, ok := dataErr.(HashMismatchError); ok { return true } if quorumModTime != meta.Stat.ModTime { @@ -351,7 +351,7 @@ func (xl xlObjects) healObject(ctx context.Context, bucket string, object string } // Reorder so that we have data disks first and parity disks next. - latestDisks = shuffleDisks(latestDisks, latestMeta.Erasure.Distribution) + latestDisks = shuffleDisks(availableDisks, latestMeta.Erasure.Distribution) outDatedDisks = shuffleDisks(outDatedDisks, latestMeta.Erasure.Distribution) partsMetadata = shufflePartsMetadata(partsMetadata, latestMeta.Erasure.Distribution) for i := range outDatedDisks {