mirror of https://github.com/minio/minio.git
synced 2024-12-25 14:45:54 -05:00
7e6b5bdbb7
This change removes the ReadFileWithVerify function from the StorageAPI: ReadFile was essentially a redirection to ReadFileWithVerify, so the redirection is removed and the logic of ReadFileWithVerify moves directly into ReadFile. This eliminates a lot of unnecessary code in all StorageAPI implementations. Fixes #4946 * review: fix doc and typos
153 lines
4.5 KiB
Go
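As the commit message above notes, bitrot verification is now part of ReadFile itself rather than a separate ReadFileWithVerify. A minimal sketch of the consolidated call, inferred from its use in HealFile below (the nil-verifier form for a plain, unverified read is an assumption based on the commit description, not something shown in this file):

	// Verified read: ReadFile checks the data against the expected
	// bitrot checksum while reading.
	verifier := NewBitrotVerifier(algorithm, expectedChecksum)
	_, err := disk.ReadFile(volume, path, offset, buffer, verifier)

	// Plain read (assumed): passing a nil verifier presumably skips
	// the bitrot check.
	_, err = disk.ReadFile(volume, path, offset, buffer, nil)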
/*
 * Minio Cloud Storage, (C) 2016 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"hash"
)

// HealFile tries to reconstruct an erasure-coded file spread over all
// available disks. HealFile will read the valid parts of the file,
// reconstruct the missing data and write the reconstructed parts back
// to `staleDisks`.
//
// `staleDisks` is a slice of disks where each non-nil entry has stale
// or no data, and so will be healed.
//
// It is required that `s.disks` have a (read-quorum) majority of
// disks with valid data for healing to work.
//
// In addition, `staleDisks` and `s.disks` must have the same ordering
// of disks w.r.t. erasure coding of the object.
//
// The function tries to read the valid parts from the file under the
// given volume and path and to reconstruct the file under the given
// healVolume and healPath (on staleDisks). The given algorithm is
// used to verify the valid parts and to protect the reconstructed
// file.
//
// It returns bitrot checksums for the non-nil staleDisks.
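//
// A hypothetical invocation, where every argument name is an
// illustrative assumption rather than an identifier from this
// codebase:
//
//	file, err := s.HealFile(staleDisks, srcVolume, srcPath,
//		blockSize, healVolume, healPath, objectSize,
//		algorithm, knownChecksums)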
func (s ErasureStorage) HealFile(staleDisks []StorageAPI, volume, path string,
	blocksize int64, healVolume, healPath string, size int64,
	algorithm BitrotAlgorithm, checksums [][]byte) (f ErasureFileInfo,
	err error) {

	if !algorithm.Available() {
		return f, traceError(errBitrotHashAlgoInvalid)
	}

	// Initialization
	f.Checksums = make([][]byte, len(s.disks))
	hashers := make([]hash.Hash, len(s.disks))
	verifiers := make([]*BitrotVerifier, len(s.disks))
	for i, disk := range s.disks {
		switch {
		case staleDisks[i] != nil:
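			// stale disk: prepare a hasher to compute the
			// bitrot checksum of the healed data written back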
			hashers[i] = algorithm.New()
		case disk == nil:
			// disregard unavailable disk
			continue
		default:
			verifiers[i] = NewBitrotVerifier(algorithm, checksums[i])
			f.Checksums[i] = checksums[i]
		}
	}

	// Scan part files on disk, reconstruct the data block by
	// block, and write it to the stale disks.
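	// Each block of `blocksize` bytes of the logical file is
	// erasure-coded into len(s.disks) chunks of `chunksize` bytes
	// each, so blockOffset tracks the position in the logical file
	// while chunkOffset tracks the position in each per-disk part
	// file.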
	chunksize := getChunkSize(blocksize, s.dataBlocks)
	var chunkOffset, blockOffset int64
	for ; blockOffset < size; blockOffset += blocksize {
		// last iteration may have less than blocksize data
		// left, so chunksize needs to be recomputed.
		if size < blockOffset+blocksize {
			blocksize = size - blockOffset
			chunksize = getChunkSize(blocksize, s.dataBlocks)
		}

		// read a chunk from each disk, until we have
		// `s.dataBlocks` number of chunks set to non-nil in
		// `blocks`
		blocks := make([][]byte, len(s.disks))
		var buffer []byte
		numReads := 0
		for i, disk := range s.disks {
			// skip reading from unavailable or stale disks
			if disk == nil || staleDisks[i] != nil {
				continue
			}
			// allocate buffer only when needed - when
			// reads fail, the buffer can be reused
			if int64(len(buffer)) != chunksize {
				buffer = make([]byte, chunksize)
			}
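			// a non-nil verifier makes ReadFile verify the
			// chunk against its bitrot checksum as it reads
			// (verification was folded into ReadFile; see the
			// commit message above)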
			_, err = disk.ReadFile(volume, path, chunkOffset, buffer, verifiers[i])
			if err != nil {
				// LOG FIXME: add a conditional log
				// for read failures, once per-disk
				// per-function-invocation.
				continue
			}

			// read was successful, so set the buffer as
			// blocks[i], and reset buffer to nil to force
			// allocation on next iteration
			blocks[i], buffer = buffer, nil

			numReads++
			if numReads == s.dataBlocks {
				// we have enough data to reconstruct
				break
			}
		}

		// advance the chunk offset to prepare for next loop
		// iteration
		chunkOffset += chunksize

		// reconstruct data - this computes all data and parity shards
		if err = s.ErasureDecodeDataAndParityBlocks(blocks); err != nil {
			return f, err
		}

		// write computed shards as chunks on file in each
		// stale disk
		for i, disk := range staleDisks {
			if disk == nil {
				continue
			}

			err = disk.AppendFile(healVolume, healPath, blocks[i])
			if err != nil {
				return f, traceError(err)
			}
			hashers[i].Write(blocks[i])
		}
	}

	// copy computed file hashes into output variable
	f.Size = size
	f.Algorithm = algorithm
	for i, disk := range staleDisks {
		if disk == nil {
			continue
		}
		f.Checksums[i] = hashers[i].Sum(nil)
	}
	return f, nil
}