2021-04-18 15:41:13 -04:00
|
|
|
// Copyright (c) 2015-2021 MinIO, Inc.
|
|
|
|
//
|
|
|
|
// This file is part of MinIO Object Storage stack
|
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2016-05-30 19:51:59 -04:00
|
|
|
|
2016-08-18 19:23:42 -04:00
|
|
|
package cmd
|
2016-05-25 19:42:31 -04:00
|
|
|
|
|
|
|
import (
|
2018-04-05 18:04:40 -04:00
|
|
|
"context"
|
2017-01-31 18:34:49 -05:00
|
|
|
"errors"
|
2021-03-18 23:16:50 -04:00
|
|
|
"fmt"
|
2016-07-08 10:33:21 -04:00
|
|
|
"hash/crc32"
|
2016-09-09 01:38:18 -04:00
|
|
|
|
2021-06-01 17:59:40 -04:00
|
|
|
"github.com/minio/minio/internal/logger"
|
|
|
|
"github.com/minio/minio/internal/sync/errgroup"
|
2016-05-25 19:42:31 -04:00
|
|
|
)
|
|
|
|
|
2016-07-13 14:56:25 -04:00
|
|
|
// Returns number of errors that occurred the most (incl. nil) and the
|
2017-08-14 21:09:29 -04:00
|
|
|
// corresponding error value. NB When there is more than one error value that
|
2016-07-13 14:56:25 -04:00
|
|
|
// occurs maximum number of times, the error value returned depends on how
|
|
|
|
// golang's map orders keys. This doesn't affect correctness as long as quorum
|
|
|
|
// value is greater than or equal to simple majority, since none of the equally
|
|
|
|
// maximal values would occur quorum or more number of times.
|
2016-11-21 04:47:26 -05:00
|
|
|
func reduceErrs(errs []error, ignoredErrs []error) (maxCount int, maxErr error) {
|
2016-07-13 14:56:25 -04:00
|
|
|
errorCounts := make(map[error]int)
|
2016-07-09 16:01:32 -04:00
|
|
|
for _, err := range errs {
|
2018-04-10 12:36:37 -04:00
|
|
|
if IsErrIgnored(err, ignoredErrs...) {
|
2016-07-19 22:24:32 -04:00
|
|
|
continue
|
|
|
|
}
|
2021-11-16 18:26:48 -05:00
|
|
|
// Errors due to context cancelation may be wrapped - group them by context.Canceled.
|
|
|
|
if errors.Is(err, context.Canceled) {
|
|
|
|
errorCounts[context.Canceled]++
|
|
|
|
continue
|
|
|
|
}
|
2016-07-13 14:56:25 -04:00
|
|
|
errorCounts[err]++
|
2016-07-09 16:01:32 -04:00
|
|
|
}
|
2017-03-22 13:15:16 -04:00
|
|
|
|
2016-07-09 16:01:32 -04:00
|
|
|
max := 0
|
2016-07-13 14:56:25 -04:00
|
|
|
for err, count := range errorCounts {
|
2017-03-22 13:15:16 -04:00
|
|
|
switch {
|
|
|
|
case max < count:
|
2016-07-13 14:56:25 -04:00
|
|
|
max = count
|
2016-11-21 04:47:26 -05:00
|
|
|
maxErr = err
|
2017-03-22 13:15:16 -04:00
|
|
|
|
|
|
|
// Prefer `nil` over other error values with the same
|
|
|
|
// number of occurrences.
|
|
|
|
case max == count && err == nil:
|
|
|
|
maxErr = err
|
2016-07-09 16:01:32 -04:00
|
|
|
}
|
|
|
|
}
|
2016-11-21 04:47:26 -05:00
|
|
|
return max, maxErr
|
|
|
|
}
|
|
|
|
|
|
|
|
// reduceQuorumErrs behaves like reduceErrs by only for returning
|
|
|
|
// values of maximally occurring errors validated against a generic
|
2017-08-14 21:09:29 -04:00
|
|
|
// quorum number that can be read or write quorum depending on usage.
|
2018-04-05 18:04:40 -04:00
|
|
|
func reduceQuorumErrs(ctx context.Context, errs []error, ignoredErrs []error, quorum int, quorumErr error) error {
|
2022-02-23 14:59:13 -05:00
|
|
|
if contextCanceled(ctx) {
|
|
|
|
return context.Canceled
|
|
|
|
}
|
2018-04-05 18:04:40 -04:00
|
|
|
maxCount, maxErr := reduceErrs(errs, ignoredErrs)
|
|
|
|
if maxCount >= quorum {
|
|
|
|
return maxErr
|
2016-11-21 04:47:26 -05:00
|
|
|
}
|
2018-04-05 18:04:40 -04:00
|
|
|
return quorumErr
|
2016-11-21 04:47:26 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// reduceReadQuorumErrs behaves like reduceErrs but only for returning
|
|
|
|
// values of maximally occurring errors validated against readQuorum.
|
2018-04-05 18:04:40 -04:00
|
|
|
func reduceReadQuorumErrs(ctx context.Context, errs []error, ignoredErrs []error, readQuorum int) (maxErr error) {
|
2020-06-12 23:04:01 -04:00
|
|
|
return reduceQuorumErrs(ctx, errs, ignoredErrs, readQuorum, errErasureReadQuorum)
|
2016-11-21 04:47:26 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// reduceWriteQuorumErrs behaves like reduceErrs but only for returning
|
|
|
|
// values of maximally occurring errors validated against writeQuorum.
|
2018-04-05 18:04:40 -04:00
|
|
|
func reduceWriteQuorumErrs(ctx context.Context, errs []error, ignoredErrs []error, writeQuorum int) (maxErr error) {
|
2020-06-12 23:04:01 -04:00
|
|
|
return reduceQuorumErrs(ctx, errs, ignoredErrs, writeQuorum, errErasureWriteQuorum)
|
2016-07-09 16:01:32 -04:00
|
|
|
}
|
|
|
|
|
2017-08-14 21:09:29 -04:00
|
|
|
// Similar to 'len(slice)' but returns the actual elements count
|
2016-06-17 14:57:51 -04:00
|
|
|
// skipping the unallocated elements.
|
|
|
|
func diskCount(disks []StorageAPI) int {
|
|
|
|
diskCount := 0
|
|
|
|
for _, disk := range disks {
|
|
|
|
if disk == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
diskCount++
|
|
|
|
}
|
|
|
|
return diskCount
|
|
|
|
}
|
|
|
|
|
2017-08-14 21:09:29 -04:00
|
|
|
// hashOrder - hashes the input key to return a consistent integer
// slice. The returned order is salted with the input key, so the same
// key always yields the same order.
// NOTE: collisions are fine, we are not looking for uniqueness
// in the slices returned.
func hashOrder(key string, cardinality int) []int {
	if cardinality <= 0 {
		// Nothing to order for a non-positive cardinality.
		return nil
	}

	keyCrc := crc32.Checksum([]byte(key), crc32.IEEETable)
	start := int(keyCrc % uint32(cardinality))

	nums := make([]int, cardinality)
	for i := range nums {
		// Rotation of 1..cardinality beginning just past start.
		nums[i] = 1 + ((start + i + 1) % cardinality)
	}
	return nums
}
|
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
// Reads all `xl.meta` metadata as a FileInfo slice.
|
2016-07-26 14:34:48 -04:00
|
|
|
// Returns error slice indicating the failed metadata reads.
|
2021-01-07 22:27:31 -05:00
|
|
|
func readAllFileInfo(ctx context.Context, disks []StorageAPI, bucket, object, versionID string, readData bool) ([]FileInfo, []error) {
|
2020-06-12 23:04:01 -04:00
|
|
|
metadataArray := make([]FileInfo, len(disks))
|
2019-10-14 12:44:51 -04:00
|
|
|
|
|
|
|
g := errgroup.WithNErrs(len(disks))
|
2020-09-14 20:19:54 -04:00
|
|
|
// Read `xl.meta` in parallel across disks.
|
2019-10-14 12:44:51 -04:00
|
|
|
for index := range disks {
|
|
|
|
index := index
|
|
|
|
g.Go(func() (err error) {
|
|
|
|
if disks[index] == nil {
|
|
|
|
return errDiskNotFound
|
|
|
|
}
|
2021-01-07 22:27:31 -05:00
|
|
|
metadataArray[index], err = disks[index].ReadVersion(ctx, bucket, object, versionID, readData)
|
2020-06-12 23:04:01 -04:00
|
|
|
if err != nil {
|
2021-01-25 12:45:09 -05:00
|
|
|
if !IsErr(err, []error{
|
|
|
|
errFileNotFound,
|
|
|
|
errVolumeNotFound,
|
|
|
|
errFileVersionNotFound,
|
|
|
|
errDiskNotFound,
|
|
|
|
}...) {
|
2021-04-03 12:03:42 -04:00
|
|
|
logger.LogOnceIf(ctx, fmt.Errorf("Drive %s, path (%s/%s) returned an error (%w)",
|
|
|
|
disks[index], bucket, object, err),
|
2021-03-18 23:16:50 -04:00
|
|
|
disks[index].String())
|
2020-06-12 23:04:01 -04:00
|
|
|
}
|
|
|
|
}
|
2019-10-14 12:44:51 -04:00
|
|
|
return err
|
|
|
|
}, index)
|
2016-07-26 14:34:48 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// Return all the metadata.
|
2019-10-14 12:44:51 -04:00
|
|
|
return metadataArray, g.Wait()
|
2016-07-26 14:34:48 -04:00
|
|
|
}
|
|
|
|
|
2021-04-21 22:06:08 -04:00
|
|
|
// shuffleDisksAndPartsMetadataByIndex this function should be always used by GetObjectNInfo()
|
|
|
|
// and CompleteMultipartUpload code path, it is not meant to be used with PutObject,
|
|
|
|
// NewMultipartUpload metadata shuffling.
|
2021-03-15 23:03:13 -04:00
|
|
|
func shuffleDisksAndPartsMetadataByIndex(disks []StorageAPI, metaArr []FileInfo, fi FileInfo) (shuffledDisks []StorageAPI, shuffledPartsMetadata []FileInfo) {
|
2020-10-26 19:19:42 -04:00
|
|
|
shuffledDisks = make([]StorageAPI, len(disks))
|
2020-10-28 03:09:15 -04:00
|
|
|
shuffledPartsMetadata = make([]FileInfo, len(disks))
|
2021-03-15 23:03:13 -04:00
|
|
|
distribution := fi.Erasure.Distribution
|
2021-04-21 22:06:08 -04:00
|
|
|
|
|
|
|
var inconsistent int
|
2020-10-26 19:19:42 -04:00
|
|
|
for i, meta := range metaArr {
|
|
|
|
if disks[i] == nil {
|
2020-10-28 03:09:15 -04:00
|
|
|
// Assuming offline drives as inconsistent,
|
|
|
|
// to be safe and fallback to original
|
|
|
|
// distribution order.
|
|
|
|
inconsistent++
|
|
|
|
continue
|
|
|
|
}
|
2021-04-21 22:06:08 -04:00
|
|
|
if !meta.IsValid() {
|
|
|
|
inconsistent++
|
|
|
|
continue
|
|
|
|
}
|
2021-11-21 13:41:30 -05:00
|
|
|
if meta.XLV1 != fi.XLV1 {
|
|
|
|
inconsistent++
|
|
|
|
continue
|
|
|
|
}
|
2020-10-28 03:09:15 -04:00
|
|
|
// check if erasure distribution order matches the index
|
|
|
|
// position if this is not correct we discard the disk
|
|
|
|
// and move to collect others
|
|
|
|
if distribution[i] != meta.Erasure.Index {
|
|
|
|
inconsistent++ // keep track of inconsistent entries
|
2020-10-26 19:19:42 -04:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
shuffledDisks[meta.Erasure.Index-1] = disks[i]
|
2020-10-28 03:09:15 -04:00
|
|
|
shuffledPartsMetadata[meta.Erasure.Index-1] = metaArr[i]
|
2020-10-26 19:19:42 -04:00
|
|
|
}
|
2020-10-28 03:09:15 -04:00
|
|
|
|
|
|
|
// Inconsistent meta info is with in the limit of
|
|
|
|
// expected quorum, proceed with EcIndex based
|
|
|
|
// disk order.
|
2021-03-15 23:03:13 -04:00
|
|
|
if inconsistent < fi.Erasure.ParityBlocks {
|
2020-10-28 03:09:15 -04:00
|
|
|
return shuffledDisks, shuffledPartsMetadata
|
|
|
|
}
|
|
|
|
|
|
|
|
// fall back to original distribution based order.
|
2021-04-21 22:06:08 -04:00
|
|
|
return shuffleDisksAndPartsMetadata(disks, metaArr, fi)
|
2020-10-26 19:19:42 -04:00
|
|
|
}
|
|
|
|
|
2021-04-21 22:06:08 -04:00
|
|
|
// Return shuffled partsMetadata depending on fi.Distribution.
|
|
|
|
// additional validation is attempted and invalid metadata is
|
|
|
|
// automatically skipped only when fi.ModTime is non-zero
|
|
|
|
// indicating that this is called during read-phase
|
|
|
|
func shuffleDisksAndPartsMetadata(disks []StorageAPI, partsMetadata []FileInfo, fi FileInfo) (shuffledDisks []StorageAPI, shuffledPartsMetadata []FileInfo) {
|
2020-10-28 03:09:15 -04:00
|
|
|
shuffledDisks = make([]StorageAPI, len(disks))
|
|
|
|
shuffledPartsMetadata = make([]FileInfo, len(partsMetadata))
|
2021-04-21 22:06:08 -04:00
|
|
|
distribution := fi.Erasure.Distribution
|
|
|
|
|
|
|
|
init := fi.ModTime.IsZero()
|
2020-10-28 03:09:15 -04:00
|
|
|
// Shuffle slice xl metadata for expected distribution.
|
|
|
|
for index := range partsMetadata {
|
2021-04-21 22:06:08 -04:00
|
|
|
if disks[index] == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if !init && !partsMetadata[index].IsValid() {
|
|
|
|
// Check for parts metadata validity for only
|
|
|
|
// fi.ModTime is not empty - ModTime is always set,
|
|
|
|
// if object was ever written previously.
|
|
|
|
continue
|
|
|
|
}
|
2021-11-21 13:41:30 -05:00
|
|
|
if !init && fi.XLV1 != partsMetadata[index].XLV1 {
|
|
|
|
continue
|
|
|
|
}
|
2020-10-28 03:09:15 -04:00
|
|
|
blockIndex := distribution[index]
|
|
|
|
shuffledPartsMetadata[blockIndex-1] = partsMetadata[index]
|
|
|
|
shuffledDisks[blockIndex-1] = disks[index]
|
2020-10-26 19:19:42 -04:00
|
|
|
}
|
2020-10-28 03:09:15 -04:00
|
|
|
return shuffledDisks, shuffledPartsMetadata
|
2020-10-26 19:19:42 -04:00
|
|
|
}
|
|
|
|
|
2017-02-24 12:20:40 -05:00
|
|
|
// Return shuffled partsMetadata depending on distribution.
|
2020-06-12 23:04:01 -04:00
|
|
|
func shufflePartsMetadata(partsMetadata []FileInfo, distribution []int) (shuffledPartsMetadata []FileInfo) {
|
2017-02-24 12:20:40 -05:00
|
|
|
if distribution == nil {
|
|
|
|
return partsMetadata
|
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
shuffledPartsMetadata = make([]FileInfo, len(partsMetadata))
|
2017-02-24 12:20:40 -05:00
|
|
|
// Shuffle slice xl metadata for expected distribution.
|
2016-07-16 11:35:30 -04:00
|
|
|
for index := range partsMetadata {
|
|
|
|
blockIndex := distribution[index]
|
2017-02-24 12:20:40 -05:00
|
|
|
shuffledPartsMetadata[blockIndex-1] = partsMetadata[index]
|
2016-07-16 11:35:30 -04:00
|
|
|
}
|
2017-02-24 12:20:40 -05:00
|
|
|
return shuffledPartsMetadata
|
2016-07-16 11:35:30 -04:00
|
|
|
}
|
|
|
|
|
2017-02-24 12:20:40 -05:00
|
|
|
// shuffleDisks - shuffle input disks slice depending on the
|
2017-08-14 21:09:29 -04:00
|
|
|
// erasure distribution. Return shuffled slice of disks with
|
2017-02-24 12:20:40 -05:00
|
|
|
// their expected distribution.
|
|
|
|
func shuffleDisks(disks []StorageAPI, distribution []int) (shuffledDisks []StorageAPI) {
|
|
|
|
if distribution == nil {
|
|
|
|
return disks
|
|
|
|
}
|
|
|
|
shuffledDisks = make([]StorageAPI, len(disks))
|
|
|
|
// Shuffle disks for expected distribution.
|
2016-07-16 11:35:30 -04:00
|
|
|
for index := range disks {
|
|
|
|
blockIndex := distribution[index]
|
2017-02-24 12:20:40 -05:00
|
|
|
shuffledDisks[blockIndex-1] = disks[index]
|
2016-07-16 11:35:30 -04:00
|
|
|
}
|
2017-02-24 12:20:40 -05:00
|
|
|
return shuffledDisks
|
2016-07-16 11:35:30 -04:00
|
|
|
}
|
2017-01-30 18:44:42 -05:00
|
|
|
|
2017-06-14 20:14:27 -04:00
|
|
|
// evalDisks - returns a new slice of disks where nil is set if
|
2017-08-14 21:09:29 -04:00
|
|
|
// the corresponding error in errs slice is not nil
|
2017-06-14 20:14:27 -04:00
|
|
|
func evalDisks(disks []StorageAPI, errs []error) []StorageAPI {
|
|
|
|
if len(errs) != len(disks) {
|
2020-04-09 12:30:02 -04:00
|
|
|
logger.LogIf(GlobalContext, errors.New("unexpected disks/errors slice length"))
|
2017-06-14 20:14:27 -04:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
newDisks := make([]StorageAPI, len(disks))
|
|
|
|
for index := range errs {
|
|
|
|
if errs[index] == nil {
|
|
|
|
newDisks[index] = disks[index]
|
|
|
|
} else {
|
|
|
|
newDisks[index] = nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return newDisks
|
|
|
|
}
|
|
|
|
|
2017-10-06 12:38:01 -04:00
|
|
|
// Errors specifically generated by calculatePartSizeFromIdx function.
var (
	// errPartSizeZero is returned when the supplied part size is zero.
	errPartSizeZero = errors.New("Part size cannot be zero")
	// errPartSizeIndex is returned when the supplied part index is < 1.
	errPartSizeIndex = errors.New("Part index cannot be smaller than 1")
)
|
|
|
|
|
2017-10-06 12:38:01 -04:00
|
|
|
// calculatePartSizeFromIdx calculates the part size according to input index.
|
|
|
|
// returns error if totalSize is -1, partSize is 0, partIndex is 0.
|
2018-04-05 18:04:40 -04:00
|
|
|
func calculatePartSizeFromIdx(ctx context.Context, totalSize int64, partSize int64, partIndex int) (currPartSize int64, err error) {
|
2018-09-27 23:36:17 -04:00
|
|
|
if totalSize < -1 {
|
2018-04-05 18:04:40 -04:00
|
|
|
logger.LogIf(ctx, errInvalidArgument)
|
|
|
|
return 0, errInvalidArgument
|
2017-10-06 12:38:01 -04:00
|
|
|
}
|
2017-01-30 18:44:42 -05:00
|
|
|
if partSize == 0 {
|
2018-04-05 18:04:40 -04:00
|
|
|
logger.LogIf(ctx, errPartSizeZero)
|
|
|
|
return 0, errPartSizeZero
|
2017-01-30 18:44:42 -05:00
|
|
|
}
|
|
|
|
if partIndex < 1 {
|
2018-04-05 18:04:40 -04:00
|
|
|
logger.LogIf(ctx, errPartSizeIndex)
|
|
|
|
return 0, errPartSizeIndex
|
2017-01-30 18:44:42 -05:00
|
|
|
}
|
2019-04-30 19:27:31 -04:00
|
|
|
if totalSize == -1 {
|
|
|
|
return -1, nil
|
|
|
|
}
|
2017-10-06 12:38:01 -04:00
|
|
|
if totalSize > 0 {
|
|
|
|
// Compute the total count of parts
|
|
|
|
partsCount := totalSize/partSize + 1
|
|
|
|
// Return the part's size
|
|
|
|
switch {
|
|
|
|
case int64(partIndex) < partsCount:
|
|
|
|
currPartSize = partSize
|
|
|
|
case int64(partIndex) == partsCount:
|
|
|
|
// Size of last part
|
|
|
|
currPartSize = totalSize % partSize
|
|
|
|
default:
|
|
|
|
currPartSize = 0
|
|
|
|
}
|
2017-01-30 18:44:42 -05:00
|
|
|
}
|
2017-10-06 12:38:01 -04:00
|
|
|
return currPartSize, nil
|
2017-01-30 18:44:42 -05:00
|
|
|
}
|