mirror of
https://github.com/minio/minio.git
synced 2025-01-11 23:13:23 -05:00
c27ece409b
In the algorithm to check if an object requires healing, in addition to checking if all disks have xl.json present we should check if all parts of the object are present and have valid blake2b checksums. Also fixed a minor compilation error in heal-objects-list.go.
278 lines
8.8 KiB
Go
278 lines
8.8 KiB
Go
/*
|
|
* Minio Cloud Storage, (C) 2016, 2017 Minio, Inc.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package cmd
|
|
|
|
import (
|
|
"encoding/hex"
|
|
"path/filepath"
|
|
"time"
|
|
)
|
|
|
|
// commonTime returns a maximally occurring time from a list of time.
|
|
func commonTime(modTimes []time.Time) (modTime time.Time, count int) {
|
|
var maxima int // Counter for remembering max occurrence of elements.
|
|
timeOccurenceMap := make(map[time.Time]int)
|
|
// Ignore the uuid sentinel and count the rest.
|
|
for _, time := range modTimes {
|
|
if time == timeSentinel {
|
|
continue
|
|
}
|
|
timeOccurenceMap[time]++
|
|
}
|
|
// Find the common cardinality from previously collected
|
|
// occurrences of elements.
|
|
for time, count := range timeOccurenceMap {
|
|
if count == maxima && time.After(modTime) {
|
|
maxima = count
|
|
modTime = time
|
|
|
|
} else if count > maxima {
|
|
maxima = count
|
|
modTime = time
|
|
}
|
|
}
|
|
// Return the collected common uuid.
|
|
return modTime, maxima
|
|
}
|
|
|
|
// Beginning of unix time is treated as sentinel value here.
|
|
// timeSentinel is the placeholder stored by bootModtimes in every
// modTime slot; commonTime skips entries equal to it when counting.
var timeSentinel = time.Unix(0, 0).UTC()
|
|
|
|
// Boot modTimes up to disk count, setting the value to time sentinel.
|
|
func bootModtimes(diskCount int) []time.Time {
|
|
modTimes := make([]time.Time, diskCount)
|
|
// Boots up all the modtimes.
|
|
for i := range modTimes {
|
|
modTimes[i] = timeSentinel
|
|
}
|
|
return modTimes
|
|
}
|
|
|
|
// Extracts list of times from xlMetaV1 slice and returns, skips
|
|
// slice elements which have errors.
|
|
func listObjectModtimes(partsMetadata []xlMetaV1, errs []error) (modTimes []time.Time) {
|
|
modTimes = bootModtimes(len(partsMetadata))
|
|
for index, metadata := range partsMetadata {
|
|
if errs[index] != nil {
|
|
continue
|
|
}
|
|
// Once the file is found, save the uuid saved on disk.
|
|
modTimes[index] = metadata.Stat.ModTime
|
|
}
|
|
return modTimes
|
|
}
|
|
|
|
// Notes:
|
|
// There are 5 possible states a disk could be in,
|
|
// 1. __online__ - has the latest copy of xl.json - returned by listOnlineDisks
|
|
//
|
|
// 2. __offline__ - err == errDiskNotFound
|
|
//
|
|
// 3. __availableWithParts__ - has the latest copy of xl.json and has all
|
|
// parts with checksums matching; returned by disksWithAllParts
|
|
//
|
|
// 4. __outdated__ - returned by outDatedDisks, provided []StorageAPI
|
|
// returned by disksWithAllParts is passed for latestDisks.
|
|
// - has an old copy of xl.json
|
|
// - doesn't have xl.json (errFileNotFound)
|
|
// - has the latest xl.json but one or more parts are corrupt
|
|
//
|
|
// 5. __missingParts__ - has the latest copy of xl.json but has some parts
|
|
// missing. This is identified separately since this may need manual
|
|
// inspection to understand the root cause. E.g, this could be due to
|
|
// backend filesystem corruption.
|
|
|
|
// listOnlineDisks - returns
|
|
// - a slice of disks where disk having 'older' xl.json (or nothing)
|
|
// are set to nil.
|
|
// - latest (in time) of the maximally occurring modTime(s).
|
|
func listOnlineDisks(disks []StorageAPI, partsMetadata []xlMetaV1, errs []error) (onlineDisks []StorageAPI, modTime time.Time) {
|
|
onlineDisks = make([]StorageAPI, len(disks))
|
|
|
|
// List all the file commit ids from parts metadata.
|
|
modTimes := listObjectModtimes(partsMetadata, errs)
|
|
|
|
// Reduce list of UUIDs to a single common value.
|
|
modTime, _ = commonTime(modTimes)
|
|
|
|
// Create a new online disks slice, which have common uuid.
|
|
for index, t := range modTimes {
|
|
if t == modTime {
|
|
onlineDisks[index] = disks[index]
|
|
} else {
|
|
onlineDisks[index] = nil
|
|
}
|
|
}
|
|
return onlineDisks, modTime
|
|
}
|
|
|
|
// outDatedDisks - return disks which don't have the latest object (i.e xl.json).
|
|
// disks that are offline are not 'marked' outdated.
|
|
func outDatedDisks(disks, latestDisks []StorageAPI, errs []error, partsMetadata []xlMetaV1,
|
|
bucket, object string) (outDatedDisks []StorageAPI) {
|
|
|
|
outDatedDisks = make([]StorageAPI, len(disks))
|
|
for index, latestDisk := range latestDisks {
|
|
if latestDisk != nil {
|
|
continue
|
|
}
|
|
// disk either has an older xl.json or doesn't have one.
|
|
switch errorCause(errs[index]) {
|
|
case nil, errFileNotFound:
|
|
outDatedDisks[index] = disks[index]
|
|
}
|
|
}
|
|
|
|
return outDatedDisks
|
|
}
|
|
|
|
// Returns if the object should be healed.
|
|
func xlShouldHeal(disks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket, object string) bool {
|
|
onlineDisks, _ := listOnlineDisks(disks, partsMetadata,
|
|
errs)
|
|
// Return true even if one of the disks have stale data.
|
|
for _, disk := range onlineDisks {
|
|
if disk == nil {
|
|
return true
|
|
}
|
|
}
|
|
|
|
// Check if all parts of an object are available and their
|
|
// checksums are valid.
|
|
availableDisks, _, err := disksWithAllParts(onlineDisks, partsMetadata,
|
|
errs, bucket, object)
|
|
if err != nil {
|
|
// Note: This error is due to failure of blake2b
|
|
// checksum computation of a part. It doesn't clearly
|
|
// indicate if the object needs healing. At this
|
|
// juncture healing could fail with the same
|
|
// error. So, we choose to return that there is no
|
|
// need to heal.
|
|
return false
|
|
}
|
|
|
|
// Return true even if one disk has xl.json or one or more
|
|
// parts missing.
|
|
for _, disk := range availableDisks {
|
|
if disk == nil {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// xlHealStat - returns a structure which describes how many data,
|
|
// parity erasure blocks are missing and if it is possible to heal
|
|
// with the blocks present.
|
|
func xlHealStat(xl xlObjects, partsMetadata []xlMetaV1, errs []error) HealObjectInfo {
|
|
// Less than quorum erasure coded blocks of the object have the same create time.
|
|
// This object can't be healed with the information we have.
|
|
modTime, count := commonTime(listObjectModtimes(partsMetadata, errs))
|
|
if count < xl.readQuorum {
|
|
return HealObjectInfo{
|
|
Status: quorumUnavailable,
|
|
MissingDataCount: 0,
|
|
MissingPartityCount: 0,
|
|
}
|
|
}
|
|
|
|
// If there isn't a valid xlMeta then we can't heal the object.
|
|
xlMeta, err := pickValidXLMeta(partsMetadata, modTime)
|
|
if err != nil {
|
|
return HealObjectInfo{
|
|
Status: corrupted,
|
|
MissingDataCount: 0,
|
|
MissingPartityCount: 0,
|
|
}
|
|
}
|
|
|
|
// Compute heal statistics like bytes to be healed, missing
|
|
// data and missing parity count.
|
|
missingDataCount := 0
|
|
missingParityCount := 0
|
|
|
|
for i, err := range errs {
|
|
// xl.json is not found, which implies the erasure
|
|
// coded blocks are unavailable in the corresponding disk.
|
|
// First half of the disks are data and the rest are parity.
|
|
if realErr := errorCause(err); realErr == errFileNotFound || realErr == errDiskNotFound {
|
|
if xlMeta.Erasure.Distribution[i]-1 < xl.dataBlocks {
|
|
missingDataCount++
|
|
} else {
|
|
missingParityCount++
|
|
}
|
|
}
|
|
}
|
|
|
|
// This object can be healed. We have enough object metadata
|
|
// to reconstruct missing erasure coded blocks.
|
|
return HealObjectInfo{
|
|
Status: canHeal,
|
|
MissingDataCount: missingDataCount,
|
|
MissingPartityCount: missingParityCount,
|
|
}
|
|
}
|
|
|
|
// disksWithAllParts - This function needs to be called with
|
|
// []StorageAPI returned by listOnlineDisks. Returns,
|
|
// - disks which have all parts specified in the latest xl.json.
|
|
// - errs updated to have errFileNotFound in place of disks that had
|
|
// missing parts.
|
|
// - non-nil error if any of the online disks failed during
|
|
// calculating blake2b checksum.
|
|
func disksWithAllParts(onlineDisks []StorageAPI, partsMetadata []xlMetaV1, errs []error, bucket, object string) ([]StorageAPI, []error, error) {
|
|
availableDisks := make([]StorageAPI, len(onlineDisks))
|
|
for index, onlineDisk := range onlineDisks {
|
|
if onlineDisk == nil {
|
|
continue
|
|
}
|
|
// disk has a valid xl.json but may not have all the
|
|
// parts. This is considered an outdated disk, since
|
|
// it needs healing too.
|
|
for _, part := range partsMetadata[index].Parts {
|
|
// compute blake2b sum of part.
|
|
partPath := filepath.Join(object, part.Name)
|
|
hash := newHash(partsMetadata[index].Erasure.Algorithm)
|
|
blakeBytes, hErr := hashSum(onlineDisk, bucket, partPath, hash)
|
|
if hErr == errFileNotFound {
|
|
errs[index] = errFileNotFound
|
|
availableDisks[index] = nil
|
|
break
|
|
}
|
|
|
|
if hErr != nil && hErr != errFileNotFound {
|
|
return nil, nil, traceError(hErr)
|
|
}
|
|
|
|
partChecksum := partsMetadata[index].Erasure.GetCheckSumInfo(part.Name).Hash
|
|
blakeSum := hex.EncodeToString(blakeBytes)
|
|
// if blake2b sum doesn't match for a part
|
|
// then this disk is outdated and needs
|
|
// healing.
|
|
if blakeSum != partChecksum {
|
|
errs[index] = errFileNotFound
|
|
availableDisks[index] = nil
|
|
break
|
|
}
|
|
availableDisks[index] = onlineDisk
|
|
}
|
|
}
|
|
|
|
return availableDisks, errs, nil
|
|
}
|