2021-04-18 15:41:13 -04:00
// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
2016-06-17 14:57:51 -04:00
2016-08-18 19:23:42 -04:00
package cmd
2016-05-25 19:42:31 -04:00
2016-11-16 19:42:23 -05:00
import (
2021-03-29 20:00:55 -04:00
"bytes"
2018-03-14 15:01:47 -04:00
"context"
2020-08-03 21:17:48 -04:00
"errors"
2016-11-16 19:42:23 -05:00
"fmt"
2019-01-17 07:58:18 -05:00
"io"
2023-11-22 15:30:31 -05:00
"strconv"
2023-06-24 22:31:04 -04:00
"strings"
2020-06-12 23:04:01 -04:00
"sync"
2021-12-21 13:08:26 -05:00
"time"
2017-11-25 14:58:29 -05:00
2023-06-19 20:53:08 -04:00
"github.com/minio/madmin-go/v3"
2021-06-01 17:59:40 -04:00
"github.com/minio/minio/internal/logger"
2023-09-04 15:57:37 -04:00
"github.com/minio/pkg/v2/sync/errgroup"
2016-11-16 19:42:23 -05:00
)
2016-10-17 05:10:23 -04:00
2021-12-21 13:08:26 -05:00
// reservedMetadataPrefixLowerDataShardFix is the reserved metadata key that
// FileInfo.DataShardFixed looks up; a value of "true" under this key marks
// the version as having had its data shards fixed.
const reservedMetadataPrefixLowerDataShardFix = ReservedMetadataPrefixLower + "data-shard-fix"
2022-08-31 15:28:03 -04:00
//go:generate stringer -type=healingMetric -trimprefix=healingMetric $GOFILE

// healingMetric identifies the kind of healing operation being reported
// to healTrace. The values are assigned with iota, so their order is
// significant; append new metrics at the end.
type healingMetric uint8
const (
// healingMetricBucket is reported by healBucket.
healingMetricBucket healingMetric = iota
// healingMetricObject is reported by healObject.
healingMetricObject
2022-11-28 13:20:55 -05:00
// healingMetricCheckAbandonedParts - NOTE(review): not referenced in this
// part of the file; presumably reported by the abandoned-parts check.
healingMetricCheckAbandonedParts
2022-08-31 15:28:03 -04:00
)
2021-12-21 13:08:26 -05:00
// AcceptableDelta returns 'true' if fi.DiskMTime is within "delta"
// (exclusive) of maxTime, regardless of which of the two is earlier.
//
// This code is primarily used for heuristic detection of
// incorrect shards, as per https://github.com/minio/minio/pull/13803
//
// This check is only active if we could find maximally
// occurring disk mtimes that are roughly the same across
// the quorum, allowing us to skip those shards which we
// think might be wrong.
//
// time.Time.Sub saturates at the minimum/maximum Duration when the two
// instants are further apart than a Duration can represent (for example
// when maxTime is the zero time). Negating the minimum Duration by hand
// overflows and stays negative, which would make an enormous gap look
// "acceptable"; Duration.Abs maps that case to the maximum Duration.
func (fi FileInfo) AcceptableDelta(maxTime time.Time, delta time.Duration) bool {
	return maxTime.Sub(fi.DiskMTime).Abs() < delta
}
// DataShardFixed reports whether the metadata of fi carries the
// data-shard-fix marker with the exact value "true". A missing key, a nil
// metadata map, or any other value yields false.
func (fi FileInfo) DataShardFixed() bool {
	marker, present := fi.Metadata[reservedMetadataPrefixLowerDataShardFix]
	return present && marker == "true"
}
2023-06-24 22:31:04 -04:00
func ( er erasureObjects ) listAndHeal ( bucket , prefix string , healEntry func ( string , metaCacheEntry ) error ) error {
ctx , cancel := context . WithCancel ( context . Background ( ) )
defer cancel ( )
2023-12-01 03:18:04 -05:00
disks , _ := er . getOnlineDisksWithHealing ( false )
2023-06-24 22:31:04 -04:00
if len ( disks ) == 0 {
return errors . New ( "listAndHeal: No non-healing drives found" )
}
// How to resolve partial results.
resolver := metadataResolutionParams {
dirQuorum : 1 ,
objQuorum : 1 ,
bucket : bucket ,
strict : false , // Allow less strict matching.
}
path := baseDirFromPrefix ( prefix )
filterPrefix := strings . Trim ( strings . TrimPrefix ( prefix , path ) , slashSeparator )
if path == prefix {
filterPrefix = ""
}
lopts := listPathRawOptions {
disks : disks ,
bucket : bucket ,
path : path ,
filterPrefix : filterPrefix ,
recursive : true ,
forwardTo : "" ,
minDisks : 1 ,
reportNotFound : false ,
agreed : func ( entry metaCacheEntry ) {
if err := healEntry ( bucket , entry ) ; err != nil {
cancel ( )
}
} ,
partial : func ( entries metaCacheEntries , _ [ ] error ) {
entry , ok := entries . resolve ( & resolver )
if ! ok {
// check if we can get one entry atleast
// proceed to heal nonetheless.
entry , _ = entries . firstFound ( )
}
if err := healEntry ( bucket , * entry ) ; err != nil {
cancel ( )
return
}
} ,
finished : nil ,
}
if err := listPathRaw ( ctx , lopts ) ; err != nil {
return fmt . Errorf ( "listPathRaw returned %w: opts(%#v)" , err , lopts )
}
return nil
}
2023-12-06 01:34:46 -05:00
// HealBucket heals the given bucket across the drives of this erasure set.
// It is a thin entry point: it snapshots the set's current drives and
// endpoints and hands them, together with bucket and opts, to healBucket,
// returning whatever healBucket returns.
func (er erasureObjects) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (
	result madmin.HealResultItem, err error,
) {
	return er.healBucket(ctx, er.getDisks(), er.getEndpoints(), bucket, opts)
}
// healBucket creates bucket on the drives in storageDisks where it does
// not exist and reports the per-drive state before and after.
//
// It works in two passes over storageDisks (storageEndpoints must be
// index-aligned with it):
//
//  1. Stat the bucket on every drive and classify each drive as ok,
//     offline (nil drive or errDiskNotFound), missing (errVolumeNotFound)
//     or corrupt (any other stat error). For minioReservedBucket no stat
//     is performed and every non-nil drive is reported ok.
//  2. Call MakeVol on every drive classified as missing, unless
//     opts.DryRun is set, in which case nothing is created and the
//     "after" state is left as missing.
//
// If the first pass reduces (at read quorum) to errVolumeNotFound and
// opts.Recreate is false, the bucket is treated as gone: nothing is
// created and a nil error is returned.
//
// After the second pass a write-quorum failure is returned as an error,
// except when the same errors reduce at read quorum to nil or
// errDiskNotFound; that case is tolerated so that a server with exactly
// half of its drives unavailable can still start.
//
// When healing traces have subscribers, the outcome is reported through
// healTrace on return.
func (er erasureObjects) healBucket(ctx context.Context, storageDisks []StorageAPI, storageEndpoints []Endpoint, bucket string, opts madmin.HealOpts) (res madmin.HealResultItem, err error) {
	// get write quorum for an object
	writeQuorum := len(storageDisks) - er.defaultParityCount
	if writeQuorum == er.defaultParityCount {
		writeQuorum++
	}

	if globalTrace.NumSubscribers(madmin.TraceHealing) > 0 {
		startTime := time.Now()
		// The closure reads the named results so it traces the final values.
		defer func() {
			healTrace(healingMetricBucket, startTime, bucket, "", &opts, err, &res)
		}()
	}

	// Initialize sync waitgroup.
	g := errgroup.WithNErrs(len(storageDisks))

	// Disk states slices
	beforeState := make([]string, len(storageDisks))
	afterState := make([]string, len(storageDisks))

	// Stat the bucket on all underlying storage disks and record the
	// state of each drive. Nothing is mutated in this pass.
	for index := range storageDisks {
		index := index
		g.Go(func() error {
			if storageDisks[index] == nil {
				beforeState[index] = madmin.DriveStateOffline
				afterState[index] = madmin.DriveStateOffline
				return errDiskNotFound
			}

			beforeState[index] = madmin.DriveStateOk
			afterState[index] = madmin.DriveStateOk

			if bucket == minioReservedBucket {
				return nil
			}

			if _, serr := storageDisks[index].StatVol(ctx, bucket); serr != nil {
				if serr == errDiskNotFound {
					beforeState[index] = madmin.DriveStateOffline
					afterState[index] = madmin.DriveStateOffline
					return serr
				}
				if serr != errVolumeNotFound {
					beforeState[index] = madmin.DriveStateCorrupt
					afterState[index] = madmin.DriveStateCorrupt
					return serr
				}

				beforeState[index] = madmin.DriveStateMissing
				afterState[index] = madmin.DriveStateMissing

				// mutate only if not a dry-run
				if opts.DryRun {
					return nil
				}

				return serr
			}
			return nil
		}, index)
	}

	errs := g.Wait()

	// Initialize heal result info
	res = madmin.HealResultItem{
		Type:         madmin.HealItemBucket,
		Bucket:       bucket,
		DiskCount:    len(storageDisks),
		ParityBlocks: er.defaultParityCount,
		DataBlocks:   len(storageDisks) - er.defaultParityCount,
	}

	for i := range beforeState {
		res.Before.Drives = append(res.Before.Drives, madmin.HealDriveInfo{
			UUID:     "",
			Endpoint: storageEndpoints[i].String(),
			State:    beforeState[i],
		})
	}

	reducedErr := reduceReadQuorumErrs(ctx, errs, bucketOpIgnoredErrs, res.DataBlocks)
	if errors.Is(reducedErr, errVolumeNotFound) && !opts.Recreate {
		// Bucket is missing at quorum and the caller did not ask for it
		// to be recreated: report without creating anything.
		for i := range beforeState {
			res.After.Drives = append(res.After.Drives, madmin.HealDriveInfo{
				UUID:     "",
				Endpoint: storageEndpoints[i].String(),
				State:    madmin.DriveStateOk,
			})
		}
		return res, nil
	}

	// Initialize sync waitgroup.
	g = errgroup.WithNErrs(len(storageDisks))

	// Make a volume entry on all underlying storage disks where the
	// bucket was found missing.
	for index := range storageDisks {
		index := index
		g.Go(func() error {
			// A dry-run must not mutate any drive: skip MakeVol and keep
			// the error (nil for a missing bucket) from the stat pass.
			if beforeState[index] == madmin.DriveStateMissing && !opts.DryRun {
				makeErr := storageDisks[index].MakeVol(ctx, bucket)
				if makeErr == nil {
					afterState[index] = madmin.DriveStateOk
				}
				return makeErr
			}
			return errs[index]
		}, index)
	}

	errs = g.Wait()

	reducedErr = reduceWriteQuorumErrs(ctx, errs, bucketOpIgnoredErrs, writeQuorum)
	if reducedErr != nil {
		// If we have exactly half the drives not available,
		// we should still allow HealBucket to not return error.
		// this is necessary for starting the server.
		readQuorum := res.DataBlocks
		switch reduceReadQuorumErrs(ctx, errs, nil, readQuorum) {
		case nil:
		case errDiskNotFound:
		default:
			return res, reducedErr
		}
	}

	for i := range afterState {
		res.After.Drives = append(res.After.Drives, madmin.HealDriveInfo{
			UUID:     "",
			Endpoint: storageEndpoints[i].String(),
			State:    afterState[i],
		})
	}
	return res, nil
}
2017-01-19 12:34:18 -05:00
// listAllBuckets lists all buckets from all disks. It also
// returns the occurrence of each buckets in all disks
2021-11-15 12:46:55 -05:00
func listAllBuckets ( ctx context . Context , storageDisks [ ] StorageAPI , healBuckets map [ string ] VolInfo , readQuorum int ) error {
2020-09-24 12:53:38 -04:00
g := errgroup . WithNErrs ( len ( storageDisks ) )
var mu sync . Mutex
for index := range storageDisks {
index := index
g . Go ( func ( ) error {
if storageDisks [ index ] == nil {
// we ignore disk not found errors
return nil
2016-11-16 19:42:23 -05:00
}
2023-01-04 02:39:40 -05:00
if storageDisks [ index ] . Healing ( ) != nil {
// we ignore disks under healing
return nil
}
2020-09-24 12:53:38 -04:00
volsInfo , err := storageDisks [ index ] . ListVols ( ctx )
if err != nil {
return err
2017-01-19 12:34:18 -05:00
}
2020-09-24 12:53:38 -04:00
for _ , volInfo := range volsInfo {
// StorageAPI can send volume names which are
// incompatible with buckets - these are
// skipped, like the meta-bucket.
if isReservedOrInvalidBucket ( volInfo . Name , false ) {
continue
}
mu . Lock ( )
if _ , ok := healBuckets [ volInfo . Name ] ; ! ok {
healBuckets [ volInfo . Name ] = volInfo
}
mu . Unlock ( )
2020-05-06 17:25:05 -04:00
}
2020-09-24 12:53:38 -04:00
return nil
} , index )
2017-01-19 12:34:18 -05:00
}
2021-11-15 12:46:55 -05:00
return reduceReadQuorumErrs ( ctx , g . Wait ( ) , bucketMetadataOpIgnoredErrs , readQuorum )
2017-01-19 12:34:18 -05:00
}
2019-01-30 13:53:57 -05:00
// Only heal on disks where we are sure that healing is needed. We can expand
// this list as and when we figure out more errors can be added to this list safely.
2022-08-03 02:10:22 -04:00
func shouldHealObjectOnDisk ( erErr , dataErr error , meta FileInfo , latestMeta FileInfo , doinline bool ) bool {
2020-08-03 21:17:48 -04:00
switch {
case errors . Is ( erErr , errFileNotFound ) || errors . Is ( erErr , errFileVersionNotFound ) :
2019-01-30 13:53:57 -05:00
return true
2022-01-07 22:11:54 -05:00
case errors . Is ( erErr , errFileCorrupt ) :
2019-01-30 13:53:57 -05:00
return true
}
2020-06-12 23:04:01 -04:00
if erErr == nil {
2021-12-21 13:08:26 -05:00
if meta . XLV1 {
// Legacy means heal always
// always check first.
return true
}
2022-08-03 02:10:22 -04:00
if doinline {
// convert small files to 'inline'
return true
}
2021-07-26 14:48:09 -04:00
if ! meta . Deleted && ! meta . IsRemote ( ) {
2021-04-19 13:30:42 -04:00
// If xl.meta was read fine but there may be problem with the part.N files.
if IsErr ( dataErr , [ ] error {
errFileNotFound ,
errFileVersionNotFound ,
errFileCorrupt ,
} ... ) {
return true
}
2019-01-30 13:53:57 -05:00
}
2021-12-21 13:08:26 -05:00
if ! latestMeta . Equals ( meta ) {
2019-01-30 13:53:57 -05:00
return true
}
}
return false
}
2023-04-09 13:25:37 -04:00
// xMinIOHealing is the reserved metadata key that FileInfo.SetHealing sets
// to "true" and whose presence FileInfo.Healing checks.
const xMinIOHealing = ReservedMetadataPrefix + "healing"
// SetHealing flags the object (version) described by fi as being healed by
// storing "true" under the xMinIOHealing metadata key. The metadata map is
// created on demand when fi has none.
// Note: this is to be used only from healObject
func (fi *FileInfo) SetHealing() {
	if fi.Metadata != nil {
		fi.Metadata[xMinIOHealing] = "true"
		return
	}
	fi.Metadata = map[string]string{xMinIOHealing: "true"}
}
// Healing reports whether fi carries the xMinIOHealing metadata key, i.e.
// whether it was marked via SetHealing (fi is being passed down from
// healObject). Only the presence of the key matters, not its value.
func (fi FileInfo) Healing() bool {
	_, marked := fi.Metadata[xMinIOHealing]
	return marked
}
2018-01-22 17:54:55 -05:00
// Heals an object by re-writing corrupt/missing erasure blocks.
2022-11-28 13:20:55 -05:00
func ( er * erasureObjects ) healObject ( ctx context . Context , bucket string , object string , versionID string , opts madmin . HealOpts ) ( result madmin . HealResultItem , err error ) {
2020-11-23 12:12:17 -05:00
dryRun := opts . DryRun
scanMode := opts . ScanMode
2020-06-12 23:04:01 -04:00
storageDisks := er . getDisks ( )
storageEndpoints := er . getEndpoints ( )
2016-10-17 05:10:23 -04:00
2022-08-31 04:56:12 -04:00
if globalTrace . NumSubscribers ( madmin . TraceHealing ) > 0 {
startTime := time . Now ( )
defer func ( ) {
2022-12-08 10:49:10 -05:00
healTrace ( healingMetricObject , startTime , bucket , object , & opts , err , & result )
2022-08-31 04:56:12 -04:00
} ( )
}
2018-01-22 17:54:55 -05:00
// Initialize heal result object
result = madmin . HealResultItem {
2021-12-28 18:33:03 -05:00
Type : madmin . HealItemObject ,
Bucket : bucket ,
Object : object ,
2022-12-08 10:49:10 -05:00
VersionID : versionID ,
2021-12-28 18:33:03 -05:00
DiskCount : len ( storageDisks ) ,
2017-04-14 13:28:35 -04:00
}
Revert heal locks (#12365)
A lot of healing is likely to be on non-existing objects and
locks are very expensive and will slow down scanning
significantly.
In cases where all are valid or, all are broken allow
rejection without locking.
Keep the existing behavior, but move the check for
dangling objects to after the lock has been acquired.
```
_, err = getLatestFileInfo(ctx, partsMetadata, errs)
if err != nil {
return er.purgeObjectDangling(ctx, bucket, object, versionID, partsMetadata, errs, []error{}, opts)
}
```
Revert "heal: Hold lock when reading xl.meta from disks (#12362)"
This reverts commit abd32065aaae4080396a1b4b04a110454368b028
2021-05-25 20:02:06 -04:00
if ! opts . NoLock {
lk := er . NewNSLock ( bucket , object )
lkctx , err := lk . GetLock ( ctx , globalOperationTimeout )
if err != nil {
return result , err
}
ctx = lkctx . Context ( )
2022-12-23 22:49:07 -05:00
defer lk . Unlock ( lkctx )
Revert heal locks (#12365)
A lot of healing is likely to be on non-existing objects and
locks are very expensive and will slow down scanning
significantly.
In cases where all are valid or, all are broken allow
rejection without locking.
Keep the existing behavior, but move the check for
dangling objects to after the lock has been acquired.
```
_, err = getLatestFileInfo(ctx, partsMetadata, errs)
if err != nil {
return er.purgeObjectDangling(ctx, bucket, object, versionID, partsMetadata, errs, []error{}, opts)
}
```
Revert "heal: Hold lock when reading xl.meta from disks (#12362)"
This reverts commit abd32065aaae4080396a1b4b04a110454368b028
2021-05-25 20:02:06 -04:00
}
// Re-read when we have lock...
2023-11-21 00:33:47 -05:00
partsMetadata , errs := readAllFileInfo ( ctx , storageDisks , bucket , object , versionID , true , true )
2021-12-28 18:33:03 -05:00
if isAllNotFound ( errs ) {
2022-06-20 11:07:45 -04:00
err := errFileNotFound
if versionID != "" {
err = errFileVersionNotFound
}
2021-12-28 18:33:03 -05:00
// Nothing to do, file is already gone.
return er . defaultHealResult ( FileInfo { } , storageDisks , storageEndpoints ,
2022-06-20 11:07:45 -04:00
errs , bucket , object , versionID ) , err
2021-12-28 18:33:03 -05:00
}
2021-03-04 17:36:23 -05:00
2021-12-28 18:33:03 -05:00
readQuorum , _ , err := objectQuorumFromMeta ( ctx , partsMetadata , errs , er . defaultParityCount )
if err != nil {
2022-10-27 12:05:24 -04:00
m , err := er . deleteIfDangling ( ctx , bucket , object , partsMetadata , errs , nil , ObjectOptions {
VersionID : versionID ,
} )
errs = make ( [ ] error , len ( errs ) )
for i := range errs {
errs [ i ] = err
}
if err == nil {
// Dangling object successfully purged, size is '0'
m . Size = 0
}
// Generate file/version not found with default heal result
err = errFileNotFound
if versionID != "" {
err = errFileVersionNotFound
}
return er . defaultHealResult ( m , storageDisks , storageEndpoints ,
errs , bucket , object , versionID ) , err
Revert heal locks (#12365)
A lot of healing is likely to be on non-existing objects and
locks are very expensive and will slow down scanning
significantly.
In cases where all are valid or, all are broken allow
rejection without locking.
Keep the existing behavior, but move the check for
dangling objects to after the lock has been acquired.
```
_, err = getLatestFileInfo(ctx, partsMetadata, errs)
if err != nil {
return er.purgeObjectDangling(ctx, bucket, object, versionID, partsMetadata, errs, []error{}, opts)
}
```
Revert "heal: Hold lock when reading xl.meta from disks (#12362)"
This reverts commit abd32065aaae4080396a1b4b04a110454368b028
2021-05-25 20:02:06 -04:00
}
2021-07-26 14:48:09 -04:00
2021-12-28 18:33:03 -05:00
result . ParityBlocks = result . DiskCount - readQuorum
result . DataBlocks = readQuorum
2022-01-07 22:11:54 -05:00
// List of disks having latest version of the object xl.meta
2021-03-04 17:36:23 -05:00
// (by modtime).
2023-06-17 22:18:20 -04:00
onlineDisks , modTime , etag := listOnlineDisks ( storageDisks , partsMetadata , errs , readQuorum )
2021-11-22 12:36:29 -05:00
// Latest FileInfo for reference. If a valid metadata is not
// present, it is as good as object not found.
2023-06-17 22:18:20 -04:00
latestMeta , err := pickValidFileInfo ( ctx , partsMetadata , modTime , etag , readQuorum )
2021-11-22 12:36:29 -05:00
if err != nil {
2022-06-20 11:07:45 -04:00
return result , err
2021-11-22 12:36:29 -05:00
}
2021-05-14 19:50:47 -04:00
// List of disks having all parts as per latest metadata.
// NOTE: do not pass in latestDisks to diskWithAllParts since
// the diskWithAllParts needs to reach the drive to ensure
// validity of the metadata content, we should make sure that
// we pass in disks as is for it to be verified. Once verified
// the disksWithAllParts() returns the actual disks that can be
// used here for reconstruction. This is done to ensure that
// we do not skip drives that have inconsistent metadata to be
// skipped from purging when they are stale.
2021-12-24 02:01:46 -05:00
availableDisks , dataErrs , diskMTime := disksWithAllParts ( ctx , onlineDisks , partsMetadata ,
2021-11-22 12:36:29 -05:00
errs , latestMeta , bucket , object , scanMode )
2021-08-23 16:14:55 -04:00
2023-06-12 14:54:51 -04:00
var erasure Erasure
var recreate bool
2022-08-03 02:10:22 -04:00
if ! latestMeta . Deleted && ! latestMeta . IsRemote ( ) {
// Initialize erasure coding
2023-06-12 14:54:51 -04:00
erasure , err = NewErasure ( ctx , latestMeta . Erasure . DataBlocks ,
latestMeta . Erasure . ParityBlocks , latestMeta . Erasure . BlockSize )
2022-08-03 02:10:22 -04:00
if err != nil {
return result , err
}
// Is only 'true' if the opts.Recreate is true and
// the object shardSize < smallFileThreshold do not
// set this to 'true' arbitrarily and must be only
// 'true' with caller ask.
recreate = ( opts . Recreate &&
! latestMeta . InlineData ( ) &&
len ( latestMeta . Parts ) == 1 &&
2023-06-12 14:54:51 -04:00
erasure . ShardFileSize ( latestMeta . Parts [ 0 ] . ActualSize ) < smallFileThreshold )
2022-08-03 02:10:22 -04:00
}
2023-02-10 09:53:03 -05:00
result . ObjectSize , err = latestMeta . GetActualSize ( )
if err != nil {
return result , err
}
2018-01-22 17:54:55 -05:00
// Loop to find number of disks with valid data, per-drive
// data state and a list of outdated disks on which data needs
// to be healed.
outDatedDisks := make ( [ ] StorageAPI , len ( storageDisks ) )
disksToHealCount := 0
for i , v := range availableDisks {
driveState := ""
switch {
case v != nil :
driveState = madmin . DriveStateOk
2018-10-02 20:13:51 -04:00
case errs [ i ] == errDiskNotFound , dataErrs [ i ] == errDiskNotFound :
2018-01-22 17:54:55 -05:00
driveState = madmin . DriveStateOffline
2020-09-21 18:16:16 -04:00
case errs [ i ] == errFileNotFound , errs [ i ] == errFileVersionNotFound , errs [ i ] == errVolumeNotFound :
2018-01-22 17:54:55 -05:00
fallthrough
2020-07-06 11:09:48 -04:00
case dataErrs [ i ] == errFileNotFound , dataErrs [ i ] == errFileVersionNotFound , dataErrs [ i ] == errVolumeNotFound :
2018-01-22 17:54:55 -05:00
driveState = madmin . DriveStateMissing
default :
// all remaining cases imply corrupt data/metadata
driveState = madmin . DriveStateCorrupt
}
2023-06-12 14:54:51 -04:00
if shouldHealObjectOnDisk ( errs [ i ] , dataErrs [ i ] , partsMetadata [ i ] , latestMeta , recreate ) {
2018-01-22 17:54:55 -05:00
outDatedDisks [ i ] = storageDisks [ i ]
disksToHealCount ++
2018-02-15 20:45:57 -05:00
result . Before . Drives = append ( result . Before . Drives , madmin . HealDriveInfo {
UUID : "" ,
2021-09-29 14:36:19 -04:00
Endpoint : storageEndpoints [ i ] . String ( ) ,
2018-02-15 20:45:57 -05:00
State : driveState ,
} )
result . After . Drives = append ( result . After . Drives , madmin . HealDriveInfo {
UUID : "" ,
2021-09-29 14:36:19 -04:00
Endpoint : storageEndpoints [ i ] . String ( ) ,
2018-02-15 20:45:57 -05:00
State : driveState ,
} )
continue
}
result . Before . Drives = append ( result . Before . Drives , madmin . HealDriveInfo {
UUID : "" ,
2021-09-29 14:36:19 -04:00
Endpoint : storageEndpoints [ i ] . String ( ) ,
2018-02-15 20:45:57 -05:00
State : driveState ,
} )
result . After . Drives = append ( result . After . Drives , madmin . HealDriveInfo {
UUID : "" ,
2021-09-29 14:36:19 -04:00
Endpoint : storageEndpoints [ i ] . String ( ) ,
2018-02-15 20:45:57 -05:00
State : driveState ,
} )
2017-09-28 18:57:19 -04:00
}
2020-10-28 12:18:35 -04:00
if isAllNotFound ( errs ) {
2020-11-23 12:12:17 -05:00
// File is fully gone, fileInfo is empty.
2022-06-20 11:07:45 -04:00
err := errFileNotFound
if versionID != "" {
err = errFileVersionNotFound
}
2021-07-26 11:01:41 -04:00
return er . defaultHealResult ( FileInfo { } , storageDisks , storageEndpoints , errs ,
2022-06-20 11:07:45 -04:00
bucket , object , versionID ) , err
2020-10-28 12:18:35 -04:00
}
2021-05-14 19:50:47 -04:00
2018-01-22 17:54:55 -05:00
if disksToHealCount == 0 {
// Nothing to heal!
return result , nil
}
2017-03-04 17:53:28 -05:00
2018-01-22 17:54:55 -05:00
// After this point, only have to repair data on disk - so
// return if it is a dry-run
if dryRun {
return result , nil
2017-03-31 20:55:15 -04:00
}
2023-06-12 14:54:51 -04:00
if ! latestMeta . XLV1 && ! latestMeta . Deleted && ! recreate && disksToHealCount > latestMeta . Erasure . ParityBlocks {
2023-09-29 12:08:24 -04:00
// Allow for dangling deletes, on versions that have DataDir missing etc.
// this would end up restoring the correct readable versions.
m , err := er . deleteIfDangling ( ctx , bucket , object , partsMetadata , errs , dataErrs , ObjectOptions {
VersionID : versionID ,
} )
errs = make ( [ ] error , len ( errs ) )
for i := range errs {
errs [ i ] = err
}
if err == nil {
// Dangling object successfully purged, size is '0'
m . Size = 0
}
// Generate file/version not found with default heal result
err = errFileNotFound
if versionID != "" {
err = errFileVersionNotFound
}
return er . defaultHealResult ( m , storageDisks , storageEndpoints ,
errs , bucket , object , versionID ) , err
2022-05-25 18:17:10 -04:00
}
2020-06-12 23:04:01 -04:00
cleanFileInfo := func ( fi FileInfo ) FileInfo {
2022-05-25 18:17:10 -04:00
// Returns a copy of the 'fi' with erasure index, checksums and inline data niled.
2020-06-12 23:04:01 -04:00
nfi := fi
2022-05-25 18:17:10 -04:00
if ! nfi . IsRemote ( ) {
nfi . Data = nil
2021-08-23 16:14:55 -04:00
nfi . Erasure . Index = 0
nfi . Erasure . Checksums = nil
2021-04-19 13:30:42 -04:00
}
2020-06-12 23:04:01 -04:00
return nfi
2016-10-17 05:10:23 -04:00
}
2020-07-07 23:54:09 -04:00
// We write at temporary location and then rename to final location.
tmpID := mustGetUUID ( )
2020-07-17 20:41:29 -04:00
migrateDataDir := mustGetUUID ( )
2020-07-07 23:54:09 -04:00
2022-05-25 18:17:10 -04:00
// Reorder so that we have data disks first and parity disks next.
2023-03-20 12:08:42 -04:00
if ! latestMeta . Deleted && len ( latestMeta . Erasure . Distribution ) != len ( availableDisks ) {
err := fmt . Errorf ( "unexpected file distribution (%v) from available disks (%v), looks like backend disks have been manually modified refusing to heal %s/%s(%s)" ,
latestMeta . Erasure . Distribution , availableDisks , bucket , object , versionID )
2023-06-24 23:29:13 -04:00
logger . LogOnceIf ( ctx , err , "heal-object-available-disks" )
2023-03-20 12:08:42 -04:00
return er . defaultHealResult ( latestMeta , storageDisks , storageEndpoints , errs ,
bucket , object , versionID ) , err
}
2022-05-25 18:17:10 -04:00
latestDisks := shuffleDisks ( availableDisks , latestMeta . Erasure . Distribution )
2023-03-20 12:08:42 -04:00
if ! latestMeta . Deleted && len ( latestMeta . Erasure . Distribution ) != len ( outDatedDisks ) {
err := fmt . Errorf ( "unexpected file distribution (%v) from outdated disks (%v), looks like backend disks have been manually modified refusing to heal %s/%s(%s)" ,
latestMeta . Erasure . Distribution , outDatedDisks , bucket , object , versionID )
2023-06-24 23:29:13 -04:00
logger . LogOnceIf ( ctx , err , "heal-object-outdated-disks" )
2023-03-20 12:08:42 -04:00
return er . defaultHealResult ( latestMeta , storageDisks , storageEndpoints , errs ,
bucket , object , versionID ) , err
}
2022-05-25 18:17:10 -04:00
outDatedDisks = shuffleDisks ( outDatedDisks , latestMeta . Erasure . Distribution )
2023-03-20 12:08:42 -04:00
if ! latestMeta . Deleted && len ( latestMeta . Erasure . Distribution ) != len ( partsMetadata ) {
err := fmt . Errorf ( "unexpected file distribution (%v) from metadata entries (%v), looks like backend disks have been manually modified refusing to heal %s/%s(%s)" ,
latestMeta . Erasure . Distribution , len ( partsMetadata ) , bucket , object , versionID )
2023-06-24 23:29:13 -04:00
logger . LogOnceIf ( ctx , err , "heal-object-metadata-entries" )
2023-03-20 12:08:42 -04:00
return er . defaultHealResult ( latestMeta , storageDisks , storageEndpoints , errs ,
bucket , object , versionID ) , err
}
2022-05-25 18:17:10 -04:00
partsMetadata = shufflePartsMetadata ( partsMetadata , latestMeta . Erasure . Distribution )
2021-04-01 00:14:08 -04:00
copyPartsMetadata := make ( [ ] FileInfo , len ( partsMetadata ) )
2022-05-25 18:17:10 -04:00
for i := range latestDisks {
if latestDisks [ i ] == nil {
continue
}
copyPartsMetadata [ i ] = partsMetadata [ i ]
}
2019-01-17 07:58:18 -05:00
for i := range outDatedDisks {
if outDatedDisks [ i ] == nil {
continue
}
2022-05-25 18:17:10 -04:00
// Make sure to write the FileInfo information
// that is expected to be in quorum.
2020-06-12 23:04:01 -04:00
partsMetadata [ i ] = cleanFileInfo ( latestMeta )
2019-01-17 07:58:18 -05:00
}
2016-10-17 05:10:23 -04:00
2021-04-02 01:12:03 -04:00
// source data dir shall be empty in case of XLV1
// differentiate it with dstDataDir for readability
// srcDataDir is the one used with newBitrotReader()
// to read existing content.
srcDataDir := latestMeta . DataDir
dstDataDir := latestMeta . DataDir
2020-07-21 16:54:06 -04:00
if latestMeta . XLV1 {
2021-04-02 01:12:03 -04:00
dstDataDir = migrateDataDir
2020-07-21 16:54:06 -04:00
}
2021-03-29 20:00:55 -04:00
var inlineBuffers [ ] * bytes . Buffer
2021-05-06 19:06:57 -04:00
if ! latestMeta . Deleted && ! latestMeta . IsRemote ( ) {
2022-08-03 02:10:22 -04:00
if latestMeta . InlineData ( ) || recreate {
inlineBuffers = make ( [ ] * bytes . Buffer , len ( outDatedDisks ) )
2016-10-17 05:10:23 -04:00
}
2020-06-12 23:04:01 -04:00
2020-07-07 23:54:09 -04:00
erasureInfo := latestMeta . Erasure
for partIndex := 0 ; partIndex < len ( latestMeta . Parts ) ; partIndex ++ {
partSize := latestMeta . Parts [ partIndex ] . Size
partActualSize := latestMeta . Parts [ partIndex ] . ActualSize
2022-07-19 21:56:24 -04:00
partModTime := latestMeta . Parts [ partIndex ] . ModTime
2020-07-07 23:54:09 -04:00
partNumber := latestMeta . Parts [ partIndex ] . Number
2022-07-11 20:30:56 -04:00
partIdx := latestMeta . Parts [ partIndex ] . Index
2022-08-29 19:57:16 -04:00
partChecksums := latestMeta . Parts [ partIndex ] . Checksums
2023-06-12 14:54:51 -04:00
tillOffset := erasure . ShardFileOffset ( 0 , partSize , partSize )
2020-07-07 23:54:09 -04:00
readers := make ( [ ] io . ReaderAt , len ( latestDisks ) )
2023-08-03 05:18:18 -04:00
prefer := make ( [ ] bool , len ( latestDisks ) )
2020-07-07 23:54:09 -04:00
checksumAlgo := erasureInfo . GetChecksumInfo ( partNumber ) . Algorithm
for i , disk := range latestDisks {
if disk == OfflineDisk {
continue
}
2021-04-01 00:14:08 -04:00
checksumInfo := copyPartsMetadata [ i ] . Erasure . GetChecksumInfo ( partNumber )
2021-04-02 01:12:03 -04:00
partPath := pathJoin ( object , srcDataDir , fmt . Sprintf ( "part.%d" , partNumber ) )
2023-06-12 14:54:51 -04:00
readers [ i ] = newBitrotReader ( disk , copyPartsMetadata [ i ] . Data , bucket , partPath , tillOffset , checksumAlgo ,
checksumInfo . Hash , erasure . ShardSize ( ) )
2023-08-03 05:18:18 -04:00
prefer [ i ] = disk . Hostname ( ) == ""
2020-07-07 23:54:09 -04:00
}
writers := make ( [ ] io . Writer , len ( outDatedDisks ) )
for i , disk := range outDatedDisks {
if disk == OfflineDisk {
continue
}
2021-04-02 01:12:03 -04:00
partPath := pathJoin ( tmpID , dstDataDir , fmt . Sprintf ( "part.%d" , partNumber ) )
2021-03-29 20:00:55 -04:00
if len ( inlineBuffers ) > 0 {
2023-06-12 14:54:51 -04:00
inlineBuffers [ i ] = bytes . NewBuffer ( make ( [ ] byte , 0 , erasure . ShardFileSize ( latestMeta . Size ) + 32 ) )
writers [ i ] = newStreamingBitrotWriterBuffer ( inlineBuffers [ i ] , DefaultBitrotAlgorithm , erasure . ShardSize ( ) )
2021-03-29 20:00:55 -04:00
} else {
writers [ i ] = newBitrotWriter ( disk , minioMetaTmpBucket , partPath ,
2023-06-12 14:54:51 -04:00
tillOffset , DefaultBitrotAlgorithm , erasure . ShardSize ( ) )
2021-03-29 20:00:55 -04:00
}
2017-09-28 18:57:19 -04:00
}
2022-08-03 02:10:22 -04:00
2023-06-12 14:54:51 -04:00
// Heal each part. erasure.Heal() will write the healed
// part to .minio/tmp/uuid/ which needs to be renamed
// later to the final location.
2023-08-03 05:18:18 -04:00
err = erasure . Heal ( ctx , writers , readers , partSize , prefer )
2020-07-07 23:54:09 -04:00
closeBitrotReaders ( readers )
closeBitrotWriters ( writers )
if err != nil {
2022-06-20 11:07:45 -04:00
return result , err
2020-07-07 23:54:09 -04:00
}
2021-04-19 13:30:42 -04:00
2020-07-07 23:54:09 -04:00
// outDatedDisks that had write errors should not be
// written to for remaining parts, so we nil it out.
for i , disk := range outDatedDisks {
if disk == OfflineDisk {
continue
}
2020-06-12 23:04:01 -04:00
2020-07-07 23:54:09 -04:00
// A non-nil stale disk which did not receive
// a healed part checksum had a write error.
if writers [ i ] == nil {
outDatedDisks [ i ] = nil
disksToHealCount --
continue
}
2021-04-02 01:12:03 -04:00
partsMetadata [ i ] . DataDir = dstDataDir
2022-08-29 19:57:16 -04:00
partsMetadata [ i ] . AddObjectPart ( partNumber , "" , partSize , partActualSize , partModTime , partIdx , partChecksums )
2021-04-21 22:06:08 -04:00
if len ( inlineBuffers ) > 0 && inlineBuffers [ i ] != nil {
2021-03-29 20:00:55 -04:00
partsMetadata [ i ] . Data = inlineBuffers [ i ] . Bytes ( )
2022-08-03 02:10:22 -04:00
partsMetadata [ i ] . SetInlineData ( )
2021-03-29 20:00:55 -04:00
} else {
partsMetadata [ i ] . Data = nil
}
2020-07-07 23:54:09 -04:00
}
2017-09-28 18:57:19 -04:00
2020-07-07 23:54:09 -04:00
// If all disks are having errors, we give up.
if disksToHealCount == 0 {
2022-08-04 19:10:08 -04:00
return result , fmt . Errorf ( "all drives had write errors, unable to heal %s/%s" , bucket , object )
2020-07-07 23:54:09 -04:00
}
2021-04-19 13:30:42 -04:00
2016-10-17 05:10:23 -04:00
}
2021-04-19 13:30:42 -04:00
2016-10-17 05:10:23 -04:00
}
2022-11-01 11:00:02 -04:00
defer er . deleteAll ( context . Background ( ) , minioMetaTmpBucket , tmpID )
2019-04-25 10:33:26 -04:00
2016-10-17 05:10:23 -04:00
// Rename from tmp location to the actual location.
2020-07-21 16:54:06 -04:00
for i , disk := range outDatedDisks {
2020-06-12 23:04:01 -04:00
if disk == OfflineDisk {
2016-08-31 14:42:57 -04:00
continue
2016-08-17 14:36:33 -04:00
}
2021-11-21 13:41:30 -05:00
2021-04-20 13:44:39 -04:00
// record the index of the updated disks
partsMetadata [ i ] . Erasure . Index = i + 1
2016-10-17 05:10:23 -04:00
// Attempt a rename now from healed data to final location.
2023-04-09 13:25:37 -04:00
partsMetadata [ i ] . SetHealing ( )
2022-09-05 19:51:37 -04:00
if _ , err = disk . RenameData ( ctx , minioMetaTmpBucket , tmpID , partsMetadata [ i ] , bucket , object ) ; err != nil {
2022-06-20 11:07:45 -04:00
return result , err
2016-08-17 14:36:33 -04:00
}
2018-01-22 17:54:55 -05:00
2022-08-03 02:10:22 -04:00
// - Remove any parts from healed disks after its been inlined.
// - Remove any remaining parts from outdated disks from before transition.
if recreate || partsMetadata [ i ] . IsRemote ( ) {
2021-08-23 16:14:55 -04:00
rmDataDir := partsMetadata [ i ] . DataDir
disk . DeleteVol ( ctx , pathJoin ( bucket , encodeDirObject ( object ) , rmDataDir ) , true )
}
2018-03-27 21:11:39 -04:00
for i , v := range result . Before . Drives {
if v . Endpoint == disk . String ( ) {
result . After . Drives [ i ] . State = madmin . DriveStateOk
2018-02-15 20:45:57 -05:00
}
}
2016-08-17 14:36:33 -04:00
}
2018-01-22 17:54:55 -05:00
2021-12-24 02:01:46 -05:00
if ! diskMTime . Equal ( timeSentinel ) && ! diskMTime . IsZero ( ) {
// Update metadata to indicate special fix.
_ , err = er . PutObjectMetadata ( ctx , bucket , object , ObjectOptions {
NoLock : true ,
UserDefined : map [ string ] string {
reservedMetadataPrefixLowerDataShardFix : "true" ,
// another reserved metadata to capture original disk-mtime
// captured for this version of the object, to be used
// possibly in future to heal other versions if possible.
ReservedMetadataPrefixLower + "disk-mtime" : diskMTime . String ( ) ,
} ,
} )
}
2018-01-22 17:54:55 -05:00
return result , nil
2016-08-17 14:36:33 -04:00
}
2016-11-16 19:42:23 -05:00
2022-11-28 13:20:55 -05:00
// checkAbandonedParts will check if an object has abandoned parts,
// meaning data-dirs or inlined data that are no longer referenced by the xl.meta
// Errors are generally ignored by this function.
func ( er * erasureObjects ) checkAbandonedParts ( ctx context . Context , bucket string , object string , opts madmin . HealOpts ) ( err error ) {
if ! opts . Remove || opts . DryRun {
return nil
}
if globalTrace . NumSubscribers ( madmin . TraceHealing ) > 0 {
startTime := time . Now ( )
defer func ( ) {
2022-12-08 10:49:10 -05:00
healTrace ( healingMetricCheckAbandonedParts , startTime , bucket , object , nil , err , nil )
2022-11-28 13:20:55 -05:00
} ( )
}
if ! opts . NoLock {
lk := er . NewNSLock ( bucket , object )
lkctx , err := lk . GetLock ( ctx , globalOperationTimeout )
if err != nil {
return err
}
ctx = lkctx . Context ( )
2022-12-23 22:49:07 -05:00
defer lk . Unlock ( lkctx )
2022-11-28 13:20:55 -05:00
}
var wg sync . WaitGroup
for _ , disk := range er . getDisks ( ) {
if disk != nil {
wg . Add ( 1 )
go func ( disk StorageAPI ) {
defer wg . Done ( )
_ = disk . CleanAbandonedData ( ctx , bucket , object )
} ( disk )
}
}
wg . Wait ( )
return nil
}
2018-05-10 19:53:42 -04:00
// healObjectDir - heals object directory specifically, this special call
// is needed since we do not have a special backend format for directories.
2022-11-28 13:20:55 -05:00
func ( er * erasureObjects ) healObjectDir ( ctx context . Context , bucket , object string , dryRun bool , remove bool ) ( hr madmin . HealResultItem , err error ) {
2020-06-12 23:04:01 -04:00
storageDisks := er . getDisks ( )
storageEndpoints := er . getEndpoints ( )
2018-05-10 19:53:42 -04:00
// Initialize heal result object
hr = madmin . HealResultItem {
Type : madmin . HealItemObject ,
Bucket : bucket ,
Object : object ,
DiskCount : len ( storageDisks ) ,
2021-01-16 15:08:02 -05:00
ParityBlocks : er . defaultParityCount ,
DataBlocks : len ( storageDisks ) - er . defaultParityCount ,
2018-05-10 19:53:42 -04:00
ObjectSize : 0 ,
}
2019-01-30 13:51:56 -05:00
hr . Before . Drives = make ( [ ] madmin . HealDriveInfo , len ( storageDisks ) )
hr . After . Drives = make ( [ ] madmin . HealDriveInfo , len ( storageDisks ) )
2019-04-23 17:54:28 -04:00
errs := statAllDirs ( ctx , storageDisks , bucket , object )
2020-03-30 12:48:24 -04:00
danglingObject := isObjectDirDangling ( errs )
if danglingObject {
if ! dryRun && remove {
2020-06-12 23:04:01 -04:00
var wg sync . WaitGroup
// Remove versions in bulk for each disk
for index , disk := range storageDisks {
if disk == nil {
continue
}
wg . Add ( 1 )
go func ( index int , disk StorageAPI ) {
defer wg . Done ( )
2022-07-11 12:15:54 -04:00
_ = disk . Delete ( ctx , bucket , object , DeleteOptions {
Recursive : false ,
2023-11-29 01:35:16 -05:00
Immediate : false ,
2022-07-11 12:15:54 -04:00
} )
2020-06-12 23:04:01 -04:00
} ( index , disk )
}
wg . Wait ( )
2019-04-23 17:54:28 -04:00
}
}
2018-05-10 19:53:42 -04:00
2019-04-23 17:54:28 -04:00
// Prepare object creation in all disks
for i , err := range errs {
2021-09-29 14:36:19 -04:00
drive := storageEndpoints [ i ] . String ( )
2019-04-23 17:54:28 -04:00
switch err {
2019-08-01 17:13:06 -04:00
case nil :
2020-03-30 12:48:24 -04:00
hr . Before . Drives [ i ] = madmin . HealDriveInfo { Endpoint : drive , State : madmin . DriveStateOk }
hr . After . Drives [ i ] = madmin . HealDriveInfo { Endpoint : drive , State : madmin . DriveStateOk }
2019-04-23 17:54:28 -04:00
case errDiskNotFound :
hr . Before . Drives [ i ] = madmin . HealDriveInfo { State : madmin . DriveStateOffline }
hr . After . Drives [ i ] = madmin . HealDriveInfo { State : madmin . DriveStateOffline }
2019-08-01 17:13:06 -04:00
case errVolumeNotFound , errFileNotFound :
// Bucket or prefix/directory not found
2019-04-23 17:54:28 -04:00
hr . Before . Drives [ i ] = madmin . HealDriveInfo { Endpoint : drive , State : madmin . DriveStateMissing }
hr . After . Drives [ i ] = madmin . HealDriveInfo { Endpoint : drive , State : madmin . DriveStateMissing }
default :
hr . Before . Drives [ i ] = madmin . HealDriveInfo { Endpoint : drive , State : madmin . DriveStateCorrupt }
hr . After . Drives [ i ] = madmin . HealDriveInfo { Endpoint : drive , State : madmin . DriveStateCorrupt }
}
}
2022-06-20 11:07:45 -04:00
if danglingObject || isAllNotFound ( errs ) {
2020-11-23 21:50:53 -05:00
// Nothing to do, file is already gone.
2022-06-20 11:07:45 -04:00
return hr , errFileNotFound
}
if dryRun {
// Quit without try to heal the object dir
2021-12-28 18:33:03 -05:00
return hr , nil
2019-04-23 17:54:28 -04:00
}
2022-06-20 11:07:45 -04:00
2019-04-23 17:54:28 -04:00
for i , err := range errs {
2020-03-30 12:48:24 -04:00
if err == errVolumeNotFound || err == errFileNotFound {
2019-08-01 17:13:06 -04:00
// Bucket or prefix/directory not found
2020-09-04 12:45:06 -04:00
merr := storageDisks [ i ] . MakeVol ( ctx , pathJoin ( bucket , object ) )
2019-04-23 17:54:28 -04:00
switch merr {
case nil , errVolumeExists :
hr . After . Drives [ i ] . State = madmin . DriveStateOk
case errDiskNotFound :
hr . After . Drives [ i ] . State = madmin . DriveStateOffline
2019-01-30 13:51:56 -05:00
default :
2019-04-23 17:54:28 -04:00
hr . After . Drives [ i ] . State = madmin . DriveStateCorrupt
2018-05-10 19:53:42 -04:00
}
2019-04-23 17:54:28 -04:00
}
2018-05-10 19:53:42 -04:00
}
return hr , nil
}
2018-10-02 20:13:51 -04:00
// Populates default heal result item entries with possible values when we are returning prematurely.
// This is to ensure that in any circumstance we are not returning empty arrays with wrong values.
2022-11-28 13:20:55 -05:00
func ( er * erasureObjects ) defaultHealResult ( lfi FileInfo , storageDisks [ ] StorageAPI , storageEndpoints [ ] Endpoint , errs [ ] error , bucket , object , versionID string ) madmin . HealResultItem {
2018-10-02 20:13:51 -04:00
// Initialize heal result object
result := madmin . HealResultItem {
2021-07-26 11:01:41 -04:00
Type : madmin . HealItemObject ,
Bucket : bucket ,
Object : object ,
ObjectSize : lfi . Size ,
VersionID : versionID ,
DiskCount : len ( storageDisks ) ,
2018-10-02 20:13:51 -04:00
}
2021-05-25 12:34:27 -04:00
2020-11-23 12:12:17 -05:00
if lfi . IsValid ( ) {
2021-05-24 16:39:38 -04:00
result . ParityBlocks = lfi . Erasure . ParityBlocks
} else {
// Default to most common configuration for erasure blocks.
2021-07-26 11:01:41 -04:00
result . ParityBlocks = er . defaultParityCount
2021-05-24 16:39:38 -04:00
}
2021-05-25 12:34:27 -04:00
result . DataBlocks = len ( storageDisks ) - result . ParityBlocks
2021-05-24 16:39:38 -04:00
2018-10-02 20:13:51 -04:00
for index , disk := range storageDisks {
if disk == nil {
result . Before . Drives = append ( result . Before . Drives , madmin . HealDriveInfo {
2020-06-10 20:10:31 -04:00
UUID : "" ,
2021-09-29 14:36:19 -04:00
Endpoint : storageEndpoints [ index ] . String ( ) ,
2020-06-10 20:10:31 -04:00
State : madmin . DriveStateOffline ,
2018-10-02 20:13:51 -04:00
} )
result . After . Drives = append ( result . After . Drives , madmin . HealDriveInfo {
2020-06-10 20:10:31 -04:00
UUID : "" ,
2021-09-29 14:36:19 -04:00
Endpoint : storageEndpoints [ index ] . String ( ) ,
2020-06-10 20:10:31 -04:00
State : madmin . DriveStateOffline ,
2018-10-02 20:13:51 -04:00
} )
continue
}
driveState := madmin . DriveStateCorrupt
switch errs [ index ] {
case errFileNotFound , errVolumeNotFound :
driveState = madmin . DriveStateMissing
2021-07-26 11:01:41 -04:00
case nil :
driveState = madmin . DriveStateOk
2018-10-02 20:13:51 -04:00
}
result . Before . Drives = append ( result . Before . Drives , madmin . HealDriveInfo {
UUID : "" ,
2021-09-29 14:36:19 -04:00
Endpoint : storageEndpoints [ index ] . String ( ) ,
2018-10-02 20:13:51 -04:00
State : driveState ,
} )
result . After . Drives = append ( result . After . Drives , madmin . HealDriveInfo {
UUID : "" ,
2021-09-29 14:36:19 -04:00
Endpoint : storageEndpoints [ index ] . String ( ) ,
2018-10-02 20:13:51 -04:00
State : driveState ,
} )
}
return result
}
2019-04-23 17:54:28 -04:00
// Stat all directories.
func statAllDirs ( ctx context . Context , storageDisks [ ] StorageAPI , bucket , prefix string ) [ ] error {
2019-10-14 12:44:51 -04:00
g := errgroup . WithNErrs ( len ( storageDisks ) )
2019-04-23 17:54:28 -04:00
for index , disk := range storageDisks {
if disk == nil {
continue
}
2019-10-14 12:44:51 -04:00
index := index
g . Go ( func ( ) error {
2020-09-04 12:45:06 -04:00
entries , err := storageDisks [ index ] . ListDir ( ctx , bucket , prefix , 1 )
2019-04-23 17:54:28 -04:00
if err != nil {
2019-10-14 12:44:51 -04:00
return err
2019-04-23 17:54:28 -04:00
}
if len ( entries ) > 0 {
2019-10-14 12:44:51 -04:00
return errVolumeNotEmpty
2019-04-23 17:54:28 -04:00
}
2019-10-14 12:44:51 -04:00
return nil
} , index )
2019-04-23 17:54:28 -04:00
}
2019-10-14 12:44:51 -04:00
return g . Wait ( )
2019-04-23 17:54:28 -04:00
}
2020-10-28 12:18:35 -04:00
// isAllNotFound will return if any element of the error slice is not
// errFileNotFound, errFileVersionNotFound or errVolumeNotFound.
// A 0 length slice will always return false.
func isAllNotFound ( errs [ ] error ) bool {
for _ , err := range errs {
2021-12-28 18:33:03 -05:00
if err != nil {
switch err . Error ( ) {
case errFileNotFound . Error ( ) :
fallthrough
case errVolumeNotFound . Error ( ) :
fallthrough
case errFileVersionNotFound . Error ( ) :
continue
}
2020-10-28 12:18:35 -04:00
}
return false
}
return len ( errs ) > 0
}
2019-04-23 17:54:28 -04:00
// ObjectDir is considered dangling/corrupted if any only
// if total disks - a combination of corrupted and missing
// files is lesser than N/2+1 number of disks.
2020-10-28 12:18:35 -04:00
// If no files were found false will be returned.
2019-04-23 17:54:28 -04:00
func isObjectDirDangling ( errs [ ] error ) ( ok bool ) {
2020-03-30 12:48:24 -04:00
var found int
var notFound int
var foundNotEmpty int
var otherFound int
2019-04-23 17:54:28 -04:00
for _ , readErr := range errs {
2023-03-06 11:56:10 -05:00
switch {
case readErr == nil :
2020-03-30 12:48:24 -04:00
found ++
2023-03-06 11:56:10 -05:00
case readErr == errFileNotFound || readErr == errVolumeNotFound :
2020-03-30 12:48:24 -04:00
notFound ++
2023-03-06 11:56:10 -05:00
case readErr == errVolumeNotEmpty :
2020-03-30 12:48:24 -04:00
foundNotEmpty ++
2023-03-06 11:56:10 -05:00
default :
2020-03-30 12:48:24 -04:00
otherFound ++
2019-04-23 17:54:28 -04:00
}
}
2020-10-28 12:18:35 -04:00
found = found + foundNotEmpty + otherFound
return found < notFound && found > 0
2019-04-23 17:54:28 -04:00
}
2023-12-06 01:34:46 -05:00
// Object is considered dangling/corrupted if any only
2019-02-05 20:58:48 -05:00
// if total disks - a combination of corrupted and missing
// files is lesser than number of data blocks.
2020-06-12 23:04:01 -04:00
func isObjectDangling ( metaArr [ ] FileInfo , errs [ ] error , dataErrs [ ] error ) ( validMeta FileInfo , ok bool ) {
2019-02-05 20:58:48 -05:00
// We can consider an object data not reliable
2022-01-07 22:11:54 -05:00
// when xl.meta is not found in read quorum disks.
// or when xl.meta is not readable in read quorum disks.
2023-01-13 01:20:19 -05:00
danglingErrsCount := func ( cerrs [ ] error ) ( int , int , int ) {
2022-01-07 22:11:54 -05:00
var (
2023-01-13 01:20:19 -05:00
notFoundCount int
corruptedCount int
diskNotFoundCount int
2022-01-07 22:11:54 -05:00
)
for _ , readErr := range cerrs {
2023-03-06 11:56:10 -05:00
switch {
case errors . Is ( readErr , errFileNotFound ) || errors . Is ( readErr , errFileVersionNotFound ) :
2022-01-07 22:11:54 -05:00
notFoundCount ++
2023-03-06 11:56:10 -05:00
case errors . Is ( readErr , errFileCorrupt ) :
2022-01-07 22:11:54 -05:00
corruptedCount ++
2023-03-06 11:56:10 -05:00
case errors . Is ( readErr , errDiskNotFound ) :
2023-01-13 01:20:19 -05:00
diskNotFoundCount ++
2022-01-07 22:11:54 -05:00
}
2019-02-05 20:58:48 -05:00
}
2023-01-13 01:20:19 -05:00
return notFoundCount , corruptedCount , diskNotFoundCount
2019-02-05 20:58:48 -05:00
}
2022-01-07 22:11:54 -05:00
ndataErrs := make ( [ ] error , len ( dataErrs ) )
2019-03-26 17:57:44 -04:00
for i := range dataErrs {
if errs [ i ] != dataErrs [ i ] {
2022-01-07 22:11:54 -05:00
// Only count part errors, if the error is not
// same as xl.meta error. This is to avoid
// double counting when both parts and xl.meta
// are not available.
ndataErrs [ i ] = dataErrs [ i ]
2019-03-26 17:57:44 -04:00
}
}
2019-02-05 20:58:48 -05:00
2023-01-13 01:20:19 -05:00
notFoundMetaErrs , corruptedMetaErrs , driveNotFoundMetaErrs := danglingErrsCount ( errs )
notFoundPartsErrs , corruptedPartsErrs , driveNotFoundPartsErrs := danglingErrsCount ( ndataErrs )
2022-01-07 22:11:54 -05:00
2019-02-05 20:58:48 -05:00
for _ , m := range metaArr {
2022-01-07 22:11:54 -05:00
if m . IsValid ( ) {
validMeta = m
break
2019-02-05 20:58:48 -05:00
}
}
2022-01-07 22:11:54 -05:00
if ! validMeta . IsValid ( ) {
2023-09-29 12:08:24 -04:00
// validMeta is invalid because notFoundPartsErrs is
// greater than parity blocks, thus invalidating the FileInfo{}
// every dataErrs[i], metaArr[i] is an empty FileInfo{}
dataBlocks := ( len ( ndataErrs ) + 1 ) / 2
if notFoundPartsErrs > dataBlocks {
// Not using parity to ensure that we do not delete
// any valid content, if any is recoverable. But if
// notFoundDataDirs are already greater than the data
// blocks all bets are off and it is safe to purge.
//
// This is purely a defensive code, ideally parityBlocks
// is sufficient, however we can't know that since we
// do have the FileInfo{}.
return validMeta , true
}
2022-01-07 22:11:54 -05:00
// We have no idea what this file is, leave it as is.
return validMeta , false
}
2023-06-18 21:20:15 -04:00
if driveNotFoundMetaErrs > 0 || driveNotFoundPartsErrs > 0 {
return validMeta , false
}
2022-01-07 22:11:54 -05:00
if validMeta . Deleted {
// notFoundPartsErrs is ignored since
2021-01-20 16:12:12 -05:00
// - delete marker does not have any parts
2022-01-07 22:11:54 -05:00
return validMeta , corruptedMetaErrs + notFoundMetaErrs > len ( errs ) / 2
2020-07-07 23:54:09 -04:00
}
2022-01-07 22:11:54 -05:00
totalErrs := notFoundMetaErrs + corruptedMetaErrs + notFoundPartsErrs + corruptedPartsErrs
if validMeta . IsRemote ( ) {
// notFoundPartsErrs is ignored since
// - transition status of complete has no parts
totalErrs = notFoundMetaErrs + corruptedMetaErrs
2019-02-05 20:58:48 -05:00
}
2019-03-26 17:57:44 -04:00
// We have valid meta, now verify if we have enough files with parity blocks.
2022-01-07 22:11:54 -05:00
return validMeta , totalErrs > validMeta . Erasure . ParityBlocks
2019-02-05 20:58:48 -05:00
}
2019-03-26 17:57:44 -04:00
// HealObject - heal the given object, automatically deletes the object if stale/corrupted if `remove` is true.
2020-06-12 23:04:01 -04:00
func ( er erasureObjects ) HealObject ( ctx context . Context , bucket , object , versionID string , opts madmin . HealOpts ) ( hr madmin . HealResultItem , err error ) {
2018-08-20 19:58:47 -04:00
// Create context that also contains information about the object and bucket.
// The top level handler might not have this information.
reqInfo := logger . GetReqInfo ( ctx )
var newReqInfo * logger . ReqInfo
if reqInfo != nil {
2018-11-19 17:47:03 -05:00
newReqInfo = logger . NewReqInfo ( reqInfo . RemoteHost , reqInfo . UserAgent , reqInfo . DeploymentID , reqInfo . RequestID , reqInfo . API , bucket , object )
2018-08-20 19:58:47 -04:00
} else {
2023-10-18 11:06:57 -04:00
newReqInfo = logger . NewReqInfo ( "" , "" , globalDeploymentID ( ) , "" , "Heal" , bucket , object )
2018-08-20 19:58:47 -04:00
}
2020-04-09 12:30:02 -04:00
healCtx := logger . SetReqInfo ( GlobalContext , newReqInfo )
2018-08-20 19:58:47 -04:00
2018-05-10 19:53:42 -04:00
// Healing directories handle it separately.
2019-12-06 02:16:06 -05:00
if HasSuffix ( object , SlashSeparator ) {
2022-06-20 11:07:45 -04:00
hr , err := er . healObjectDir ( healCtx , bucket , object , opts . DryRun , opts . Remove )
return hr , toObjectErr ( err , bucket , object )
2018-05-10 19:53:42 -04:00
}
Revert heal locks (#12365)
A lot of healing is likely to be on non-existing objects and
locks are very expensive and will slow down scanning
significantly.
In cases where all are valid or, all are broken allow
rejection without locking.
Keep the existing behavior, but move the check for
dangling objects to after the lock has been acquired.
```
_, err = getLatestFileInfo(ctx, partsMetadata, errs)
if err != nil {
return er.purgeObjectDangling(ctx, bucket, object, versionID, partsMetadata, errs, []error{}, opts)
}
```
Revert "heal: Hold lock when reading xl.meta from disks (#12362)"
This reverts commit abd32065aaae4080396a1b4b04a110454368b028
2021-05-25 20:02:06 -04:00
storageDisks := er . getDisks ( )
storageEndpoints := er . getEndpoints ( )
2021-03-29 20:00:55 -04:00
// When versionID is empty, we read directly from the `null` versionID for healing.
if versionID == "" {
versionID = nullVersionID
}
Revert heal locks (#12365)
A lot of healing is likely to be on non-existing objects and
locks are very expensive and will slow down scanning
significantly.
In cases where all are valid or, all are broken allow
rejection without locking.
Keep the existing behavior, but move the check for
dangling objects to after the lock has been acquired.
```
_, err = getLatestFileInfo(ctx, partsMetadata, errs)
if err != nil {
return er.purgeObjectDangling(ctx, bucket, object, versionID, partsMetadata, errs, []error{}, opts)
}
```
Revert "heal: Hold lock when reading xl.meta from disks (#12362)"
This reverts commit abd32065aaae4080396a1b4b04a110454368b028
2021-05-25 20:02:06 -04:00
// Perform quick read without lock.
// This allows to quickly check if all is ok or all are missing.
2023-11-21 00:33:47 -05:00
_ , errs := readAllFileInfo ( healCtx , storageDisks , bucket , object , versionID , false , false )
2020-10-28 12:18:35 -04:00
if isAllNotFound ( errs ) {
2022-06-20 11:07:45 -04:00
err := errFileNotFound
if versionID != "" {
err = errFileVersionNotFound
}
2020-11-23 12:12:17 -05:00
// Nothing to do, file is already gone.
2021-07-26 11:01:41 -04:00
return er . defaultHealResult ( FileInfo { } , storageDisks , storageEndpoints ,
2022-06-20 11:07:45 -04:00
errs , bucket , object , versionID ) , toObjectErr ( err , bucket , object , versionID )
2019-02-05 20:58:48 -05:00
}
2016-11-16 19:42:23 -05:00
// Heal the object.
2022-03-04 21:24:34 -05:00
hr , err = er . healObject ( healCtx , bucket , object , versionID , opts )
if errors . Is ( err , errFileCorrupt ) && opts . ScanMode != madmin . HealDeepScan {
// Instead of returning an error when a bitrot error is detected
// during a normal heal scan, heal again with bitrot flag enabled.
opts . ScanMode = madmin . HealDeepScan
hr , err = er . healObject ( healCtx , bucket , object , versionID , opts )
}
2022-06-20 11:07:45 -04:00
return hr , toObjectErr ( err , bucket , object , versionID )
2016-11-16 19:42:23 -05:00
}
2022-08-31 04:56:12 -04:00
// healTrace sends healing results to trace output.
2022-12-08 10:49:10 -05:00
func healTrace ( funcName healingMetric , startTime time . Time , bucket , object string , opts * madmin . HealOpts , err error , result * madmin . HealResultItem ) {
2022-08-31 04:56:12 -04:00
tr := madmin . TraceInfo {
TraceType : madmin . TraceHealing ,
Time : startTime ,
NodeName : globalLocalNodeName ,
2022-08-31 15:28:03 -04:00
FuncName : "heal." + funcName . String ( ) ,
2022-08-31 04:56:12 -04:00
Duration : time . Since ( startTime ) ,
2022-08-31 15:28:03 -04:00
Path : pathJoin ( bucket , decodeDirObject ( object ) ) ,
2022-08-31 04:56:12 -04:00
}
2022-11-28 13:20:55 -05:00
if opts != nil {
2023-02-21 12:33:33 -05:00
tr . Custom = map [ string ] string {
"dry" : fmt . Sprint ( opts . DryRun ) ,
"remove" : fmt . Sprint ( opts . Remove ) ,
"recreate" : fmt . Sprint ( opts . Recreate ) ,
"mode" : fmt . Sprint ( opts . ScanMode ) ,
}
2023-11-22 15:30:31 -05:00
if result != nil {
tr . Custom [ "version-id" ] = result . VersionID
tr . Custom [ "disks" ] = strconv . Itoa ( result . DiskCount )
}
2022-11-28 13:20:55 -05:00
}
2022-08-31 04:56:12 -04:00
if err != nil {
tr . Error = err . Error ( )
} else {
tr . HealResult = result
}
globalTrace . Publish ( tr )
}