2021-07-16 01:32:06 -04:00
|
|
|
// Copyright (c) 2015-2021 MinIO, Inc.
|
|
|
|
//
|
|
|
|
// This file is part of MinIO Object Storage stack
|
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
package cmd
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
2023-06-19 20:53:08 -04:00
|
|
|
"github.com/minio/madmin-go/v3"
|
2021-07-16 01:32:06 -04:00
|
|
|
)
|
|
|
|
|
|
|
|
// mrfOpsQueueSize is the buffer capacity of the channel that queues
// partial operations for MRF healing. Once the buffer is full,
// addPartialOp drops further operations instead of blocking.
const mrfOpsQueueSize = 100000
|
|
|
|
|
|
|
|
// partialOperation is a successful upload/delete of an object
|
|
|
|
// but not written in all disks (having quorum)
|
|
|
|
type partialOperation struct {
|
2023-06-24 22:31:04 -04:00
|
|
|
bucket string
|
|
|
|
object string
|
|
|
|
versionID string
|
|
|
|
allVersions bool
|
|
|
|
setIndex, poolIndex int
|
|
|
|
queued time.Time
|
2023-12-08 15:26:01 -05:00
|
|
|
scanMode madmin.HealScanMode
|
2021-07-16 01:32:06 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// mrfState sncapsulates all the information
|
|
|
|
// related to the global background MRF.
|
|
|
|
type mrfState struct {
|
2023-06-24 22:31:04 -04:00
|
|
|
ctx context.Context
|
|
|
|
pools *erasureServerPools
|
2021-07-16 01:32:06 -04:00
|
|
|
|
2024-01-30 17:10:06 -05:00
|
|
|
mu sync.RWMutex
|
2023-04-19 10:47:42 -04:00
|
|
|
opCh chan partialOperation
|
2021-07-16 01:32:06 -04:00
|
|
|
}
|
|
|
|
|
2021-07-26 11:00:59 -04:00
|
|
|
// Initialize healing MRF subsystem
|
|
|
|
func (m *mrfState) init(ctx context.Context, objAPI ObjectLayer) {
|
|
|
|
m.mu.Lock()
|
|
|
|
defer m.mu.Unlock()
|
|
|
|
|
|
|
|
m.ctx = ctx
|
|
|
|
m.opCh = make(chan partialOperation, mrfOpsQueueSize)
|
|
|
|
|
2023-06-24 22:31:04 -04:00
|
|
|
var ok bool
|
|
|
|
m.pools, ok = objAPI.(*erasureServerPools)
|
|
|
|
if ok {
|
|
|
|
go m.healRoutine()
|
|
|
|
}
|
2021-07-26 11:00:59 -04:00
|
|
|
}
|
|
|
|
|
2021-07-16 01:32:06 -04:00
|
|
|
// Add a partial S3 operation (put/delete) when one or more disks are offline.
|
|
|
|
func (m *mrfState) addPartialOp(op partialOperation) {
|
2023-04-19 10:47:42 -04:00
|
|
|
if m == nil {
|
2021-07-16 01:32:06 -04:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-01-30 17:10:06 -05:00
|
|
|
m.mu.RLock()
|
|
|
|
defer m.mu.RUnlock()
|
|
|
|
|
2021-07-16 01:32:06 -04:00
|
|
|
select {
|
|
|
|
case m.opCh <- op:
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-19 10:47:42 -04:00
|
|
|
// healSleeper paces healRoutine: a timer is obtained from it before each
// heal and waited on after the heal has been issued.
// NOTE(review): the meaning of the (5, time.Second, false) arguments is
// defined by newDynamicSleeper, which is not visible here — confirm there.
var healSleeper = newDynamicSleeper(5, time.Second, false)
|
2021-07-16 01:32:06 -04:00
|
|
|
|
|
|
|
// healRoutine listens to new disks reconnection events and
|
|
|
|
// issues healing requests for queued objects belonging to the
|
|
|
|
// corresponding erasure set
|
|
|
|
func (m *mrfState) healRoutine() {
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-m.ctx.Done():
|
|
|
|
return
|
2023-04-19 10:47:42 -04:00
|
|
|
case u, ok := <-m.opCh:
|
|
|
|
if !ok {
|
|
|
|
return
|
2021-07-16 01:32:06 -04:00
|
|
|
}
|
|
|
|
|
2023-04-19 10:47:42 -04:00
|
|
|
now := time.Now()
|
|
|
|
if now.Sub(u.queued) < time.Second {
|
|
|
|
// let recently failed networks to reconnect
|
|
|
|
// making MRF wait for 1s before retrying,
|
|
|
|
// i.e 4 reconnect attempts.
|
2023-12-04 14:33:39 -05:00
|
|
|
time.Sleep(time.Second)
|
2021-07-16 01:32:06 -04:00
|
|
|
}
|
|
|
|
|
2023-04-19 10:47:42 -04:00
|
|
|
// wait on timer per heal
|
|
|
|
wait := healSleeper.Timer(context.Background())
|
|
|
|
|
2023-12-08 15:26:01 -05:00
|
|
|
scan := madmin.HealNormalScan
|
|
|
|
if u.scanMode != 0 {
|
|
|
|
scan = u.scanMode
|
|
|
|
}
|
2023-04-19 10:47:42 -04:00
|
|
|
if u.object == "" {
|
2023-12-08 15:26:01 -05:00
|
|
|
healBucket(u.bucket, scan)
|
2023-04-19 10:47:42 -04:00
|
|
|
} else {
|
2023-06-24 22:31:04 -04:00
|
|
|
if u.allVersions {
|
2023-12-08 15:26:01 -05:00
|
|
|
m.pools.serverPools[u.poolIndex].sets[u.setIndex].listAndHeal(u.bucket, u.object, u.scanMode, healObjectVersionsDisparity)
|
2023-06-24 22:31:04 -04:00
|
|
|
} else {
|
2023-12-08 15:26:01 -05:00
|
|
|
healObject(u.bucket, u.object, u.versionID, scan)
|
2023-06-24 22:31:04 -04:00
|
|
|
}
|
2021-07-16 01:32:06 -04:00
|
|
|
}
|
2021-08-25 20:46:20 -04:00
|
|
|
|
2023-04-19 10:47:42 -04:00
|
|
|
wait()
|
2021-07-16 01:32:06 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Initialize healing MRF
|
|
|
|
func initHealMRF(ctx context.Context, obj ObjectLayer) {
|
2021-07-26 11:00:59 -04:00
|
|
|
globalMRFState.init(ctx, obj)
|
2021-07-16 01:32:06 -04:00
|
|
|
}
|