mirror of https://github.com/minio/minio.git
initialize the disk healer early on (#19143)
This PR fixes a bug that has likely existed for a long time, with no visible workaround: in any deployment, if an entire erasure set is wiped, the cluster has no way to recover it.
This commit is contained in:
parent 0aae0180fb
commit 9a012a53ef
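The diff below follows one recurring pattern: healing-related globals that used to be created lazily by dedicated init functions (initBackgroundHealing, initGlobalContext, mrfState.init, and so on) are now initialized at their declaration, so they already exist when an early startup path, such as a freshly formatted erasure set, needs them. The sketch below is not MinIO code; healerState, newHealerState, enqueue, and the disk path are invented purely to show why the eagerly initialized form can never be observed as nil.

```go
// Minimal sketch, assuming hypothetical names: contrasts a global initialized
// at declaration with one initialized later by an explicit setup call.
package main

import "fmt"

type healerState struct {
	queue chan string // pending heal requests
}

func newHealerState() *healerState {
	return &healerState{queue: make(chan string, 8)}
}

// Initialized at declaration: usable as soon as the package is loaded.
var eagerHealer = newHealerState()

// Initialized later by an explicit call: nil until setup() runs.
var lazyHealer *healerState

func setup() { lazyHealer = newHealerState() }

func enqueue(h *healerState, disk string) bool {
	if h == nil {
		return false // the request is lost if the healer is not ready yet
	}
	select {
	case h.queue <- disk:
		return true
	default:
		return false
	}
}

func main() {
	// An early caller (say, a freshly formatted disk detected during startup)
	// can always reach the eagerly initialized healer...
	fmt.Println("eager:", enqueue(eagerHealer, "/mnt/disk1"))
	// ...but not the lazy one, unless setup() happened to run first.
	fmt.Println("lazy before setup:", enqueue(lazyHealer, "/mnt/disk1"))
	setup()
	fmt.Println("lazy after setup:", enqueue(lazyHealer, "/mnt/disk1"))
}
```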
@@ -83,7 +83,7 @@ function start_minio_3_node() {
 }
 
 function check_online() {
-    if grep -q 'Unable to initialize sub-systems' ${WORK_DIR}/dist-minio-*.log; then
+    if ! grep -q 'Status:' ${WORK_DIR}/dist-minio-*.log; then
         echo "1"
     fi
 }
@@ -109,6 +109,7 @@ function perform_test() {
 
     rm -rf ${WORK_DIR}/${1}/*/
 
+    set -x
     start_minio_3_node 120 $2
 
     rv=$(check_online)
@@ -102,7 +102,6 @@ func waitForLowHTTPReq() {
 
 func initBackgroundHealing(ctx context.Context, objAPI ObjectLayer) {
     // Run the background healer
-    globalBackgroundHealRoutine = newHealRoutine()
     for i := 0; i < globalBackgroundHealRoutine.workers; i++ {
         go globalBackgroundHealRoutine.AddWorker(ctx, objAPI)
     }
@@ -376,26 +376,8 @@ func getLocalDisksToHeal() (disksToHeal Endpoints) {
 var newDiskHealingTimeout = newDynamicTimeout(30*time.Second, 10*time.Second)
 
 func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint) error {
-    disk, format, _, err := connectEndpoint(endpoint)
-    if err != nil {
-        return fmt.Errorf("Error: %w, %s", err, endpoint)
-    }
-    defer disk.Close()
-    poolIdx := globalEndpoints.GetLocalPoolIdx(disk.Endpoint())
-    if poolIdx < 0 {
-        return fmt.Errorf("unexpected pool index (%d) found for %s", poolIdx, disk.Endpoint())
-    }
-
-    // Calculate the set index where the current endpoint belongs
-    z.serverPools[poolIdx].erasureDisksMu.RLock()
-    setIdx, _, err := findDiskIndex(z.serverPools[poolIdx].format, format)
-    z.serverPools[poolIdx].erasureDisksMu.RUnlock()
-    if err != nil {
-        return err
-    }
-    if setIdx < 0 {
-        return fmt.Errorf("unexpected set index (%d) found for %s", setIdx, disk.Endpoint())
-    }
+    poolIdx, setIdx := endpoint.PoolIdx, endpoint.SetIdx
+    disk := getStorageViaEndpoint(endpoint)
 
     // Prevent parallel erasure set healing
     locker := z.NewNSLock(minioMetaBucket, fmt.Sprintf("new-drive-healing/%d/%d", poolIdx, setIdx))
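The rewritten healFreshDisk no longer reconnects to the endpoint and rescans the on-disk format to discover where it belongs: the Endpoint already carries PoolIdx and SetIdx, and the drive is fetched through getStorageViaEndpoint (added further down in this commit). A rough sketch of that index-based lookup, with invented types and paths standing in for MinIO's globalLocalSetDrives:

```go
// Sketch only: Endpoint, localDrives, and the paths are invented to show the
// shape of an index-based lookup, not MinIO's actual code.
package main

import (
	"errors"
	"fmt"
)

type Endpoint struct {
	Path    string
	PoolIdx int
	SetIdx  int
	DiskIdx int
}

// localDrives[pool][set][disk] stands in for whatever represents a local drive.
var localDrives = [][][]string{
	{ // pool 0
		{"/mnt/p0s0d0", "/mnt/p0s0d1"}, // set 0
		{"/mnt/p0s1d0", "/mnt/p0s1d1"}, // set 1
	},
}

func driveForEndpoint(ep Endpoint) (string, error) {
	// Endpoints created before registration carry -1 indices; refuse those.
	if ep.PoolIdx < 0 || ep.SetIdx < 0 || ep.DiskIdx < 0 {
		return "", errors.New("endpoint was never assigned pool/set/disk indices")
	}
	return localDrives[ep.PoolIdx][ep.SetIdx][ep.DiskIdx], nil
}

func main() {
	ep := Endpoint{Path: "/mnt/p0s1d1", PoolIdx: 0, SetIdx: 1, DiskIdx: 1}
	drive, err := driveForEndpoint(ep)
	fmt.Println(drive, err)
}
```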
@@ -145,7 +145,6 @@ func (es *expiryState) enqueueByNewerNoncurrent(bucket string, versions []Object
     }
 }
 
-var globalExpiryState *expiryState
+var globalExpiryState = newExpiryState()
 
 func newExpiryState() *expiryState {
     return &expiryState{
@@ -155,8 +155,6 @@ func newExpiryState() *expiryState {
 }
 
 func initBackgroundExpiry(ctx context.Context, objectAPI ObjectLayer) {
-    globalExpiryState = newExpiryState()
-
     workerSize, _ := strconv.Atoi(env.Get("_MINIO_ILM_EXPIRY_WORKERS", strconv.Itoa((runtime.GOMAXPROCS(0)+1)/2)))
     if workerSize == 0 {
         workerSize = 4
@@ -185,6 +183,7 @@ func initBackgroundExpiry(ctx context.Context, objectAPI ObjectLayer) {
         }
         ewk.Wait()
     }()
 
     go func() {
         for t := range globalExpiryState.byNewerNoncurrentCh {
             nwk.Take()
@@ -88,8 +88,6 @@ func init() {
     logger.Init(GOPATH, GOROOT)
     logger.RegisterError(config.FmtError)
 
-    initGlobalContext()
-
     globalBatchJobsMetrics = batchJobMetrics{metrics: make(map[string]*batchJobInfo)}
     go globalBatchJobsMetrics.purgeJobMetrics()
 
@@ -1020,7 +1020,7 @@ func (i *scannerItem) applyTierObjSweep(ctx context.Context, o ObjectLayer, oi O
 
 // applyNewerNoncurrentVersionLimit removes noncurrent versions older than the most recent NewerNoncurrentVersions configured.
 // Note: This function doesn't update sizeSummary since it always removes versions that it doesn't return.
-func (i *scannerItem) applyNewerNoncurrentVersionLimit(ctx context.Context, _ ObjectLayer, fivs []FileInfo) ([]ObjectInfo, error) {
+func (i *scannerItem) applyNewerNoncurrentVersionLimit(ctx context.Context, _ ObjectLayer, fivs []FileInfo, expState *expiryState) ([]ObjectInfo, error) {
     done := globalScannerMetrics.time(scannerMetricApplyNonCurrent)
     defer done()
 
@@ -1087,14 +1087,14 @@ func (i *scannerItem) applyNewerNoncurrentVersionLimit(ctx context.Context, _ ObjectLayer, fivs []FileInfo) ([]ObjectInfo, error) {
         })
     }
 
-    globalExpiryState.enqueueByNewerNoncurrent(i.bucket, toDel, event)
+    expState.enqueueByNewerNoncurrent(i.bucket, toDel, event)
     return objectInfos, nil
 }
 
 // applyVersionActions will apply lifecycle checks on all versions of a scanned item. Returns versions that remain
 // after applying lifecycle checks configured.
-func (i *scannerItem) applyVersionActions(ctx context.Context, o ObjectLayer, fivs []FileInfo) ([]ObjectInfo, error) {
-    objInfos, err := i.applyNewerNoncurrentVersionLimit(ctx, o, fivs)
+func (i *scannerItem) applyVersionActions(ctx context.Context, o ObjectLayer, fivs []FileInfo, expState *expiryState) ([]ObjectInfo, error) {
+    objInfos, err := i.applyNewerNoncurrentVersionLimit(ctx, o, fivs, expState)
     if err != nil {
         return nil, err
     }
@@ -39,13 +39,13 @@ func TestApplyNewerNoncurrentVersionsLimit(t *testing.T) {
     globalBucketMetadataSys = NewBucketMetadataSys()
     globalBucketObjectLockSys = &BucketObjectLockSys{}
     globalBucketVersioningSys = &BucketVersioningSys{}
-    globalExpiryState = newExpiryState()
+    expiryState := newExpiryState()
     var wg sync.WaitGroup
     wg.Add(1)
     expired := make([]ObjectToDelete, 0, 5)
     go func() {
         defer wg.Done()
-        for t := range globalExpiryState.byNewerNoncurrentCh {
+        for t := range expiryState.byNewerNoncurrentCh {
             expired = append(expired, t.versions...)
         }
     }()
@@ -116,7 +116,7 @@ func TestApplyNewerNoncurrentVersionsLimit(t *testing.T) {
     for i, fi := range fivs[:2] {
         wants[i] = fi.ToObjectInfo(bucket, obj, versioned)
     }
-    gots, err := item.applyNewerNoncurrentVersionLimit(context.TODO(), objAPI, fivs)
+    gots, err := item.applyNewerNoncurrentVersionLimit(context.TODO(), objAPI, fivs, expiryState)
     if err != nil {
         t.Fatalf("Failed with err: %v", err)
     }
@@ -125,7 +125,7 @@ func TestApplyNewerNoncurrentVersionsLimit(t *testing.T) {
     }
 
     // Close expiry state's channel to inspect object versions enqueued for expiration
-    close(globalExpiryState.byNewerNoncurrentCh)
+    close(expiryState.byNewerNoncurrentCh)
     wg.Wait()
     for _, obj := range expired {
         switch obj.ObjectV.VersionID {
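The scanner changes above thread an *expiryState parameter through applyNewerNoncurrentVersionLimit and applyVersionActions instead of reaching for globalExpiryState, which is what lets the test build its own state and drain its channel. A minimal sketch of that injection pattern, with invented names (applyLimit, plain strings in place of real version metadata):

```go
// Sketch of dependency-injecting the expiry state so tests can inspect what
// was enqueued; expiryState here is a toy, not MinIO's type.
package main

import "fmt"

type expiryState struct {
	ch chan string
}

func newExpiryState() *expiryState {
	return &expiryState{ch: make(chan string, 8)}
}

// applyLimit keeps the newest `keep` versions and enqueues the rest for expiry
// on the state it was given, rather than on a package-level global.
func applyLimit(versions []string, keep int, es *expiryState) []string {
	if len(versions) <= keep {
		return versions
	}
	for _, v := range versions[keep:] {
		es.ch <- v
	}
	return versions[:keep]
}

func main() {
	es := newExpiryState() // a test can build its own state and inspect it
	remaining := applyLimit([]string{"v4", "v3", "v2", "v1"}, 2, es)
	close(es.ch)
	var expired []string
	for v := range es.ch {
		expired = append(expired, v)
	}
	fmt.Println(remaining, expired) // [v4 v3] [v2 v1]
}
```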
@@ -175,8 +175,29 @@ func newErasureServerPools(ctx context.Context, endpointServerPools EndpointServ
     z.poolMeta = newPoolMeta(z, poolMeta{})
     z.poolMeta.dontSave = true
 
+    bootstrapTrace("newSharedLock", func() {
+        globalLeaderLock = newSharedLock(GlobalContext, z, "leader.lock")
+    })
+
+    // Enable background operations on
+    //
+    // - Disk auto healing
+    // - MRF (most recently failed) healing
+    // - Background expiration routine for lifecycle policies
+    bootstrapTrace("initAutoHeal", func() {
+        initAutoHeal(GlobalContext, z)
+    })
+
+    bootstrapTrace("initHealMRF", func() {
+        go globalMRFState.healRoutine(z)
+    })
+
+    bootstrapTrace("initBackgroundExpiry", func() {
+        initBackgroundExpiry(GlobalContext, z)
+    })
+
     // initialize the object layer.
-    setObjectLayer(z)
+    defer setObjectLayer(z)
 
     r := rand.New(rand.NewSource(time.Now().UnixNano()))
     attempt := 1
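In this hunk the leader lock, auto-heal, MRF, and background expiry are bootstrapped inside newErasureServerPools itself, and setObjectLayer is deferred so the pools are published as the global object layer only once the constructor, including those bootstraps, has finished. A toy illustration of that ordering, assuming made-up names (objectLayer, published, newObjectLayer):

```go
// Sketch: deferring the "publish" step in a constructor guarantees that
// background services started inside it are wired up before other goroutines
// can observe the object through the global. Names are hypothetical.
package main

import (
	"fmt"
	"sync/atomic"
)

type objectLayer struct{ healerReady bool }

var published atomic.Pointer[objectLayer]

func newObjectLayer() *objectLayer {
	z := &objectLayer{}
	// Runs when this function returns, after every field and worker is set up.
	defer published.Store(z)

	// ... start healers, MRF, expiry here ...
	z.healerReady = true
	return z
}

func main() {
	z := newObjectLayer()
	fmt.Println(z.healerReady, published.Load() == z)
}
```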
@@ -224,7 +224,9 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
 
         disks, _ := er.getOnlineDisksWithHealing(false)
         if len(disks) == 0 {
-            logger.LogIf(ctx, fmt.Errorf("no online disks found to heal the bucket `%s`", bucket))
+            // No object healing necessary
+            tracker.bucketDone(bucket)
+            logger.LogIf(ctx, tracker.update(ctx))
             continue
         }
 
@@ -472,9 +474,7 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
 
 func healBucket(bucket string, scan madmin.HealScanMode) error {
     // Get background heal sequence to send elements to heal
-    globalHealStateLK.Lock()
     bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
-    globalHealStateLK.Unlock()
     if ok {
         return bgSeq.queueHealTask(healSource{bucket: bucket}, madmin.HealItemBucket)
     }
@@ -484,9 +484,7 @@ func healBucket(bucket string, scan madmin.HealScanMode) error {
 // healObject sends the given object/version to the background healing workers
 func healObject(bucket, object, versionID string, scan madmin.HealScanMode) error {
     // Get background heal sequence to send elements to heal
-    globalHealStateLK.Lock()
     bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
-    globalHealStateLK.Unlock()
     if ok {
         return bgSeq.queueHealTask(healSource{
             bucket: bucket,
@@ -379,13 +379,15 @@ var (
         return *ptr
     }
 
-    globalAllHealState *allHealState
+    globalAllHealState = newHealState(GlobalContext, true)
 
     // The always present healing routine ready to heal objects
-    globalBackgroundHealRoutine *healRoutine
-    globalBackgroundHealState   *allHealState
+    globalBackgroundHealRoutine = newHealRoutine()
+    globalBackgroundHealState   = newHealState(GlobalContext, false)
 
-    globalMRFState mrfState
+    globalMRFState = mrfState{
+        opCh: make(chan partialOperation, mrfOpsQueueSize),
+    }
 
     // If writes to FS backend should be O_SYNC.
     globalFSOSync bool

cmd/mrf.go (34 changed lines)
@@ -19,7 +19,6 @@ package cmd
 
 import (
     "context"
-    "sync"
     "time"
 
     "github.com/minio/madmin-go/v3"
@@ -44,37 +43,15 @@ type partialOperation struct {
 // mrfState sncapsulates all the information
 // related to the global background MRF.
 type mrfState struct {
-    ctx   context.Context
-    pools *erasureServerPools
-
-    mu   sync.RWMutex
     opCh chan partialOperation
 }
 
-// Initialize healing MRF subsystem
-func (m *mrfState) init(ctx context.Context, objAPI ObjectLayer) {
-    m.mu.Lock()
-    defer m.mu.Unlock()
-
-    m.ctx = ctx
-    m.opCh = make(chan partialOperation, mrfOpsQueueSize)
-
-    var ok bool
-    m.pools, ok = objAPI.(*erasureServerPools)
-    if ok {
-        go m.healRoutine()
-    }
-}
-
 // Add a partial S3 operation (put/delete) when one or more disks are offline.
 func (m *mrfState) addPartialOp(op partialOperation) {
     if m == nil {
         return
     }
 
-    m.mu.RLock()
-    defer m.mu.RUnlock()
-
     select {
     case m.opCh <- op:
     default:
@@ -86,10 +63,10 @@ var healSleeper = newDynamicSleeper(5, time.Second, false)
 // healRoutine listens to new disks reconnection events and
 // issues healing requests for queued objects belonging to the
 // corresponding erasure set
-func (m *mrfState) healRoutine() {
+func (m *mrfState) healRoutine(z *erasureServerPools) {
     for {
         select {
-        case <-m.ctx.Done():
+        case <-GlobalContext.Done():
             return
         case u, ok := <-m.opCh:
             if !ok {
@@ -115,7 +92,7 @@ func (m *mrfState) healRoutine() {
                 healBucket(u.bucket, scan)
             } else {
                 if u.allVersions {
-                    m.pools.serverPools[u.poolIndex].sets[u.setIndex].listAndHeal(u.bucket, u.object, u.scanMode, healObjectVersionsDisparity)
+                    z.serverPools[u.poolIndex].sets[u.setIndex].listAndHeal(u.bucket, u.object, u.scanMode, healObjectVersionsDisparity)
                 } else {
                     healObject(u.bucket, u.object, u.versionID, scan)
                 }
@@ -125,8 +102,3 @@ func (m *mrfState) healRoutine() {
         }
     }
 }
-
-// Initialize healing MRF
-func initHealMRF(ctx context.Context, obj ObjectLayer) {
-    globalMRFState.init(ctx, obj)
-}
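With init() gone, mrfState shrinks to a channel that is filled in at declaration (see the globals hunk above), and healRoutine receives the erasure pools from its caller rather than caching a pointer behind a mutex. A simplified sketch of that shape, using invented types (pools, string operations) in place of the real ones:

```go
// Sketch: the state struct owns only its queue; the heal loop takes the pools
// as an argument. Types and names are stand-ins, not MinIO's.
package main

import (
	"context"
	"fmt"
)

type pools struct{ name string }

type mrfState struct {
	opCh chan string // queued objects that still need healing
}

// Initialized at declaration; no init() call required before use.
var globalMRF = mrfState{opCh: make(chan string, 16)}

// healRoutine drains queued operations using the pools its caller hands it,
// instead of a pools pointer stored inside the state.
func (m *mrfState) healRoutine(ctx context.Context, z *pools) {
	for {
		select {
		case <-ctx.Done():
			return
		case op, ok := <-m.opCh:
			if !ok {
				return
			}
			fmt.Printf("healing %q on %s\n", op, z.name)
		}
	}
}

func main() {
	done := make(chan struct{})
	go func() {
		globalMRF.healRoutine(context.Background(), &pools{name: "pool-0"})
		close(done)
	}()
	globalMRF.opCh <- "bucket/object"
	close(globalMRF.opCh)
	<-done // wait until the queued operation has been processed
}
```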
@@ -30,7 +30,6 @@ import (
     "os/signal"
     "runtime"
     "strings"
-    "sync"
     "syscall"
     "time"
 
@@ -372,15 +371,7 @@ func serverHandleCmdArgs(ctxt serverCtxt) {
     globalConnWriteDeadline = ctxt.ConnWriteDeadline
 }
 
-var globalHealStateLK sync.RWMutex
-
 func initAllSubsystems(ctx context.Context) {
-    globalHealStateLK.Lock()
-    // New global heal state
-    globalAllHealState = newHealState(ctx, true)
-    globalBackgroundHealState = newHealState(ctx, false)
-    globalHealStateLK.Unlock()
-
     // Initialize notification peer targets
     globalNotificationSys = NewNotificationSys(globalEndpoints)
 
@@ -814,27 +805,6 @@ func serverMain(ctx *cli.Context) {
         globalNodeNamesHex[hex.EncodeToString(nodeNameSum[:])] = struct{}{}
     }
 
-    bootstrapTrace("newSharedLock", func() {
-        globalLeaderLock = newSharedLock(GlobalContext, newObject, "leader.lock")
-    })
-
-    // Enable background operations on
-    //
-    // - Disk auto healing
-    // - MRF (most recently failed) healing
-    // - Background expiration routine for lifecycle policies
-    bootstrapTrace("initAutoHeal", func() {
-        initAutoHeal(GlobalContext, newObject)
-    })
-
-    bootstrapTrace("initHealMRF", func() {
-        initHealMRF(GlobalContext, newObject)
-    })
-
-    bootstrapTrace("initBackgroundExpiry", func() {
-        initBackgroundExpiry(GlobalContext, newObject)
-    })
-
     var err error
     bootstrapTrace("initServerConfig", func() {
         if err = initServerConfig(GlobalContext, newObject); err != nil {
@@ -40,18 +40,11 @@ const (
 )
 
 // Global service signal channel.
-var globalServiceSignalCh chan serviceSignal
+var globalServiceSignalCh = make(chan serviceSignal)
 
 // GlobalContext context that is canceled when server is requested to shut down.
-var GlobalContext context.Context
-
 // cancelGlobalContext can be used to indicate server shutdown.
-var cancelGlobalContext context.CancelFunc
+var GlobalContext, cancelGlobalContext = context.WithCancel(context.Background())
 
-func initGlobalContext() {
-    GlobalContext, cancelGlobalContext = context.WithCancel(context.Background())
-    globalServiceSignalCh = make(chan serviceSignal)
-}
-
 // restartProcess starts a new process passing it the active fd's. It
 // doesn't fork, but starts a new process using the same environment and
@@ -54,7 +54,7 @@ var errDiskStale = errors.New("drive stale")
 
 // To abstract a disk over network.
 type storageRESTServer struct {
-    poolIndex, setIndex, diskIndex int
+    endpoint Endpoint
 }
 
 var (
|
||||||
storageListDirRPC = grid.NewStream[*grid.MSS, grid.NoPayload, *ListDirResult](grid.HandlerListDir, grid.NewMSS, nil, func() *ListDirResult { return &ListDirResult{} }).WithOutCapacity(1)
|
storageListDirRPC = grid.NewStream[*grid.MSS, grid.NoPayload, *ListDirResult](grid.HandlerListDir, grid.NewMSS, nil, func() *ListDirResult { return &ListDirResult{} }).WithOutCapacity(1)
|
||||||
)
|
)
|
||||||
|
|
||||||
func (s *storageRESTServer) getStorage() StorageAPI {
|
func getStorageViaEndpoint(endpoint Endpoint) StorageAPI {
|
||||||
globalLocalDrivesMu.RLock()
|
globalLocalDrivesMu.RLock()
|
||||||
defer globalLocalDrivesMu.RUnlock()
|
defer globalLocalDrivesMu.RUnlock()
|
||||||
return globalLocalSetDrives[s.poolIndex][s.setIndex][s.diskIndex]
|
return globalLocalSetDrives[endpoint.PoolIdx][endpoint.SetIdx][endpoint.DiskIdx]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *storageRESTServer) getStorage() StorageAPI {
|
||||||
|
return getStorageViaEndpoint(s.endpoint)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *storageRESTServer) writeErrorResponse(w http.ResponseWriter, err error) {
|
func (s *storageRESTServer) writeErrorResponse(w http.ResponseWriter, err error) {
|
||||||
|
@ -1287,9 +1291,7 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
|
||||||
}
|
}
|
||||||
|
|
||||||
server := &storageRESTServer{
|
server := &storageRESTServer{
|
||||||
poolIndex: endpoint.PoolIdx,
|
endpoint: endpoint,
|
||||||
setIndex: endpoint.SetIdx,
|
|
||||||
diskIndex: endpoint.DiskIdx,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
subrouter := router.PathPrefix(path.Join(storageRESTPrefix, endpoint.Path)).Subrouter()
|
subrouter := router.PathPrefix(path.Join(storageRESTPrefix, endpoint.Path)).Subrouter()
|
||||||
|
|
|
@@ -581,7 +581,7 @@ func (s *xlStorage) NSScanner(ctx context.Context, cache dataUsageCache, updates
     }
 
     done := globalScannerMetrics.time(scannerMetricApplyAll)
-    objInfos, err := item.applyVersionActions(ctx, objAPI, fivs.Versions)
+    objInfos, err := item.applyVersionActions(ctx, objAPI, fivs.Versions, globalExpiryState)
     done()
 
     if err != nil {