2019-10-28 18:27:49 +01:00
|
|
|
/*
|
|
|
|
* MinIO Cloud Storage, (C) 2019 MinIO, Inc.
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package cmd
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/minio/minio/cmd/logger"
|
|
|
|
"github.com/minio/minio/pkg/madmin"
|
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// bgHealingUUID is the fixed client token under which the background
	// heal sequence is created and later looked up.
	bgHealingUUID = "0000-0000-0000-0000"

	// leaderTick is how long to sleep after a failed attempt to acquire
	// the leader lock before trying again.
	leaderTick = time.Hour

	// healTick is how long to sleep between checks of whether healInterval
	// has elapsed since the last scan.
	healTick = time.Hour

	// healInterval is the minimum time between two full healing scans.
	healInterval = 30 * 24 * time.Hour
)
|
|
|
|
|
|
|
|
// leaderLockTimeout is the dynamic timeout passed to GetLock when trying to
// acquire the healing leader lock.
var leaderLockTimeout = newDynamicTimeout(time.Minute, time.Minute)
|
|
|
|
|
|
|
|
// NewBgHealSequence creates a background healing sequence
|
|
|
|
// operation which crawls all objects and heal them.
|
|
|
|
func newBgHealSequence(numDisks int) *healSequence {
|
|
|
|
|
|
|
|
reqInfo := &logger.ReqInfo{API: "BackgroundHeal"}
|
|
|
|
ctx := logger.SetReqInfo(context.Background(), reqInfo)
|
|
|
|
|
|
|
|
hs := madmin.HealOpts{
|
|
|
|
// Remove objects that do not have read-quorum
|
|
|
|
Remove: true,
|
|
|
|
ScanMode: madmin.HealNormalScan,
|
|
|
|
}
|
|
|
|
|
|
|
|
return &healSequence{
|
|
|
|
sourceCh: make(chan string),
|
|
|
|
startTime: UTCNow(),
|
|
|
|
clientToken: bgHealingUUID,
|
|
|
|
settings: hs,
|
|
|
|
currentStatus: healSequenceStatus{
|
|
|
|
Summary: healNotStartedStatus,
|
|
|
|
HealSettings: hs,
|
|
|
|
NumDisks: numDisks,
|
|
|
|
updateLock: &sync.RWMutex{},
|
|
|
|
},
|
|
|
|
traverseAndHealDoneCh: make(chan error),
|
|
|
|
stopSignalCh: make(chan struct{}),
|
|
|
|
ctx: ctx,
|
|
|
|
reportProgress: false,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func getLocalBackgroundHealStatus() madmin.BgHealState {
|
|
|
|
bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
|
|
|
|
if !ok {
|
|
|
|
return madmin.BgHealState{}
|
|
|
|
}
|
|
|
|
|
|
|
|
return madmin.BgHealState{
|
|
|
|
ScannedItemsCount: bgSeq.scannedItemsCount,
|
|
|
|
LastHealActivity: bgSeq.lastHealActivity,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// healErasureSet lists and heals all objects in a specific erasure set
|
|
|
|
func healErasureSet(ctx context.Context, setIndex int, xlObj *xlObjects) error {
|
|
|
|
buckets, err := xlObj.ListBuckets(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Get background heal sequence to send elements to heal
|
|
|
|
var bgSeq *healSequence
|
|
|
|
var ok bool
|
|
|
|
for {
|
|
|
|
bgSeq, ok = globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
|
|
|
|
if ok {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
time.Sleep(time.Second)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Heal all buckets with all objects
|
|
|
|
for _, bucket := range buckets {
|
|
|
|
// Heal current bucket
|
|
|
|
bgSeq.sourceCh <- bucket.Name
|
|
|
|
|
|
|
|
// List all objects in the current bucket and heal them
|
|
|
|
listDir := listDirFactory(ctx, xlObj.getLoadBalancedDisks()...)
|
|
|
|
walkResultCh := startTreeWalk(ctx, bucket.Name, "", "", true, listDir, nil)
|
|
|
|
for walkEntry := range walkResultCh {
|
|
|
|
bgSeq.sourceCh <- pathJoin(bucket.Name, walkEntry.entry)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Healing leader will take the charge of healing all erasure sets
|
2019-11-19 17:42:27 -08:00
|
|
|
func execLeaderTasks(z *xlZones) {
|
2019-10-28 18:27:49 +01:00
|
|
|
ctx := context.Background()
|
|
|
|
|
|
|
|
// Hold a lock so only one server performs auto-healing
|
2019-11-19 17:42:27 -08:00
|
|
|
leaderLock := z.NewNSLock(ctx, minioMetaBucket, "leader")
|
2019-10-28 18:27:49 +01:00
|
|
|
for {
|
|
|
|
err := leaderLock.GetLock(leaderLockTimeout)
|
|
|
|
if err == nil {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
time.Sleep(leaderTick)
|
|
|
|
}
|
|
|
|
|
2019-11-19 17:42:27 -08:00
|
|
|
// Hold a lock for healing the erasure set
|
|
|
|
zeroDuration := time.Millisecond
|
|
|
|
zeroDynamicTimeout := newDynamicTimeout(zeroDuration, zeroDuration)
|
|
|
|
|
2019-10-28 18:27:49 +01:00
|
|
|
lastScanTime := time.Now() // So that we don't heal immediately, but after one month.
|
|
|
|
for {
|
|
|
|
if time.Since(lastScanTime) < healInterval {
|
|
|
|
time.Sleep(healTick)
|
|
|
|
continue
|
|
|
|
}
|
2019-11-19 17:42:27 -08:00
|
|
|
for _, zone := range z.zones {
|
|
|
|
// Heal set by set
|
|
|
|
for i, set := range zone.sets {
|
|
|
|
setLock := z.zones[0].NewNSLock(ctx, "system", fmt.Sprintf("erasure-set-heal-%d", i))
|
|
|
|
if err := setLock.GetLock(zeroDynamicTimeout); err != nil {
|
|
|
|
logger.LogIf(ctx, err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if err := healErasureSet(ctx, i, set); err != nil {
|
|
|
|
setLock.Unlock()
|
|
|
|
logger.LogIf(ctx, err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
setLock.Unlock()
|
2019-10-28 18:27:49 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
lastScanTime = time.Now()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func startGlobalHeal() {
|
|
|
|
var objAPI ObjectLayer
|
|
|
|
for {
|
2019-11-09 09:27:23 -08:00
|
|
|
objAPI = newObjectLayerWithoutSafeModeFn()
|
2019-10-28 18:27:49 +01:00
|
|
|
if objAPI == nil {
|
|
|
|
time.Sleep(time.Second)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2019-11-19 17:42:27 -08:00
|
|
|
zones, ok := objAPI.(*xlZones)
|
2019-10-28 18:27:49 +01:00
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2019-11-19 17:42:27 -08:00
|
|
|
execLeaderTasks(zones)
|
2019-10-28 18:27:49 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func initGlobalHeal() {
|
|
|
|
go startGlobalHeal()
|
|
|
|
}
|