fix: simplify background heal and trigger heal items early (#9928)

Bonus fix during versioning merge one of the PR was missing
the offline/online disk count fix from #9801 port it correctly
over to the master branch from release.

Additionally, add versionID support for MRF

Fixes #9910
Fixes #9931
This commit is contained in:
Harshavardhana 2020-06-29 13:07:26 -07:00 committed by GitHub
parent 7cea3f7da4
commit a38ce29137
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 156 additions and 137 deletions

View File

@ -33,6 +33,7 @@ function start_minio_3_node() {
declare -a ARGS
export MINIO_ACCESS_KEY=minio
export MINIO_SECRET_KEY=minio123
export MINIO_ERASURE_SET_DRIVE_COUNT=6
start_port=$(shuf -i 10000-65000 -n 1)
for i in $(seq 1 3); do

View File

@ -173,7 +173,7 @@ func (a adminAPIHandlers) GetConfigKVHandler(w http.ResponseWriter, r *http.Requ
}
cfg := globalServerConfig
if globalSafeMode {
if newObjectLayerFn() == nil {
var err error
cfg, err = getValidConfig(objectAPI)
if err != nil {

View File

@ -718,10 +718,6 @@ func (a adminAPIHandlers) HealHandler(w http.ResponseWriter, r *http.Request) {
}
}
// find number of disks in the setup, ignore any errors here.
info, _ := objectAPI.StorageInfo(ctx, false)
numDisks := info.Backend.OfflineDisks.Sum() + info.Backend.OnlineDisks.Sum()
healPath := pathJoin(hip.bucket, hip.objPrefix)
if hip.clientToken == "" && !hip.forceStart && !hip.forceStop {
nh, exists := globalAllHealState.getHealSequence(healPath)
@ -764,7 +760,7 @@ func (a adminAPIHandlers) HealHandler(w http.ResponseWriter, r *http.Request) {
respCh <- hr
}()
case hip.clientToken == "":
nh := newHealSequence(GlobalContext, hip.bucket, hip.objPrefix, handlers.GetSourceIP(r), numDisks, hip.hs, hip.forceStart)
nh := newHealSequence(GlobalContext, hip.bucket, hip.objPrefix, handlers.GetSourceIP(r), hip.hs, hip.forceStart)
go func() {
respBytes, apiErr, errMsg := globalAllHealState.LaunchNewHealSequence(nh)
hr := healResp{respBytes, apiErr, errMsg}
@ -1409,10 +1405,8 @@ func (a adminAPIHandlers) ServerInfoHandler(w http.ResponseWriter, r *http.Reque
}
}
mode := ""
if globalSafeMode {
mode = "safe"
} else {
mode := "safe"
if newObjectLayerFn() != nil {
mode = "online"
}

View File

@ -76,9 +76,6 @@ type healSequenceStatus struct {
FailureDetail string `json:"Detail,omitempty"`
StartTime time.Time `json:"StartTime"`
// disk information
NumDisks int `json:"NumDisks"`
// settings for the heal sequence
HealSettings madmin.HealOpts `json:"Settings"`
@ -94,8 +91,8 @@ type allHealState struct {
healSeqMap map[string]*healSequence
}
// initHealState - initialize healing apparatus
func initHealState() *allHealState {
// newHealState - initialize global heal state management
func newHealState() *allHealState {
healState := &allHealState{
healSeqMap: make(map[string]*healSequence),
}
@ -367,7 +364,7 @@ type healSequence struct {
// NewHealSequence - creates healSettings, assumes bucket and
// objPrefix are already validated.
func newHealSequence(ctx context.Context, bucket, objPrefix, clientAddr string,
numDisks int, hs madmin.HealOpts, forceStart bool) *healSequence {
hs madmin.HealOpts, forceStart bool) *healSequence {
reqInfo := &logger.ReqInfo{RemoteHost: clientAddr, API: "Heal", BucketName: bucket}
reqInfo.AppendTags("prefix", objPrefix)
@ -386,7 +383,6 @@ func newHealSequence(ctx context.Context, bucket, objPrefix, clientAddr string,
currentStatus: healSequenceStatus{
Summary: healNotStartedStatus,
HealSettings: hs,
NumDisks: numDisks,
},
traverseAndHealDoneCh: make(chan error),
cancelCtx: cancel,
@ -677,11 +673,6 @@ func (h *healSequence) queueHealTask(source healSource, healType madmin.HealItem
}
func (h *healSequence) healItemsFromSourceCh() error {
bucketsOnly := true // heal buckets only, not objects.
if err := h.healItems(bucketsOnly); err != nil {
logger.LogIf(h.ctx, err)
}
for {
select {
case source, ok := <-h.sourceCh:
@ -716,7 +707,7 @@ func (h *healSequence) healFromSourceCh() {
h.healItemsFromSourceCh()
}
func (h *healSequence) healItems(bucketsOnly bool) error {
func (h *healSequence) healDiskMeta() error {
// Start with format healing
if err := h.healDiskFormat(); err != nil {
return err
@ -728,7 +719,11 @@ func (h *healSequence) healItems(bucketsOnly bool) error {
}
// Start healing the bucket config prefix.
if err := h.healMinioSysMeta(bucketConfigPrefix)(); err != nil {
return h.healMinioSysMeta(bucketConfigPrefix)()
}
func (h *healSequence) healItems(bucketsOnly bool) error {
if err := h.healDiskMeta(); err != nil {
return err
}

View File

@ -104,7 +104,7 @@ func (h *healRoutine) run(ctx context.Context, objAPI ObjectLayer) {
}
}
func initHealRoutine() *healRoutine {
func newHealRoutine() *healRoutine {
return &healRoutine{
tasks: make(chan healTask),
doneCh: make(chan struct{}),
@ -112,22 +112,22 @@ func initHealRoutine() *healRoutine {
}
func startBackgroundHealing(ctx context.Context, objAPI ObjectLayer) {
func initBackgroundHealing(ctx context.Context, objAPI ObjectLayer) {
// Run the background healer
globalBackgroundHealRoutine = initHealRoutine()
globalBackgroundHealRoutine = newHealRoutine()
go globalBackgroundHealRoutine.run(ctx, objAPI)
// Launch the background healer sequence to track
// background healing operations, ignore errors
// errors are handled into offline disks already.
info, _ := objAPI.StorageInfo(ctx, false)
numDisks := info.Backend.OnlineDisks.Sum() + info.Backend.OfflineDisks.Sum()
nh := newBgHealSequence(numDisks)
globalBackgroundHealState.LaunchNewHealSequence(nh)
nh := newBgHealSequence()
// Heal any disk format and metadata early, if possible.
if err := nh.healDiskMeta(); err != nil {
if newObjectLayerFn() != nil {
// log only in situations, when object layer
// has fully initialized.
logger.LogIf(nh.ctx, err)
}
}
func initBackgroundHealing(ctx context.Context, objAPI ObjectLayer) {
go startBackgroundHealing(ctx, objAPI)
globalBackgroundHealState.LaunchNewHealSequence(nh)
}
// healDiskFormat - heals format.json, return value indicates if a
@ -144,12 +144,20 @@ func healDiskFormat(ctx context.Context, objAPI ObjectLayer, opts madmin.HealOpt
// Healing succeeded notify the peers to reload format and re-initialize disks.
// We will not notify peers if healing is not required.
if err == nil {
// Notify servers in background and retry if needed.
go func() {
retry:
for _, nerr := range globalNotificationSys.ReloadFormat(opts.DryRun) {
if nerr.Err != nil {
if nerr.Err.Error() == errServerNotInitialized.Error() {
time.Sleep(time.Second)
goto retry
}
logger.GetReqInfo(ctx).SetTags("peerAddress", nerr.Host.String())
logger.LogIf(ctx, nerr.Err)
}
}
}()
}
return res, nil

View File

@ -149,7 +149,7 @@ func readServerConfig(ctx context.Context, objAPI ObjectLayer) (config.Config, e
if err != nil {
// Config not found for some reason, allow things to continue
// by initializing a new fresh config in safe mode.
if err == errConfigNotFound && globalSafeMode {
if err == errConfigNotFound && newObjectLayerFn() == nil {
return newServerConfig(), nil
}
return nil, err

View File

@ -668,9 +668,9 @@ func (er erasureObjects) CompleteMultipartUpload(ctx context.Context, bucket str
}
// Check if there is any offline disk and add it to the MRF list
for i := 0; i < len(onlineDisks); i++ {
if onlineDisks[i] == nil || storageDisks[i] == nil {
er.addPartialUpload(bucket, object)
for i, disk := range onlineDisks {
if disk == nil || storageDisks[i] == nil {
er.addPartial(bucket, object, fi.VersionID)
break
}
}

View File

@ -714,7 +714,7 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
// during this upload, send it to the MRF list.
for i := 0; i < len(onlineDisks); i++ {
if onlineDisks[i] == nil || storageDisks[i] == nil {
er.addPartialUpload(bucket, object)
er.addPartial(bucket, object, fi.VersionID)
break
}
}
@ -897,6 +897,23 @@ func (er erasureObjects) DeleteObjects(ctx context.Context, bucket string, objec
}
}
// Check failed deletes across multiple objects
for _, version := range versions {
// Check if there is any offline disk and add it to the MRF list
for _, disk := range storageDisks {
if disk == nil {
// ignore delete markers for quorum
if version.Deleted {
continue
}
// all other direct versionId references we should
// ensure no dangling file is left over.
er.addPartial(bucket, version.Name, version.VersionID)
break
}
}
}
return dobjects, errs
}
@ -935,14 +952,21 @@ func (er erasureObjects) DeleteObject(ctx context.Context, bucket, object string
return objInfo, toObjectErr(err, bucket, object)
}
for _, disk := range storageDisks {
if disk == nil {
er.addPartial(bucket, object, opts.VersionID)
break
}
}
return ObjectInfo{Bucket: bucket, Name: object, VersionID: opts.VersionID}, nil
}
// Send the successful but partial upload, however ignore
// Send the successful but partial upload/delete, however ignore
// if the channel is blocked by other items.
func (er erasureObjects) addPartialUpload(bucket, key string) {
func (er erasureObjects) addPartial(bucket, object, versionID string) {
select {
case er.mrfUploadCh <- partialUpload{bucket: bucket, object: key}:
case er.mrfOpCh <- partialOperation{bucket: bucket, object: object, versionID: versionID}:
default:
}
}

View File

@ -93,7 +93,7 @@ type erasureSets struct {
poolVersions *MergeWalkVersionsPool
mrfMU sync.Mutex
mrfUploads map[healSource]int
mrfOperations map[healSource]int
}
func isEndpointConnected(diskMap map[string]StorageAPI, endpoint string) bool {
@ -307,7 +307,7 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto
pool: NewMergeWalkPool(globalMergeLookupTimeout),
poolSplunk: NewMergeWalkPool(globalMergeLookupTimeout),
poolVersions: NewMergeWalkVersionsPool(globalMergeLookupTimeout),
mrfUploads: make(map[healSource]int),
mrfOperations: make(map[healSource]int),
}
mutex := newNSLock(globalIsDistErasure)
@ -351,7 +351,7 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto
getEndpoints: s.GetEndpoints(i),
nsMutex: mutex,
bp: bp,
mrfUploadCh: make(chan partialUpload, 10000),
mrfOpCh: make(chan partialOperation, 10000),
}
}
@ -1608,9 +1608,9 @@ func (s *erasureSets) IsReady(_ context.Context) bool {
// from all underlying er.sets and puts them in a global map which
// should not have more than 10000 entries.
func (s *erasureSets) maintainMRFList() {
var agg = make(chan partialUpload, 10000)
var agg = make(chan partialOperation, 10000)
for i, er := range s.sets {
go func(c <-chan partialUpload, setIndex int) {
go func(c <-chan partialOperation, setIndex int) {
for msg := range c {
msg.failedSet = setIndex
select {
@ -1618,19 +1618,20 @@ func (s *erasureSets) maintainMRFList() {
default:
}
}
}(er.mrfUploadCh, i)
}(er.mrfOpCh, i)
}
for fUpload := range agg {
for fOp := range agg {
s.mrfMU.Lock()
if len(s.mrfUploads) > 10000 {
if len(s.mrfOperations) > 10000 {
s.mrfMU.Unlock()
continue
}
s.mrfUploads[healSource{
bucket: fUpload.bucket,
object: fUpload.object,
}] = fUpload.failedSet
s.mrfOperations[healSource{
bucket: fOp.bucket,
object: fOp.object,
versionID: fOp.versionID,
}] = fOp.failedSet
s.mrfMU.Unlock()
}
}
@ -1656,17 +1657,17 @@ func (s *erasureSets) healMRFRoutine() {
for e := range s.disksConnectEvent {
// Get the list of objects related the er.set
// to which the connected disk belongs.
var mrfUploads []healSource
var mrfOperations []healSource
s.mrfMU.Lock()
for k, v := range s.mrfUploads {
for k, v := range s.mrfOperations {
if v == e.setIndex {
mrfUploads = append(mrfUploads, k)
mrfOperations = append(mrfOperations, k)
}
}
s.mrfMU.Unlock()
// Heal objects
for _, u := range mrfUploads {
for _, u := range mrfOperations {
// Send an object to be healed with a timeout
select {
case bgSeq.sourceCh <- u:
@ -1674,7 +1675,7 @@ func (s *erasureSets) healMRFRoutine() {
}
s.mrfMU.Lock()
delete(s.mrfUploads, u)
delete(s.mrfOperations, u)
s.mrfMU.Unlock()
}
}

View File

@ -2018,18 +2018,27 @@ func (z *erasureZones) PutObjectTags(ctx context.Context, bucket, object string,
if z.SingleZone() {
return z.zones[0].PutObjectTags(ctx, bucket, object, tags, opts)
}
for _, zone := range z.zones {
err := zone.PutObjectTags(ctx, bucket, object, tags, opts)
if err != nil {
if isErrBucketNotFound(err) {
if isErrObjectNotFound(err) {
continue
}
return err
}
return nil
}
return BucketNotFound{
if opts.VersionID != "" {
return VersionNotFound{
Bucket: bucket,
Object: object,
VersionID: opts.VersionID,
}
}
return ObjectNotFound{
Bucket: bucket,
Object: object,
}
}
@ -2041,15 +2050,23 @@ func (z *erasureZones) DeleteObjectTags(ctx context.Context, bucket, object stri
for _, zone := range z.zones {
err := zone.DeleteObjectTags(ctx, bucket, object, opts)
if err != nil {
if isErrBucketNotFound(err) {
if isErrObjectNotFound(err) {
continue
}
return err
}
return nil
}
return BucketNotFound{
if opts.VersionID != "" {
return VersionNotFound{
Bucket: bucket,
Object: object,
VersionID: opts.VersionID,
}
}
return ObjectNotFound{
Bucket: bucket,
Object: object,
}
}
@ -2061,14 +2078,22 @@ func (z *erasureZones) GetObjectTags(ctx context.Context, bucket, object string,
for _, zone := range z.zones {
tags, err := zone.GetObjectTags(ctx, bucket, object, opts)
if err != nil {
if isErrBucketNotFound(err) {
if isErrObjectNotFound(err) {
continue
}
return tags, err
}
return tags, nil
}
return nil, BucketNotFound{
if opts.VersionID != "" {
return nil, VersionNotFound{
Bucket: bucket,
Object: object,
VersionID: opts.VersionID,
}
}
return nil, ObjectNotFound{
Bucket: bucket,
Object: object,
}
}

View File

@ -34,11 +34,12 @@ import (
// OfflineDisk represents an unavailable disk.
var OfflineDisk StorageAPI // zero value is nil
// partialUpload is a successful upload of an object
// partialOperation is a successful upload/delete of an object
// but not written in all disks (having quorum)
type partialUpload struct {
type partialOperation struct {
bucket string
object string
versionID string
failedSet int
}
@ -62,7 +63,7 @@ type erasureObjects struct {
// Byte pools used for temporary i/o buffers.
bp *bpool.BytePoolCap
mrfUploadCh chan partialUpload
mrfOpCh chan partialOperation
}
// NewNSLock - initialize a new namespace RWLocker instance.
@ -87,21 +88,17 @@ func (d byDiskTotal) Less(i, j int) bool {
}
// getDisksInfo - fetch disks info across all other storage API.
func getDisksInfo(disks []StorageAPI, local bool) (disksInfo []DiskInfo, errs []error, onlineDisks, offlineDisks madmin.BackendDisks) {
func getDisksInfo(disks []StorageAPI, endpoints []string) (disksInfo []DiskInfo, errs []error, onlineDisks, offlineDisks madmin.BackendDisks) {
disksInfo = make([]DiskInfo, len(disks))
onlineDisks = make(madmin.BackendDisks)
offlineDisks = make(madmin.BackendDisks)
for _, disk := range disks {
if disk == OfflineDisk {
continue
for _, ep := range endpoints {
if _, ok := offlineDisks[ep]; !ok {
offlineDisks[ep] = 0
}
peerAddr := disk.Hostname()
if _, ok := offlineDisks[peerAddr]; !ok {
offlineDisks[peerAddr] = 0
}
if _, ok := onlineDisks[peerAddr]; !ok {
onlineDisks[peerAddr] = 0
if _, ok := onlineDisks[ep]; !ok {
onlineDisks[ep] = 0
}
}
@ -130,36 +127,12 @@ func getDisksInfo(disks []StorageAPI, local bool) (disksInfo []DiskInfo, errs []
errs = g.Wait()
// Wait for the routines.
for i, diskInfoErr := range errs {
if disks[i] == OfflineDisk {
continue
}
ep := endpoints[i]
if diskInfoErr != nil {
offlineDisks[disks[i].Hostname()]++
offlineDisks[ep]++
continue
}
onlineDisks[disks[i].Hostname()]++
}
// Iterate over the passed endpoints arguments and check
// if there are still disks missing from the offline/online lists
// and update them accordingly.
missingOfflineDisks := make(map[string]int)
for _, zone := range globalEndpoints {
for _, endpoint := range zone.Endpoints {
// if local is set and endpoint is not local
// we are not interested in remote disks.
if local && !endpoint.IsLocal {
continue
}
if _, ok := offlineDisks[endpoint.Host]; !ok {
missingOfflineDisks[endpoint.Host]++
}
}
}
for missingDisk, n := range missingOfflineDisks {
onlineDisks[missingDisk] = 0
offlineDisks[missingDisk] = n
onlineDisks[ep]++
}
// Success.
@ -167,8 +140,8 @@ func getDisksInfo(disks []StorageAPI, local bool) (disksInfo []DiskInfo, errs []
}
// Get an aggregated storage info across all disks.
func getStorageInfo(disks []StorageAPI, local bool) (StorageInfo, []error) {
disksInfo, errs, onlineDisks, offlineDisks := getDisksInfo(disks, local)
func getStorageInfo(disks []StorageAPI, endpoints []string) (StorageInfo, []error) {
disksInfo, errs, onlineDisks, offlineDisks := getDisksInfo(disks, endpoints)
// Sort so that the first element is the smallest.
sort.Sort(byDiskTotal(disksInfo))
@ -203,19 +176,23 @@ func getStorageInfo(disks []StorageAPI, local bool) (StorageInfo, []error) {
// StorageInfo - returns underlying storage statistics.
func (er erasureObjects) StorageInfo(ctx context.Context, local bool) (StorageInfo, []error) {
disks := er.getDisks()
endpoints := er.getEndpoints()
if local {
var localDisks []StorageAPI
for _, disk := range disks {
var localEndpoints []string
for i, disk := range disks {
if disk != nil {
if disk.IsLocal() {
// Append this local disk since local flag is true
localDisks = append(localDisks, disk)
localEndpoints = append(localEndpoints, endpoints[i])
}
}
}
disks = localDisks
endpoints = localEndpoints
}
return getStorageInfo(disks, local)
return getStorageInfo(disks, endpoints)
}
// GetMetrics - is not implemented and shouldn't be called.

View File

@ -1552,9 +1552,5 @@ func (fs *FSObjects) IsReady(_ context.Context) bool {
return false
}
globalObjLayerMutex.RLock()
res := globalObjectAPI != nil && !globalSafeMode
globalObjLayerMutex.RUnlock()
return res
return newObjectLayerFn() != nil
}

View File

@ -34,7 +34,7 @@ var leaderLockTimeout = newDynamicTimeout(time.Minute, time.Minute)
// NewBgHealSequence creates a background healing sequence
// operation which crawls all objects and heal them.
func newBgHealSequence(numDisks int) *healSequence {
func newBgHealSequence() *healSequence {
reqInfo := &logger.ReqInfo{API: "BackgroundHeal"}
ctx, cancelCtx := context.WithCancel(logger.SetReqInfo(GlobalContext, reqInfo))
@ -54,7 +54,6 @@ func newBgHealSequence(numDisks int) *healSequence {
currentStatus: healSequenceStatus{
Summary: healNotStartedStatus,
HealSettings: hs,
NumDisks: numDisks,
},
cancelCtx: cancelCtx,
ctx: ctx,

View File

@ -207,6 +207,12 @@ func initSafeMode(ctx context.Context, newObject ObjectLayer) (err error) {
}
}(txnLk)
// Enable healing to heal drives if possible
if globalIsErasure {
initBackgroundHealing(ctx, newObject)
initLocalDisksAutoHeal(ctx, newObject)
}
// **** WARNING ****
// Migrating to encrypted backend should happen before initialization of any
// sub-systems, make sure that we do not move the above codeblock elsewhere.
@ -422,9 +428,9 @@ func serverMain(ctx *cli.Context) {
setMaxResources()
if globalIsErasure {
// Init global heal state
globalAllHealState = initHealState()
globalBackgroundHealState = initHealState()
// New global heal state
globalAllHealState = newHealState()
globalBackgroundHealState = newHealState()
}
// Configure server.
@ -504,12 +510,6 @@ func serverMain(ctx *cli.Context) {
newAllSubsystems()
// Enable healing to heal drives if possible
if globalIsErasure {
initBackgroundHealing(GlobalContext, newObject)
initLocalDisksAutoHeal(GlobalContext, newObject)
}
go startBackgroundOps(GlobalContext, newObject)
logger.FatalIf(initSafeMode(GlobalContext, newObject), "Unable to initialize server switching into safe-mode")

View File

@ -407,7 +407,7 @@ func resetGlobalIsErasure() {
func resetGlobalHealState() {
// Init global heal state
if globalAllHealState == nil {
globalAllHealState = initHealState()
globalAllHealState = newHealState()
} else {
globalAllHealState.Lock()
for _, v := range globalAllHealState.healSeqMap {
@ -420,7 +420,7 @@ func resetGlobalHealState() {
// Init background heal state
if globalBackgroundHealState == nil {
globalBackgroundHealState = initHealState()
globalBackgroundHealState = newHealState()
} else {
globalBackgroundHealState.Lock()
for _, v := range globalBackgroundHealState.healSeqMap {

View File

@ -77,7 +77,6 @@ type HealTaskStatus struct {
FailureDetail string `json:"detail"`
StartTime time.Time `json:"startTime"`
HealSettings HealOpts `json:"settings"`
NumDisks int `json:"numDisks"`
Items []HealResultItem `json:"items,omitempty"`
}