mirror of
https://github.com/minio/minio.git
synced 2025-11-07 21:02:58 -05:00
fix: simplify background heal and trigger heal items early (#9928)
Bonus fix: during the versioning merge, one of the PRs was missing the offline/online disk count fix from #9801; port it correctly over to the master branch from release. Additionally, add versionID support for MRF. Fixes #9910. Fixes #9931.
This commit is contained in:
@@ -92,8 +92,8 @@ type erasureSets struct {
|
||||
poolSplunk *MergeWalkPool
|
||||
poolVersions *MergeWalkVersionsPool
|
||||
|
||||
mrfMU sync.Mutex
|
||||
mrfUploads map[healSource]int
|
||||
mrfMU sync.Mutex
|
||||
mrfOperations map[healSource]int
|
||||
}
|
||||
|
||||
func isEndpointConnected(diskMap map[string]StorageAPI, endpoint string) bool {
|
||||
@@ -307,7 +307,7 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto
|
||||
pool: NewMergeWalkPool(globalMergeLookupTimeout),
|
||||
poolSplunk: NewMergeWalkPool(globalMergeLookupTimeout),
|
||||
poolVersions: NewMergeWalkVersionsPool(globalMergeLookupTimeout),
|
||||
mrfUploads: make(map[healSource]int),
|
||||
mrfOperations: make(map[healSource]int),
|
||||
}
|
||||
|
||||
mutex := newNSLock(globalIsDistErasure)
|
||||
@@ -351,7 +351,7 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto
|
||||
getEndpoints: s.GetEndpoints(i),
|
||||
nsMutex: mutex,
|
||||
bp: bp,
|
||||
mrfUploadCh: make(chan partialUpload, 10000),
|
||||
mrfOpCh: make(chan partialOperation, 10000),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1608,9 +1608,9 @@ func (s *erasureSets) IsReady(_ context.Context) bool {
|
||||
// from all underlying er.sets and puts them in a global map which
|
||||
// should not have more than 10000 entries.
|
||||
func (s *erasureSets) maintainMRFList() {
|
||||
var agg = make(chan partialUpload, 10000)
|
||||
var agg = make(chan partialOperation, 10000)
|
||||
for i, er := range s.sets {
|
||||
go func(c <-chan partialUpload, setIndex int) {
|
||||
go func(c <-chan partialOperation, setIndex int) {
|
||||
for msg := range c {
|
||||
msg.failedSet = setIndex
|
||||
select {
|
||||
@@ -1618,19 +1618,20 @@ func (s *erasureSets) maintainMRFList() {
|
||||
default:
|
||||
}
|
||||
}
|
||||
}(er.mrfUploadCh, i)
|
||||
}(er.mrfOpCh, i)
|
||||
}
|
||||
|
||||
for fUpload := range agg {
|
||||
for fOp := range agg {
|
||||
s.mrfMU.Lock()
|
||||
if len(s.mrfUploads) > 10000 {
|
||||
if len(s.mrfOperations) > 10000 {
|
||||
s.mrfMU.Unlock()
|
||||
continue
|
||||
}
|
||||
s.mrfUploads[healSource{
|
||||
bucket: fUpload.bucket,
|
||||
object: fUpload.object,
|
||||
}] = fUpload.failedSet
|
||||
s.mrfOperations[healSource{
|
||||
bucket: fOp.bucket,
|
||||
object: fOp.object,
|
||||
versionID: fOp.versionID,
|
||||
}] = fOp.failedSet
|
||||
s.mrfMU.Unlock()
|
||||
}
|
||||
}
|
||||
@@ -1656,17 +1657,17 @@ func (s *erasureSets) healMRFRoutine() {
|
||||
for e := range s.disksConnectEvent {
|
||||
// Get the list of objects related the er.set
|
||||
// to which the connected disk belongs.
|
||||
var mrfUploads []healSource
|
||||
var mrfOperations []healSource
|
||||
s.mrfMU.Lock()
|
||||
for k, v := range s.mrfUploads {
|
||||
for k, v := range s.mrfOperations {
|
||||
if v == e.setIndex {
|
||||
mrfUploads = append(mrfUploads, k)
|
||||
mrfOperations = append(mrfOperations, k)
|
||||
}
|
||||
}
|
||||
s.mrfMU.Unlock()
|
||||
|
||||
// Heal objects
|
||||
for _, u := range mrfUploads {
|
||||
for _, u := range mrfOperations {
|
||||
// Send an object to be healed with a timeout
|
||||
select {
|
||||
case bgSeq.sourceCh <- u:
|
||||
@@ -1674,7 +1675,7 @@ func (s *erasureSets) healMRFRoutine() {
|
||||
}
|
||||
|
||||
s.mrfMU.Lock()
|
||||
delete(s.mrfUploads, u)
|
||||
delete(s.mrfOperations, u)
|
||||
s.mrfMU.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user