fix: do not listAndHeal() inline with PutObject() (#17499)

there is a possibility that slow drives can add latency
to the overall PutObject() call, leading to a large spike in latency.

this can happen when other parallel listObjects()
calls hit the same drive, causing the calls to
effectively serialize against each other.

this potentially improves performance and also makes
PutObject() non-blocking.
Author:    Harshavardhana
Date:      2023-06-24 19:31:04 -07:00
Committer: GitHub
Parent:    fcbed41cc3
Commit:    1f8b9b4bd5

5 changed files with 98 additions and 74 deletions
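As a rough, self-contained illustration of the pattern this commit adopts (the names healTask, healQueue, putObject, and healWorker below are illustrative stand-ins, not MinIO's API): the write path only enqueues a marker onto a bounded channel and returns, while a background goroutine drains the queue at its own pace.

	package main

	import (
		"fmt"
		"time"
	)

	// healTask is a stand-in for partialOperation: just enough
	// information for a background worker to locate the object later.
	type healTask struct {
		bucket, object string
	}

	// healQueue is bounded, so a slow drive back-pressures healing,
	// not the PutObject() caller.
	var healQueue = make(chan healTask, 1024)

	// putObject returns as soon as the write itself is done; any
	// healing is deferred to the background worker.
	func putObject(bucket, object string) {
		// ... write erasure-coded shards here ...

		// non-blocking enqueue: if the queue is full, drop the task
		// rather than stall the caller (a later scanner pass can
		// still find the object).
		select {
		case healQueue <- healTask{bucket: bucket, object: object}:
		default:
		}
	}

	// healWorker drains the queue off the request path.
	func healWorker() {
		for t := range healQueue {
			fmt.Println("healing", t.bucket, t.object) // expensive listAndHeal()-style work goes here
			time.Sleep(10 * time.Millisecond)          // pace the disk I/O
		}
	}

	func main() {
		go healWorker()
		putObject("mybucket", "myobject")
		time.Sleep(100 * time.Millisecond) // give the worker time to run
	}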

cmd/mrf.go

@@ -32,17 +32,19 @@ const (
 // partialOperation is a successful upload/delete of an object
 // but not written in all disks (having quorum)
 type partialOperation struct {
-	bucket    string
-	object    string
-	versionID string
-	queued    time.Time
+	bucket              string
+	object              string
+	versionID           string
+	allVersions         bool
+	setIndex, poolIndex int
+	queued              time.Time
 }
 
 // mrfState encapsulates all the information
 // related to the global background MRF.
 type mrfState struct {
-	ctx       context.Context
-	objectAPI ObjectLayer
+	ctx   context.Context
+	pools *erasureServerPools
 	mu   sync.Mutex
 	opCh chan partialOperation
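A hedged sketch of why the queued operation now carries setIndex and poolIndex: the writer already knows which pool and erasure set served the object, so the background worker can index straight into that set instead of re-resolving the object. All types below are mocks, not MinIO's.

	package main

	import "fmt"

	type erasureSet struct{ id int }

	// heal stands in for listAndHeal() on a single erasure set.
	func (s *erasureSet) heal(bucket, object string) {
		fmt.Printf("set %d healing %s/%s\n", s.id, bucket, object)
	}

	type serverPool struct{ sets []*erasureSet }

	// op mirrors the shape of partialOperation after this commit.
	type op struct {
		bucket, object      string
		setIndex, poolIndex int // recorded by the writer at enqueue time
	}

	func main() {
		pools := []serverPool{{sets: []*erasureSet{{id: 0}, {id: 1}}}}
		u := op{bucket: "b", object: "o", poolIndex: 0, setIndex: 1}
		// direct routing, mirroring
		// m.pools.serverPools[u.poolIndex].sets[u.setIndex].listAndHeal(...)
		pools[u.poolIndex].sets[u.setIndex].heal(u.bucket, u.object)
	}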
@@ -55,9 +57,12 @@ func (m *mrfState) init(ctx context.Context, objAPI ObjectLayer) {
 	m.ctx = ctx
 	m.opCh = make(chan partialOperation, mrfOpsQueueSize)
-	m.objectAPI = objAPI
-	go globalMRFState.healRoutine()
+	var ok bool
+	m.pools, ok = objAPI.(*erasureServerPools)
+	if ok {
+		go m.healRoutine()
+	}
 }
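init() now only starts the heal goroutine when the ObjectLayer is actually an *erasureServerPools. A self-contained sketch of that comma-ok type-assertion guard, with mock types standing in for MinIO's:

	package main

	import "fmt"

	type ObjectLayer interface{ Name() string }

	type erasureServerPools struct{}

	func (*erasureServerPools) Name() string { return "erasure-pools" }

	type singleDrive struct{}

	func (*singleDrive) Name() string { return "single-drive" }

	func startHealer(objAPI ObjectLayer) {
		// comma-ok assertion: ok is false (and pools is nil) for any
		// other ObjectLayer implementation, so no heal goroutine is
		// started for setups that have no erasure pools.
		pools, ok := objAPI.(*erasureServerPools)
		if ok {
			fmt.Println("starting heal routine for", pools.Name())
			// go m.healRoutine() would be started here
		} else {
			fmt.Println("skipping heal routine for", objAPI.Name())
		}
	}

	func main() {
		startHealer(&erasureServerPools{})
		startHealer(&singleDrive{})
	}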
// Add a partial S3 operation (put/delete) when one or more disks are offline.
@@ -101,7 +106,11 @@ func (m *mrfState) healRoutine() {
 			if u.object == "" {
 				healBucket(u.bucket, madmin.HealNormalScan)
 			} else {
-				healObject(u.bucket, u.object, u.versionID, madmin.HealNormalScan)
+				if u.allVersions {
+					m.pools.serverPools[u.poolIndex].sets[u.setIndex].listAndHeal(u.bucket, u.object, healObjectVersionsDisparity)
+				} else {
+					healObject(u.bucket, u.object, u.versionID, madmin.HealNormalScan)
+				}
 			}
 			wait()
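Finally, a hedged sketch of the consumer loop shape in healRoutine() after this change: drain the channel, branch per operation (bucket heal, all-versions heal for a disparity, or single-version heal), and pace iterations with a wait() so background healing never saturates the drives. The pacer and the printed branches below are stand-ins, not MinIO's implementations.

	package main

	import (
		"fmt"
		"time"
	)

	type op struct {
		bucket, object string
		allVersions    bool
	}

	func healRoutine(opCh <-chan op) {
		wait := func() { time.Sleep(50 * time.Millisecond) } // stand-in for MinIO's dynamic pacer
		for u := range opCh {
			switch {
			case u.object == "":
				fmt.Println("heal bucket", u.bucket)
			case u.allVersions:
				fmt.Println("list and heal all versions of", u.bucket+"/"+u.object)
			default:
				fmt.Println("heal one version of", u.bucket+"/"+u.object)
			}
			wait() // throttle between operations
		}
	}

	func main() {
		ch := make(chan op, 4)
		ch <- op{bucket: "b"}                                 // bucket heal
		ch <- op{bucket: "b", object: "o", allVersions: true} // versions-disparity heal
		ch <- op{bucket: "b", object: "o"}                    // single-version heal
		close(ch)
		healRoutine(ch)
	}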