replication: perform bucket resync in parallel (#16707)

Default the number of parallel resync operations per bucket to 10
to speed up bucket resync.
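
For context, the bounded fan-out this change relies on looks roughly like the
sketch below. It is a minimal sketch, not MinIO code: it uses the same
Take/Give/Wait calls from github.com/minio/pkg/workers that the diff
introduces, and the loop body is a stand-in for the per-object resync.

package main

import (
	"fmt"

	"github.com/minio/pkg/workers"
)

func main() {
	// Bound concurrency to 10 goroutines, mirroring resyncParallelRoutines.
	wk, err := workers.New(10)
	if err != nil {
		fmt.Println("workers.New:", err)
		return
	}
	for i := 0; i < 100; i++ {
		i := i    // capture the loop variable for the goroutine
		wk.Take() // blocks until a worker slot is free
		go func() {
			defer wk.Give() // release the slot when done
			fmt.Println("resyncing object", i)
		}()
	}
	wk.Wait() // block until every in-flight goroutine has finished
}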
Poorna Krishnamoorthy 2023-02-24 12:07:34 -08:00 committed by Harshavardhana
parent c9e87f0548
commit f986b0c493

@@ -49,6 +49,7 @@ import (
"github.com/minio/minio/internal/hash"
xhttp "github.com/minio/minio/internal/http"
"github.com/minio/minio/internal/logger"
"github.com/minio/pkg/workers"
"github.com/zeebo/xxh3"
)
@@ -2307,7 +2308,10 @@ func (s *replicationResyncer) PersistToDisk(ctx context.Context, objectAPI Objec
}
}
const resyncWorkerCnt = 50 // limit on the number of bucket resyncs in progress at any given time
const (
resyncWorkerCnt = 10 // limit on the number of bucket resyncs in progress at any given time
resyncParallelRoutines = 10 // number of parallel resync ops per bucket
)
func newresyncer() *replicationResyncer {
rs := replicationResyncer{
@@ -2322,6 +2326,36 @@ func newresyncer() *replicationResyncer {
return &rs
}
// mark status of replication resync on remote target for the bucket
func (s *replicationResyncer) markStatus(status ResyncStatusType, opts resyncOpts) {
s.Lock()
defer s.Unlock()
m := s.statusMap[opts.bucket]
st := m.TargetsMap[opts.arn]
st.LastUpdate = UTCNow()
st.ResyncStatus = status
m.TargetsMap[opts.arn] = st
m.LastUpdate = UTCNow()
s.statusMap[opts.bucket] = m
}
// update replication resync stats for bucket's remote target
func (s *replicationResyncer) incStats(ts TargetReplicationResyncStatus, opts resyncOpts) {
s.Lock()
defer s.Unlock()
m := s.statusMap[opts.bucket]
st := m.TargetsMap[opts.arn]
st.Object = ts.Object
st.ReplicatedCount += ts.ReplicatedCount
st.FailedCount += ts.FailedCount
st.ReplicatedSize += ts.ReplicatedSize
st.FailedSize += ts.FailedSize
m.TargetsMap[opts.arn] = st
m.LastUpdate = UTCNow()
s.statusMap[opts.bucket] = m
}
// resyncBucket resyncs all qualifying objects as per replication rules for the target
// ARN
func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI ObjectLayer, heal bool, opts resyncOpts) {
@@ -2333,15 +2367,7 @@ func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI Object
resyncStatus := ResyncFailed
defer func() {
s.Lock()
m := s.statusMap[opts.bucket]
st := m.TargetsMap[opts.arn]
st.LastUpdate = UTCNow()
st.ResyncStatus = resyncStatus
m.TargetsMap[opts.arn] = st
m.LastUpdate = UTCNow()
s.statusMap[opts.bucket] = m
s.Unlock()
s.markStatus(resyncStatus, opts)
globalSiteResyncMetrics.incBucket(opts, resyncStatus)
s.workerCh <- struct{}{}
}()
@@ -2377,15 +2403,9 @@ func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI Object
}
// mark resync status as resync started
if !heal {
s.Lock()
m := s.statusMap[opts.bucket]
st := m.TargetsMap[opts.arn]
st.ResyncStatus = ResyncStarted
m.TargetsMap[opts.arn] = st
m.LastUpdate = UTCNow()
s.statusMap[opts.bucket] = m
s.Unlock()
s.markStatus(ResyncStarted, opts)
}
// Walk through all object versions - Walk() is always in ascending order, which is needed to ensure
// a delete marker is replicated to the target only after the object version is first created.
if err := objectAPI.Walk(ctx, opts.bucket, "", objInfoCh, ObjectOptions{}); err != nil {
@@ -2401,21 +2421,27 @@ func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI Object
if st.ResyncStatus == ResyncStarted || st.ResyncStatus == ResyncFailed {
lastCheckpoint = st.Object
}
workers, err := workers.New(resyncParallelRoutines)
if err != nil {
logger.LogIf(ctx, err) // bail out if the worker pool could not be initialized
return
}
for obj := range objInfoCh {
select {
case <-s.resyncCancelCh:
resyncStatus = ResyncCanceled
return
case <-ctx.Done():
return
default:
}
if heal && lastCheckpoint != "" && lastCheckpoint != obj.Name {
continue
}
lastCheckpoint = ""
obj := obj // capture the range variable for the goroutine below
workers.Take() // blocks until one of the resyncParallelRoutines slots is free
go func() {
defer workers.Give() // release the worker slot when this object is done
roi := getHealReplicateObjectInfo(obj, rcfg)
if !roi.ExistingObjResync.mustResync() {
continue
return
}
traceFn := s.trace(tgt.ResetID, fmt.Sprintf("%s/%s (%s)", opts.bucket, roi.Name, roi.VersionID))
if roi.DeleteMarker || !roi.VersionPurgeStatus.Empty() {
@@ -2452,10 +2478,10 @@ func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI Object
ReplicationProxyRequest: "false",
},
})
s.Lock()
m = s.statusMap[opts.bucket]
st = m.TargetsMap[opts.arn]
st.Object = roi.Name
st := TargetReplicationResyncStatus{
Object: roi.Name,
Bucket: roi.Bucket,
}
success := true
if err != nil {
if roi.DeleteMarker && isErrMethodNotAllowed(ErrorRespToObjectError(err, opts.bucket, roi.Name)) {
@@ -2468,13 +2494,12 @@ func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI Object
st.ReplicatedCount++
st.ReplicatedSize += roi.Size
}
m.TargetsMap[opts.arn] = st
m.LastUpdate = UTCNow()
s.statusMap[opts.bucket] = m
s.Unlock()
s.incStats(st, opts)
traceFn(err)
globalSiteResyncMetrics.updateMetric(roi, success, opts.resyncID)
}()
}
workers.Wait() // wait for all in-flight object resyncs to finish
resyncStatus = ResyncCompleted
}
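
A design note on the stats handling above: each goroutine now fills a local
TargetReplicationResyncStatus and hands it to incStats, so the only write to
the shared status map happens under the resyncer lock. Below is a minimal
sketch of that merge-under-mutex pattern, with hypothetical stand-in types
rather than MinIO's.

package main

import (
	"fmt"
	"sync"
)

// resyncStats is a stand-in for TargetReplicationResyncStatus.
type resyncStats struct {
	ReplicatedCount, FailedCount int64
}

// resyncer is a stand-in for replicationResyncer: a mutex guarding shared totals.
type resyncer struct {
	sync.Mutex
	total resyncStats
}

// incStats merges one goroutine's local counters under the lock, mirroring
// how the diff's incStats folds per-object results into the status map.
func (r *resyncer) incStats(ts resyncStats) {
	r.Lock()
	defer r.Unlock()
	r.total.ReplicatedCount += ts.ReplicatedCount
	r.total.FailedCount += ts.FailedCount
}

func main() {
	r := &resyncer{}
	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			st := resyncStats{}  // local: no locking needed while counting
			st.ReplicatedCount++ // pretend one object was replicated
			r.incStats(st)       // single locked merge per object
		}()
	}
	wg.Wait()
	fmt.Println("replicated:", r.total.ReplicatedCount)
}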