replication: perform bucket resync in parallel (#16707)
Default number of parallel resync operations for a bucket to 10 to speed up resync.
This commit is contained in:
parent c9e87f0548
commit f986b0c493
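The commit bounds per-bucket resync concurrency with the worker pool from github.com/minio/pkg/workers: each object coming off the bucket walk is handed to a goroutine, Take()/Give() cap the number in flight at resyncParallelRoutines (10), and Wait() blocks until every in-flight resync has finished before the bucket is marked completed. Below is a minimal, self-contained sketch of that pattern, assuming only the workers API used in the diff; the object list and the resyncObject helper are illustrative placeholders, not code from the commit.

package main

import (
	"fmt"

	"github.com/minio/pkg/workers"
)

const resyncParallelRoutines = 10 // number of parallel resync ops per bucket

// resyncObject stands in for the per-object replication work done in resyncBucket.
func resyncObject(name string) {
	fmt.Println("resyncing", name)
}

func main() {
	objects := []string{"a.txt", "b.txt", "c.txt"} // placeholder object names

	wk, err := workers.New(resyncParallelRoutines)
	if err != nil {
		fmt.Println("failed to create worker pool:", err)
		return
	}
	for _, obj := range objects {
		obj := obj
		wk.Take() // blocks once resyncParallelRoutines goroutines are in flight
		go func() {
			defer wk.Give() // release the slot when this object is done
			resyncObject(obj)
		}()
	}
	wk.Wait() // wait for all in-flight resyncs to complete
}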
@@ -49,6 +49,7 @@ import (
 	"github.com/minio/minio/internal/hash"
 	xhttp "github.com/minio/minio/internal/http"
 	"github.com/minio/minio/internal/logger"
+	"github.com/minio/pkg/workers"
 	"github.com/zeebo/xxh3"
 )
 
@@ -2307,7 +2308,10 @@ func (s *replicationResyncer) PersistToDisk(ctx context.Context, objectAPI Objec
 	}
 }
 
-const resyncWorkerCnt = 50 // limit of number of bucket resyncs is progress at any given time
+const (
+	resyncWorkerCnt        = 10 // limit of number of bucket resyncs is progress at any given time
+	resyncParallelRoutines = 10 // number of parallel resync ops per bucket
+)
 
 func newresyncer() *replicationResyncer {
 	rs := replicationResyncer{
@@ -2322,6 +2326,36 @@ func newresyncer() *replicationResyncer {
 	return &rs
 }
 
+// mark status of replication resync on remote target for the bucket
+func (s *replicationResyncer) markStatus(status ResyncStatusType, opts resyncOpts) {
+	s.Lock()
+	defer s.Unlock()
+
+	m := s.statusMap[opts.bucket]
+	st := m.TargetsMap[opts.arn]
+	st.LastUpdate = UTCNow()
+	st.ResyncStatus = status
+	m.TargetsMap[opts.arn] = st
+	m.LastUpdate = UTCNow()
+	s.statusMap[opts.bucket] = m
+}
+
+// update replication resync stats for bucket's remote target
+func (s *replicationResyncer) incStats(ts TargetReplicationResyncStatus, opts resyncOpts) {
+	s.Lock()
+	defer s.Unlock()
+	m := s.statusMap[opts.bucket]
+	st := m.TargetsMap[opts.arn]
+	st.Object = ts.Object
+	st.ReplicatedCount += ts.ReplicatedCount
+	st.FailedCount += ts.FailedCount
+	st.ReplicatedSize += ts.ReplicatedSize
+	st.FailedSize += ts.FailedSize
+	m.TargetsMap[opts.arn] = st
+	m.LastUpdate = UTCNow()
+	s.statusMap[opts.bucket] = m
+}
+
 // resyncBucket resyncs all qualifying objects as per replication rules for the target
 // ARN
 func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI ObjectLayer, heal bool, opts resyncOpts) {
@@ -2333,15 +2367,7 @@ func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI Object
 
 	resyncStatus := ResyncFailed
 	defer func() {
-		s.Lock()
-		m := s.statusMap[opts.bucket]
-		st := m.TargetsMap[opts.arn]
-		st.LastUpdate = UTCNow()
-		st.ResyncStatus = resyncStatus
-		m.TargetsMap[opts.arn] = st
-		m.LastUpdate = UTCNow()
-		s.statusMap[opts.bucket] = m
-		s.Unlock()
+		s.markStatus(resyncStatus, opts)
 		globalSiteResyncMetrics.incBucket(opts, resyncStatus)
 		s.workerCh <- struct{}{}
 	}()
@@ -2377,15 +2403,9 @@ func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI Object
 	}
 	// mark resync status as resync started
 	if !heal {
-		s.Lock()
-		m := s.statusMap[opts.bucket]
-		st := m.TargetsMap[opts.arn]
-		st.ResyncStatus = ResyncStarted
-		m.TargetsMap[opts.arn] = st
-		m.LastUpdate = UTCNow()
-		s.statusMap[opts.bucket] = m
-		s.Unlock()
+		s.markStatus(ResyncStarted, opts)
 	}
 
 	// Walk through all object versions - Walk() is always in ascending order needed to ensure
 	// delete marker replicated to target after object version is first created.
 	if err := objectAPI.Walk(ctx, opts.bucket, "", objInfoCh, ObjectOptions{}); err != nil {
@@ -2401,21 +2421,27 @@ func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI Object
 	if st.ResyncStatus == ResyncStarted || st.ResyncStatus == ResyncFailed {
 		lastCheckpoint = st.Object
 	}
+	workers, err := workers.New(resyncParallelRoutines)
 	for obj := range objInfoCh {
 		select {
 		case <-s.resyncCancelCh:
 			resyncStatus = ResyncCanceled
 			return
 		case <-ctx.Done():
 			return
 		default:
 		}
 		if heal && lastCheckpoint != "" && lastCheckpoint != obj.Name {
 			continue
 		}
 		lastCheckpoint = ""
+
+		obj := obj
+		workers.Take()
+		go func() {
+			defer workers.Give()
 			roi := getHealReplicateObjectInfo(obj, rcfg)
 			if !roi.ExistingObjResync.mustResync() {
-				continue
+				return
 			}
 			traceFn := s.trace(tgt.ResetID, fmt.Sprintf("%s/%s (%s)", opts.bucket, roi.Name, roi.VersionID))
 			if roi.DeleteMarker || !roi.VersionPurgeStatus.Empty() {
@@ -2452,10 +2478,10 @@ func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI Object
 					ReplicationProxyRequest: "false",
 				},
 			})
-			s.Lock()
-			m = s.statusMap[opts.bucket]
-			st = m.TargetsMap[opts.arn]
-			st.Object = roi.Name
+			st := TargetReplicationResyncStatus{
+				Object: roi.Name,
+				Bucket: roi.Bucket,
+			}
 			success := true
 			if err != nil {
 				if roi.DeleteMarker && isErrMethodNotAllowed(ErrorRespToObjectError(err, opts.bucket, roi.Name)) {
@@ -2468,13 +2494,12 @@ func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI Object
 				st.ReplicatedCount++
 				st.ReplicatedSize += roi.Size
 			}
-			m.TargetsMap[opts.arn] = st
-			m.LastUpdate = UTCNow()
-			s.statusMap[opts.bucket] = m
-			s.Unlock()
+			s.incStats(st, opts)
 			traceFn(err)
 			globalSiteResyncMetrics.updateMetric(roi, success, opts.resyncID)
+		}()
 	}
+	workers.Wait()
 	resyncStatus = ResyncCompleted
 }