Mirror of https://github.com/minio/minio.git, synced 2025-02-09 04:38:09 -05:00
replication: perform bucket resync in parallel (#16707)
Default the number of parallel resync operations per bucket to 10 to speed up resync.
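For context, the following is a minimal, self-contained sketch of the bounded worker-pool pattern this change applies inside resyncBucket, using the same github.com/minio/pkg/workers package and calls the diff below imports (workers.New, Take, Give, Wait). The resyncObject helper, the objects slice, and the wk variable name are placeholders for illustration only; the real code ranges over a channel fed by ObjectLayer.Walk() and calls the replication helpers per object.

package main

import (
	"fmt"
	"log"

	"github.com/minio/pkg/workers" // same package the commit imports
)

// resyncObject is a placeholder for the per-object work that resyncBucket
// performs (replicateObject / replicateDelete in the real code).
func resyncObject(name string) {
	fmt.Println("resynced", name)
}

func main() {
	// Mirrors the resyncParallelRoutines constant added by this commit:
	// at most 10 objects of a bucket are resynced concurrently.
	const resyncParallelRoutines = 10

	wk, err := workers.New(resyncParallelRoutines)
	if err != nil {
		log.Fatal(err)
	}

	// Placeholder listing; the real code walks all object versions in the bucket.
	objects := []string{"a.txt", "b.txt", "c.txt"}

	for _, obj := range objects {
		obj := obj // capture loop variable for the goroutine
		wk.Take()  // block until a worker slot is free
		go func() {
			defer wk.Give() // release the slot when this object is done
			resyncObject(obj)
		}()
	}
	wk.Wait() // block until all in-flight resync operations complete
}

Capping concurrency with Take/Give keeps resource usage bounded while still allowing up to resyncParallelRoutines objects to replicate in parallel, and Wait() ensures the bucket is only marked ResyncCompleted after every goroutine has finished, as the diff below does.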
This commit is contained in:
parent c9e87f0548
commit f986b0c493
@@ -49,6 +49,7 @@ import (
 	"github.com/minio/minio/internal/hash"
 	xhttp "github.com/minio/minio/internal/http"
 	"github.com/minio/minio/internal/logger"
+	"github.com/minio/pkg/workers"
 	"github.com/zeebo/xxh3"
 )
@@ -2307,7 +2308,10 @@ func (s *replicationResyncer) PersistToDisk(ctx context.Context, objectAPI ObjectLayer) {
 	}
 }
 
-const resyncWorkerCnt = 50 // limit of number of bucket resyncs in progress at any given time
+const (
+	resyncWorkerCnt        = 10 // limit of number of bucket resyncs in progress at any given time
+	resyncParallelRoutines = 10 // number of parallel resync ops per bucket
+)
 
 func newresyncer() *replicationResyncer {
 	rs := replicationResyncer{
@@ -2322,6 +2326,36 @@ func newresyncer() *replicationResyncer {
 	return &rs
 }
 
+// mark status of replication resync on remote target for the bucket
+func (s *replicationResyncer) markStatus(status ResyncStatusType, opts resyncOpts) {
+	s.Lock()
+	defer s.Unlock()
+
+	m := s.statusMap[opts.bucket]
+	st := m.TargetsMap[opts.arn]
+	st.LastUpdate = UTCNow()
+	st.ResyncStatus = status
+	m.TargetsMap[opts.arn] = st
+	m.LastUpdate = UTCNow()
+	s.statusMap[opts.bucket] = m
+}
+
+// update replication resync stats for bucket's remote target
+func (s *replicationResyncer) incStats(ts TargetReplicationResyncStatus, opts resyncOpts) {
+	s.Lock()
+	defer s.Unlock()
+	m := s.statusMap[opts.bucket]
+	st := m.TargetsMap[opts.arn]
+	st.Object = ts.Object
+	st.ReplicatedCount += ts.ReplicatedCount
+	st.FailedCount += ts.FailedCount
+	st.ReplicatedSize += ts.ReplicatedSize
+	st.FailedSize += ts.FailedSize
+	m.TargetsMap[opts.arn] = st
+	m.LastUpdate = UTCNow()
+	s.statusMap[opts.bucket] = m
+}
+
 // resyncBucket resyncs all qualifying objects as per replication rules for the target
 // ARN
 func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI ObjectLayer, heal bool, opts resyncOpts) {
@@ -2333,15 +2367,7 @@ func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI ObjectLayer, heal bool, opts resyncOpts) {
 
 	resyncStatus := ResyncFailed
 	defer func() {
-		s.Lock()
-		m := s.statusMap[opts.bucket]
-		st := m.TargetsMap[opts.arn]
-		st.LastUpdate = UTCNow()
-		st.ResyncStatus = resyncStatus
-		m.TargetsMap[opts.arn] = st
-		m.LastUpdate = UTCNow()
-		s.statusMap[opts.bucket] = m
-		s.Unlock()
+		s.markStatus(resyncStatus, opts)
 		globalSiteResyncMetrics.incBucket(opts, resyncStatus)
 		s.workerCh <- struct{}{}
 	}()
@@ -2377,15 +2403,9 @@ func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI ObjectLayer, heal bool, opts resyncOpts) {
 	}
 	// mark resync status as resync started
 	if !heal {
-		s.Lock()
-		m := s.statusMap[opts.bucket]
-		st := m.TargetsMap[opts.arn]
-		st.ResyncStatus = ResyncStarted
-		m.TargetsMap[opts.arn] = st
-		m.LastUpdate = UTCNow()
-		s.statusMap[opts.bucket] = m
-		s.Unlock()
+		s.markStatus(ResyncStarted, opts)
 	}
 
 	// Walk through all object versions - Walk() is always in ascending order needed to ensure
 	// delete marker replicated to target after object version is first created.
 	if err := objectAPI.Walk(ctx, opts.bucket, "", objInfoCh, ObjectOptions{}); err != nil {
@@ -2401,21 +2421,27 @@ func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI ObjectLayer, heal bool, opts resyncOpts) {
 	if st.ResyncStatus == ResyncStarted || st.ResyncStatus == ResyncFailed {
 		lastCheckpoint = st.Object
 	}
+	workers, err := workers.New(resyncParallelRoutines)
 	for obj := range objInfoCh {
 		select {
 		case <-s.resyncCancelCh:
 			resyncStatus = ResyncCanceled
 			return
+		case <-ctx.Done():
+			return
 		default:
 		}
 		if heal && lastCheckpoint != "" && lastCheckpoint != obj.Name {
 			continue
 		}
 		lastCheckpoint = ""
+		obj := obj
+		workers.Take()
+		go func() {
+			defer workers.Give()
 			roi := getHealReplicateObjectInfo(obj, rcfg)
 			if !roi.ExistingObjResync.mustResync() {
-				continue
+				return
 			}
 			traceFn := s.trace(tgt.ResetID, fmt.Sprintf("%s/%s (%s)", opts.bucket, roi.Name, roi.VersionID))
 			if roi.DeleteMarker || !roi.VersionPurgeStatus.Empty() {
@@ -2452,10 +2478,10 @@ func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI ObjectLayer, heal bool, opts resyncOpts) {
 					ReplicationProxyRequest: "false",
 				},
 			})
-			s.Lock()
-			m = s.statusMap[opts.bucket]
-			st = m.TargetsMap[opts.arn]
-			st.Object = roi.Name
+			st := TargetReplicationResyncStatus{
+				Object: roi.Name,
+				Bucket: roi.Bucket,
+			}
 			success := true
 			if err != nil {
 				if roi.DeleteMarker && isErrMethodNotAllowed(ErrorRespToObjectError(err, opts.bucket, roi.Name)) {
@@ -2468,13 +2494,12 @@ func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI ObjectLayer, heal bool, opts resyncOpts) {
 				st.ReplicatedCount++
 				st.ReplicatedSize += roi.Size
 			}
-			m.TargetsMap[opts.arn] = st
-			m.LastUpdate = UTCNow()
-			s.statusMap[opts.bucket] = m
-			s.Unlock()
+			s.incStats(st, opts)
 			traceFn(err)
 			globalSiteResyncMetrics.updateMetric(roi, success, opts.resyncID)
+		}()
 	}
+	workers.Wait()
 	resyncStatus = ResyncCompleted
 }