Avoid metadata update for incoming replication failure (#12054)

This is an optimization to save IOPS. Replication
failures are re-queued once to re-attempt
replication. If the retry still does not succeed, the replication
status is set to `FAILED` and the scanner catches up on the object
in a later cycle.
This commit is contained in:
Poorna Krishnamoorthy 2021-04-15 16:32:00 -07:00 committed by GitHub
parent 75ac4ea840
commit d30c5d1cf0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 53 additions and 36 deletions

View File

@ -571,7 +571,8 @@ func getReplicationAction(oi1 ObjectInfo, oi2 minio.ObjectInfo) replicationActio
// replicateObject replicates the specified version of the object to destination bucket // replicateObject replicates the specified version of the object to destination bucket
// The source object is then updated to reflect the replication status. // The source object is then updated to reflect the replication status.
func replicateObject(ctx context.Context, objInfo ObjectInfo, objectAPI ObjectLayer) { func replicateObject(ctx context.Context, ri ReplicateObjectInfo, objectAPI ObjectLayer) {
objInfo := ri.ObjectInfo
bucket := objInfo.Bucket bucket := objInfo.Bucket
object := objInfo.Name object := objInfo.Name
@ -740,7 +741,8 @@ func replicateObject(ctx context.Context, objInfo ObjectInfo, objectAPI ObjectLa
if !ok { if !ok {
return return
} }
// To save IOPS, leave metadata in `PENDING` state if inline replication fails.
if ri.OpType == replication.HealReplicationType || replicationStatus == replication.Completed {
// This lower level implementation is necessary to avoid write locks from CopyObject. // This lower level implementation is necessary to avoid write locks from CopyObject.
poolIdx, err := z.getPoolIdx(ctx, bucket, object, objInfo.Size) poolIdx, err := z.getPoolIdx(ctx, bucket, object, objInfo.Size)
if err != nil { if err != nil {
@ -756,7 +758,6 @@ func replicateObject(ctx context.Context, objInfo ObjectInfo, objectAPI ObjectLa
logger.LogIf(ctx, fmt.Errorf("Unable to update replication metadata for %s/%s(%s): %w", bucket, objInfo.Name, objInfo.VersionID, err)) logger.LogIf(ctx, fmt.Errorf("Unable to update replication metadata for %s/%s(%s): %w", bucket, objInfo.Name, objInfo.VersionID, err))
} }
} }
opType := replication.MetadataReplicationType opType := replication.MetadataReplicationType
if rtype == replicateAll { if rtype == replicateAll {
opType = replication.ObjectReplicationType opType = replication.ObjectReplicationType
@ -768,6 +769,14 @@ func replicateObject(ctx context.Context, objInfo ObjectInfo, objectAPI ObjectLa
Object: objInfo, Object: objInfo,
Host: "Internal: [Replication]", Host: "Internal: [Replication]",
}) })
}
// re-queue failures once more - keep a retry count to avoid flooding the queue if
// the target site is down. Leave it to scanner to catch up instead.
if replicationStatus == replication.Failed && ri.RetryCount < 1 {
ri.OpType = replication.HealReplicationType
ri.RetryCount++
globalReplicationPool.queueReplicaTask(ctx, ri)
}
} }
// filterReplicationStatusMetadata filters replication status metadata for COPY // filterReplicationStatusMetadata filters replication status metadata for COPY
@ -808,9 +817,9 @@ var (
type ReplicationPool struct { type ReplicationPool struct {
mu sync.Mutex mu sync.Mutex
size int size int
replicaCh chan ObjectInfo replicaCh chan ReplicateObjectInfo
replicaDeleteCh chan DeletedObjectVersionInfo replicaDeleteCh chan DeletedObjectVersionInfo
mrfReplicaCh chan ObjectInfo mrfReplicaCh chan ReplicateObjectInfo
mrfReplicaDeleteCh chan DeletedObjectVersionInfo mrfReplicaDeleteCh chan DeletedObjectVersionInfo
killCh chan struct{} killCh chan struct{}
wg sync.WaitGroup wg sync.WaitGroup
@ -821,9 +830,9 @@ type ReplicationPool struct {
// NewReplicationPool creates a pool of replication workers of specified size // NewReplicationPool creates a pool of replication workers of specified size
func NewReplicationPool(ctx context.Context, o ObjectLayer, sz int) *ReplicationPool { func NewReplicationPool(ctx context.Context, o ObjectLayer, sz int) *ReplicationPool {
pool := &ReplicationPool{ pool := &ReplicationPool{
replicaCh: make(chan ObjectInfo, 1000), replicaCh: make(chan ReplicateObjectInfo, 1000),
replicaDeleteCh: make(chan DeletedObjectVersionInfo, 1000), replicaDeleteCh: make(chan DeletedObjectVersionInfo, 1000),
mrfReplicaCh: make(chan ObjectInfo, 100000), mrfReplicaCh: make(chan ReplicateObjectInfo, 100000),
mrfReplicaDeleteCh: make(chan DeletedObjectVersionInfo, 100000), mrfReplicaDeleteCh: make(chan DeletedObjectVersionInfo, 100000),
ctx: ctx, ctx: ctx,
objLayer: o, objLayer: o,
@ -895,7 +904,7 @@ func (p *ReplicationPool) Resize(n int) {
} }
} }
func (p *ReplicationPool) queueReplicaTask(ctx context.Context, oi ObjectInfo) { func (p *ReplicationPool) queueReplicaTask(ctx context.Context, ri ReplicateObjectInfo) {
if p == nil { if p == nil {
return return
} }
@ -903,8 +912,8 @@ func (p *ReplicationPool) queueReplicaTask(ctx context.Context, oi ObjectInfo) {
case <-ctx.Done(): case <-ctx.Done():
close(p.replicaCh) close(p.replicaCh)
close(p.mrfReplicaCh) close(p.mrfReplicaCh)
case p.replicaCh <- oi: case p.replicaCh <- ri:
case p.mrfReplicaCh <- oi: case p.mrfReplicaCh <- ri:
// queue all overflows into the mrfReplicaCh to handle incoming pending/failed operations // queue all overflows into the mrfReplicaCh to handle incoming pending/failed operations
default: default:
} }
@ -1065,9 +1074,9 @@ func proxyHeadToReplicationTarget(ctx context.Context, bucket, object string, op
func scheduleReplication(ctx context.Context, objInfo ObjectInfo, o ObjectLayer, sync bool, opType replication.Type) { func scheduleReplication(ctx context.Context, objInfo ObjectInfo, o ObjectLayer, sync bool, opType replication.Type) {
if sync { if sync {
replicateObject(ctx, objInfo, o) replicateObject(ctx, ReplicateObjectInfo{ObjectInfo: objInfo, OpType: opType}, o)
} else { } else {
globalReplicationPool.queueReplicaTask(GlobalContext, objInfo) globalReplicationPool.queueReplicaTask(GlobalContext, ReplicateObjectInfo{ObjectInfo: objInfo, OpType: opType})
} }
if sz, err := objInfo.GetActualSize(); err == nil { if sz, err := objInfo.GetActualSize(); err == nil {
globalReplicationStats.Update(objInfo.Bucket, sz, objInfo.ReplicationStatus, replication.StatusType(""), opType) globalReplicationStats.Update(objInfo.Bucket, sz, objInfo.ReplicationStatus, replication.StatusType(""), opType)

View File

@ -1109,11 +1109,11 @@ func (i *scannerItem) healReplication(ctx context.Context, o ObjectLayer, oi Obj
case replication.Pending: case replication.Pending:
sizeS.pendingCount++ sizeS.pendingCount++
sizeS.pendingSize += oi.Size sizeS.pendingSize += oi.Size
globalReplicationPool.queueReplicaTask(ctx, oi) globalReplicationPool.queueReplicaTask(ctx, ReplicateObjectInfo{ObjectInfo: oi, OpType: replication.HealReplicationType})
case replication.Failed: case replication.Failed:
sizeS.failedSize += oi.Size sizeS.failedSize += oi.Size
sizeS.failedCount++ sizeS.failedCount++
globalReplicationPool.queueReplicaTask(ctx, oi) globalReplicationPool.queueReplicaTask(ctx, ReplicateObjectInfo{ObjectInfo: oi, OpType: replication.HealReplicationType})
case replication.Completed, "COMPLETE": case replication.Completed, "COMPLETE":
sizeS.replicatedSize += oi.Size sizeS.replicatedSize += oi.Size
case replication.Replica: case replication.Replica:

View File

@ -220,6 +220,13 @@ func (o ObjectInfo) Clone() (cinfo ObjectInfo) {
return cinfo return cinfo
} }
// ReplicateObjectInfo represents object info to be replicated.
// It wraps an ObjectInfo with the bookkeeping that replicateObject and
// the replication pool queue carry along with each replication task.
type ReplicateObjectInfo struct {
// ObjectInfo is the object version to replicate; replicateObject reads
// the bucket, name and version from it.
ObjectInfo
// OpType records why the task was queued. The scanner and the re-queue
// path set it to replication.HealReplicationType; replicateObject checks
// it when deciding whether to write replication metadata after a failure.
OpType replication.Type
// RetryCount is the number of times this task has been re-queued after a
// failed attempt. replicateObject only re-queues while it is below 1.
RetryCount uint32
}
// MultipartInfo captures metadata information about the uploadId // MultipartInfo captures metadata information about the uploadId
// this data structure is used primarily for some internal purposes // this data structure is used primarily for some internal purposes
// for verifying upload type such as was the upload // for verifying upload type such as was the upload

View File

@ -122,6 +122,7 @@ const (
ObjectReplicationType Type = 1 + iota ObjectReplicationType Type = 1 + iota
DeleteReplicationType DeleteReplicationType
MetadataReplicationType MetadataReplicationType
HealReplicationType
) )
// ObjectOpts provides information to deduce whether replication // ObjectOpts provides information to deduce whether replication