mirror of
https://github.com/minio/minio.git
synced 2025-04-19 10:07:30 -04:00
Avoid metadata update for incoming replication failure (#12054)
This is an optimization to save IOPS. A failed replication is re-queued once to re-attempt it. If the retry also fails, the replication status is set to `FAILED` and the object is left for the scanner to pick up on a later cycle.
This commit is contained in:
parent
75ac4ea840
commit
d30c5d1cf0
@ -571,7 +571,8 @@ func getReplicationAction(oi1 ObjectInfo, oi2 minio.ObjectInfo) replicationActio
|
|||||||
|
|
||||||
// replicateObject replicates the specified version of the object to destination bucket
|
// replicateObject replicates the specified version of the object to destination bucket
|
||||||
// The source object is then updated to reflect the replication status.
|
// The source object is then updated to reflect the replication status.
|
||||||
func replicateObject(ctx context.Context, objInfo ObjectInfo, objectAPI ObjectLayer) {
|
func replicateObject(ctx context.Context, ri ReplicateObjectInfo, objectAPI ObjectLayer) {
|
||||||
|
objInfo := ri.ObjectInfo
|
||||||
bucket := objInfo.Bucket
|
bucket := objInfo.Bucket
|
||||||
object := objInfo.Name
|
object := objInfo.Name
|
||||||
|
|
||||||
@ -740,7 +741,8 @@ func replicateObject(ctx context.Context, objInfo ObjectInfo, objectAPI ObjectLa
|
|||||||
if !ok {
|
if !ok {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
// Leave metadata in `PENDING` state if inline replication fails to save iops
|
||||||
|
if ri.OpType == replication.HealReplicationType || replicationStatus == replication.Completed {
|
||||||
// This lower level implementation is necessary to avoid write locks from CopyObject.
|
// This lower level implementation is necessary to avoid write locks from CopyObject.
|
||||||
poolIdx, err := z.getPoolIdx(ctx, bucket, object, objInfo.Size)
|
poolIdx, err := z.getPoolIdx(ctx, bucket, object, objInfo.Size)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -756,7 +758,6 @@ func replicateObject(ctx context.Context, objInfo ObjectInfo, objectAPI ObjectLa
|
|||||||
logger.LogIf(ctx, fmt.Errorf("Unable to update replication metadata for %s/%s(%s): %w", bucket, objInfo.Name, objInfo.VersionID, err))
|
logger.LogIf(ctx, fmt.Errorf("Unable to update replication metadata for %s/%s(%s): %w", bucket, objInfo.Name, objInfo.VersionID, err))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
opType := replication.MetadataReplicationType
|
opType := replication.MetadataReplicationType
|
||||||
if rtype == replicateAll {
|
if rtype == replicateAll {
|
||||||
opType = replication.ObjectReplicationType
|
opType = replication.ObjectReplicationType
|
||||||
@ -768,6 +769,14 @@ func replicateObject(ctx context.Context, objInfo ObjectInfo, objectAPI ObjectLa
|
|||||||
Object: objInfo,
|
Object: objInfo,
|
||||||
Host: "Internal: [Replication]",
|
Host: "Internal: [Replication]",
|
||||||
})
|
})
|
||||||
|
}
|
||||||
|
// re-queue failures once more - keep a retry count to avoid flooding the queue if
|
||||||
|
// the target site is down. Leave it to scanner to catch up instead.
|
||||||
|
if replicationStatus == replication.Failed && ri.RetryCount < 1 {
|
||||||
|
ri.OpType = replication.HealReplicationType
|
||||||
|
ri.RetryCount++
|
||||||
|
globalReplicationPool.queueReplicaTask(ctx, ri)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// filterReplicationStatusMetadata filters replication status metadata for COPY
|
// filterReplicationStatusMetadata filters replication status metadata for COPY
|
||||||
@ -808,9 +817,9 @@ var (
|
|||||||
type ReplicationPool struct {
|
type ReplicationPool struct {
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
size int
|
size int
|
||||||
replicaCh chan ObjectInfo
|
replicaCh chan ReplicateObjectInfo
|
||||||
replicaDeleteCh chan DeletedObjectVersionInfo
|
replicaDeleteCh chan DeletedObjectVersionInfo
|
||||||
mrfReplicaCh chan ObjectInfo
|
mrfReplicaCh chan ReplicateObjectInfo
|
||||||
mrfReplicaDeleteCh chan DeletedObjectVersionInfo
|
mrfReplicaDeleteCh chan DeletedObjectVersionInfo
|
||||||
killCh chan struct{}
|
killCh chan struct{}
|
||||||
wg sync.WaitGroup
|
wg sync.WaitGroup
|
||||||
@ -821,9 +830,9 @@ type ReplicationPool struct {
|
|||||||
// NewReplicationPool creates a pool of replication workers of specified size
|
// NewReplicationPool creates a pool of replication workers of specified size
|
||||||
func NewReplicationPool(ctx context.Context, o ObjectLayer, sz int) *ReplicationPool {
|
func NewReplicationPool(ctx context.Context, o ObjectLayer, sz int) *ReplicationPool {
|
||||||
pool := &ReplicationPool{
|
pool := &ReplicationPool{
|
||||||
replicaCh: make(chan ObjectInfo, 1000),
|
replicaCh: make(chan ReplicateObjectInfo, 1000),
|
||||||
replicaDeleteCh: make(chan DeletedObjectVersionInfo, 1000),
|
replicaDeleteCh: make(chan DeletedObjectVersionInfo, 1000),
|
||||||
mrfReplicaCh: make(chan ObjectInfo, 100000),
|
mrfReplicaCh: make(chan ReplicateObjectInfo, 100000),
|
||||||
mrfReplicaDeleteCh: make(chan DeletedObjectVersionInfo, 100000),
|
mrfReplicaDeleteCh: make(chan DeletedObjectVersionInfo, 100000),
|
||||||
ctx: ctx,
|
ctx: ctx,
|
||||||
objLayer: o,
|
objLayer: o,
|
||||||
@ -895,7 +904,7 @@ func (p *ReplicationPool) Resize(n int) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *ReplicationPool) queueReplicaTask(ctx context.Context, oi ObjectInfo) {
|
func (p *ReplicationPool) queueReplicaTask(ctx context.Context, ri ReplicateObjectInfo) {
|
||||||
if p == nil {
|
if p == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -903,8 +912,8 @@ func (p *ReplicationPool) queueReplicaTask(ctx context.Context, oi ObjectInfo) {
|
|||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
close(p.replicaCh)
|
close(p.replicaCh)
|
||||||
close(p.mrfReplicaCh)
|
close(p.mrfReplicaCh)
|
||||||
case p.replicaCh <- oi:
|
case p.replicaCh <- ri:
|
||||||
case p.mrfReplicaCh <- oi:
|
case p.mrfReplicaCh <- ri:
|
||||||
// queue all overflows into the mrfReplicaCh to handle incoming pending/failed operations
|
// queue all overflows into the mrfReplicaCh to handle incoming pending/failed operations
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
@ -1065,9 +1074,9 @@ func proxyHeadToReplicationTarget(ctx context.Context, bucket, object string, op
|
|||||||
|
|
||||||
func scheduleReplication(ctx context.Context, objInfo ObjectInfo, o ObjectLayer, sync bool, opType replication.Type) {
|
func scheduleReplication(ctx context.Context, objInfo ObjectInfo, o ObjectLayer, sync bool, opType replication.Type) {
|
||||||
if sync {
|
if sync {
|
||||||
replicateObject(ctx, objInfo, o)
|
replicateObject(ctx, ReplicateObjectInfo{ObjectInfo: objInfo, OpType: opType}, o)
|
||||||
} else {
|
} else {
|
||||||
globalReplicationPool.queueReplicaTask(GlobalContext, objInfo)
|
globalReplicationPool.queueReplicaTask(GlobalContext, ReplicateObjectInfo{ObjectInfo: objInfo, OpType: opType})
|
||||||
}
|
}
|
||||||
if sz, err := objInfo.GetActualSize(); err == nil {
|
if sz, err := objInfo.GetActualSize(); err == nil {
|
||||||
globalReplicationStats.Update(objInfo.Bucket, sz, objInfo.ReplicationStatus, replication.StatusType(""), opType)
|
globalReplicationStats.Update(objInfo.Bucket, sz, objInfo.ReplicationStatus, replication.StatusType(""), opType)
|
||||||
|
@ -1109,11 +1109,11 @@ func (i *scannerItem) healReplication(ctx context.Context, o ObjectLayer, oi Obj
|
|||||||
case replication.Pending:
|
case replication.Pending:
|
||||||
sizeS.pendingCount++
|
sizeS.pendingCount++
|
||||||
sizeS.pendingSize += oi.Size
|
sizeS.pendingSize += oi.Size
|
||||||
globalReplicationPool.queueReplicaTask(ctx, oi)
|
globalReplicationPool.queueReplicaTask(ctx, ReplicateObjectInfo{ObjectInfo: oi, OpType: replication.HealReplicationType})
|
||||||
case replication.Failed:
|
case replication.Failed:
|
||||||
sizeS.failedSize += oi.Size
|
sizeS.failedSize += oi.Size
|
||||||
sizeS.failedCount++
|
sizeS.failedCount++
|
||||||
globalReplicationPool.queueReplicaTask(ctx, oi)
|
globalReplicationPool.queueReplicaTask(ctx, ReplicateObjectInfo{ObjectInfo: oi, OpType: replication.HealReplicationType})
|
||||||
case replication.Completed, "COMPLETE":
|
case replication.Completed, "COMPLETE":
|
||||||
sizeS.replicatedSize += oi.Size
|
sizeS.replicatedSize += oi.Size
|
||||||
case replication.Replica:
|
case replication.Replica:
|
||||||
|
@ -220,6 +220,13 @@ func (o ObjectInfo) Clone() (cinfo ObjectInfo) {
|
|||||||
return cinfo
|
return cinfo
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReplicateObjectInfo represents object info to be replicated
|
||||||
|
type ReplicateObjectInfo struct {
|
||||||
|
ObjectInfo
|
||||||
|
OpType replication.Type
|
||||||
|
RetryCount uint32
|
||||||
|
}
|
||||||
|
|
||||||
// MultipartInfo captures metadata information about the uploadId
|
// MultipartInfo captures metadata information about the uploadId
|
||||||
// this data structure is used primarily for some internal purposes
|
// this data structure is used primarily for some internal purposes
|
||||||
// for verifying upload type such as was the upload
|
// for verifying upload type such as was the upload
|
||||||
|
@ -122,6 +122,7 @@ const (
|
|||||||
ObjectReplicationType Type = 1 + iota
|
ObjectReplicationType Type = 1 + iota
|
||||||
DeleteReplicationType
|
DeleteReplicationType
|
||||||
MetadataReplicationType
|
MetadataReplicationType
|
||||||
|
HealReplicationType
|
||||||
)
|
)
|
||||||
|
|
||||||
// ObjectOpts provides information to deduce whether replication
|
// ObjectOpts provides information to deduce whether replication
|
||||||
|
Loading…
x
Reference in New Issue
Block a user