2021-04-18 12:41:13 -07:00
// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
2020-07-21 17:49:56 -07:00
package cmd
import (
"context"
2022-02-10 10:16:52 -08:00
"encoding/binary"
"errors"
2020-09-15 20:44:48 -07:00
"fmt"
2021-06-30 07:44:24 -07:00
"io"
2022-09-24 16:20:28 -07:00
"math"
2022-11-14 07:16:40 -08:00
"math/rand"
2020-07-21 17:49:56 -07:00
"net/http"
2023-05-06 13:35:43 -07:00
"net/url"
2023-07-13 02:51:33 -04:00
"os"
2022-02-10 10:16:52 -08:00
"path"
2023-07-13 02:51:33 -04:00
"path/filepath"
2021-02-03 20:41:33 -08:00
"reflect"
2020-08-12 17:32:24 -07:00
"strings"
2021-03-09 02:56:42 -08:00
"sync"
2022-09-24 16:20:28 -07:00
"sync/atomic"
2020-07-21 17:49:56 -07:00
"time"
2022-01-11 22:32:29 -08:00
"github.com/dustin/go-humanize"
2023-06-19 17:53:08 -07:00
"github.com/minio/madmin-go/v3"
2021-08-23 17:16:18 +02:00
"github.com/minio/minio-go/v7"
2020-07-21 17:49:56 -07:00
"github.com/minio/minio-go/v7/pkg/encrypt"
"github.com/minio/minio-go/v7/pkg/tags"
2022-12-12 10:28:30 -08:00
"github.com/minio/minio/internal/amztime"
2021-06-01 14:59:40 -07:00
"github.com/minio/minio/internal/bucket/bandwidth"
2022-10-13 17:46:49 -07:00
objectlock "github.com/minio/minio/internal/bucket/object/lock"
2021-06-01 14:59:40 -07:00
"github.com/minio/minio/internal/bucket/replication"
"github.com/minio/minio/internal/config/storageclass"
"github.com/minio/minio/internal/crypto"
"github.com/minio/minio/internal/event"
2021-06-30 07:44:24 -07:00
"github.com/minio/minio/internal/hash"
2021-06-01 14:59:40 -07:00
xhttp "github.com/minio/minio/internal/http"
"github.com/minio/minio/internal/logger"
2022-11-18 00:20:09 +01:00
"github.com/zeebo/xxh3"
2020-07-21 17:49:56 -07:00
)
2021-09-18 16:31:35 -04:00
// Replication tuning values and the metadata key suffixes under which
// replication bookkeeping is recorded.
const (
	// throttleDeadline is a fixed duration of one hour.
	// NOTE(review): presumably the upper bound for throttled replication
	// transfers - confirm against the use sites, which are outside this block.
	throttleDeadline = time.Hour

	// ReplicationReset holds the reset id and the timestamp of the last
	// reset operation.
	ReplicationReset = "replication-reset"

	// ReplicationStatus holds the internal replication status: a stringified
	// view of every target's replication status for all replication activity
	// initiated from this cluster.
	ReplicationStatus = "replication-status"

	// ReplicationTimestamp is the last time replication was initiated on
	// this cluster for this object version.
	ReplicationTimestamp = "replication-timestamp"

	// ReplicaStatus is present when a replica was received by this cluster
	// for this object version.
	ReplicaStatus = "replica-status"

	// ReplicaTimestamp is the last time a replica was received by this
	// cluster for this object version.
	ReplicaTimestamp = "replica-timestamp"

	// TaggingTimestamp is the last time a tag metadata modification happened
	// on this cluster for this object version.
	TaggingTimestamp = "tagging-timestamp"

	// ObjectLockRetentionTimestamp is the last time an object lock retention
	// metadata modification happened on this cluster for this object version.
	ObjectLockRetentionTimestamp = "objectlock-retention-timestamp"

	// ObjectLockLegalHoldTimestamp is the last time a legal hold metadata
	// modification happened on this cluster for this object version.
	ObjectLockLegalHoldTimestamp = "objectlock-legalhold-timestamp"

	// ReplicationWorkerMultiplier is the suggested worker multiplier when
	// traffic exceeds replication worker capacity.
	ReplicationWorkerMultiplier = 1.5
)
2021-07-28 15:20:01 -07:00
2023-01-25 05:16:33 +05:30
// isReplicationEnabled reports whether getReplicationConfig hands back a
// non-nil replication configuration for bucketName. The error from the lookup
// is deliberately discarded; only the returned pointer is inspected.
func isReplicationEnabled(ctx context.Context, bucketName string) bool {
	if cfg, _ := getReplicationConfig(ctx, bucketName); cfg != nil {
		return true
	}
	return false
}
2020-07-30 19:55:22 -07:00
// gets replication config associated to a given bucket name.
func getReplicationConfig ( ctx context . Context , bucketName string ) ( rc * replication . Config , err error ) {
2022-04-24 15:06:31 +05:30
rCfg , _ , err := globalBucketMetadataSys . GetReplicationConfig ( ctx , bucketName )
2022-09-13 21:23:33 -07:00
if err != nil {
if errors . Is ( err , BucketReplicationConfigNotFound { Bucket : bucketName } ) || errors . Is ( err , errInvalidArgument ) {
return rCfg , err
}
logger . CriticalIf ( ctx , err )
}
2022-04-24 15:06:31 +05:30
return rCfg , err
2020-07-21 17:49:56 -07:00
}
2020-07-30 19:55:22 -07:00
// validateReplicationDestination returns error if replication destination bucket missing or not configured
2020-07-21 17:49:56 -07:00
// It also returns true if replication destination is same as this server.
2022-05-26 17:57:23 -07:00
func validateReplicationDestination ( ctx context . Context , bucket string , rCfg * replication . Config , checkRemote bool ) ( bool , APIError ) {
2021-09-18 16:31:35 -04:00
var arns [ ] string
if rCfg . RoleArn != "" {
arns = append ( arns , rCfg . RoleArn )
} else {
for _ , rule := range rCfg . Rules {
arns = append ( arns , rule . Destination . String ( ) )
}
2020-07-21 17:49:56 -07:00
}
2022-05-26 17:57:23 -07:00
var sameTarget bool
2021-09-18 16:31:35 -04:00
for _ , arnStr := range arns {
arn , err := madmin . ParseARN ( arnStr )
if err != nil {
2022-05-26 17:57:23 -07:00
return sameTarget , errorCodes . ToAPIErrWithErr ( ErrBucketRemoteArnInvalid , err )
2021-09-18 16:31:35 -04:00
}
if arn . Type != madmin . ReplicationService {
2022-05-26 17:57:23 -07:00
return sameTarget , toAPIError ( ctx , BucketRemoteArnTypeInvalid { Bucket : bucket } )
2021-09-18 16:31:35 -04:00
}
clnt := globalBucketTargetSys . GetRemoteTargetClient ( ctx , arnStr )
if clnt == nil {
2022-05-26 17:57:23 -07:00
return sameTarget , toAPIError ( ctx , BucketRemoteTargetNotFound { Bucket : bucket } )
2021-09-18 16:31:35 -04:00
}
2022-05-26 17:57:23 -07:00
if checkRemote { // validate remote bucket
2022-10-13 17:46:49 -07:00
found , err := clnt . BucketExists ( ctx , arn . Bucket )
if err != nil {
2022-05-26 17:57:23 -07:00
return sameTarget , errorCodes . ToAPIErrWithErr ( ErrRemoteDestinationNotFoundError , err )
}
2022-10-13 17:46:49 -07:00
if ! found {
return sameTarget , errorCodes . ToAPIErrWithErr ( ErrRemoteDestinationNotFoundError , BucketRemoteTargetNotFound { Bucket : arn . Bucket } )
}
2022-05-26 17:57:23 -07:00
if ret , err := globalBucketObjectLockSys . Get ( bucket ) ; err == nil {
if ret . LockEnabled {
lock , _ , _ , _ , err := clnt . GetObjectLockConfig ( ctx , arn . Bucket )
2022-10-13 17:46:49 -07:00
if err != nil {
2022-05-26 17:57:23 -07:00
return sameTarget , errorCodes . ToAPIErrWithErr ( ErrReplicationDestinationMissingLock , err )
}
2022-10-13 17:46:49 -07:00
if lock != objectlock . Enabled {
return sameTarget , errorCodes . ToAPIErrWithErr ( ErrReplicationDestinationMissingLock , nil )
}
2021-09-18 16:31:35 -04:00
}
2020-08-04 23:02:27 -07:00
}
}
2021-09-18 16:31:35 -04:00
// validate replication ARN against target endpoint
2022-12-22 01:31:20 -08:00
c := globalBucketTargetSys . GetRemoteTargetClient ( ctx , arnStr )
if c != nil {
2023-05-06 13:35:43 -07:00
if err := checkRemoteEndpoint ( ctx , c . EndpointURL ( ) ) ; err != nil {
switch err . ( type ) {
case BucketRemoteIdenticalToSource :
return true , errorCodes . ToAPIErrWithErr ( ErrBucketRemoteIdenticalToSource , fmt . Errorf ( "remote target endpoint %s is self referential" , c . EndpointURL ( ) . String ( ) ) )
default :
}
}
2021-09-18 16:31:35 -04:00
if c . EndpointURL ( ) . String ( ) == clnt . EndpointURL ( ) . String ( ) {
2022-05-26 17:57:23 -07:00
selfTarget , _ := isLocalHost ( clnt . EndpointURL ( ) . Hostname ( ) , clnt . EndpointURL ( ) . Port ( ) , globalMinioPort )
if ! sameTarget {
sameTarget = selfTarget
}
continue
2021-09-18 16:31:35 -04:00
}
2020-07-21 17:49:56 -07:00
}
}
2022-05-26 17:57:23 -07:00
if len ( arns ) == 0 {
return false , toAPIError ( ctx , BucketRemoteTargetNotFound { Bucket : bucket } )
}
return sameTarget , toAPIError ( ctx , nil )
2020-07-21 17:49:56 -07:00
}
2023-05-06 13:35:43 -07:00
// performs a http request to remote endpoint to check if deployment id of remote endpoint is same as
// local cluster deployment id. This is to prevent replication to self, especially in case of a loadbalancer
// in front of MinIO.
func checkRemoteEndpoint ( ctx context . Context , epURL * url . URL ) error {
reqURL := & url . URL {
Scheme : epURL . Scheme ,
Host : epURL . Host ,
2023-06-21 00:27:54 +08:00
Path : healthCheckPathPrefix + healthCheckReadinessPath ,
2023-05-06 13:35:43 -07:00
}
req , err := http . NewRequestWithContext ( ctx , http . MethodGet , reqURL . String ( ) , nil )
if err != nil {
return err
}
client := & http . Client {
Transport : NewHTTPTransport ( ) ,
Timeout : 10 * time . Second ,
}
resp , err := client . Do ( req )
if err != nil {
return err
}
if err == nil {
// Drain the connection.
xhttp . DrainBody ( resp . Body )
}
if resp != nil {
amzid := resp . Header . Get ( xhttp . AmzRequestHostID )
if _ , ok := globalNodeNamesHex [ amzid ] ; ok {
return BucketRemoteIdenticalToSource {
Endpoint : epURL . String ( ) ,
}
}
}
return nil
}
2021-06-01 19:59:11 -07:00
type mustReplicateOptions struct {
2021-09-18 16:31:35 -04:00
meta map [ string ] string
status replication . StatusType
opType replication . Type
replicationRequest bool // incoming request is a replication request
2021-06-01 19:59:11 -07:00
}
// ReplicationStatus returns the replication status stored in the metadata
// under the bucket replication status header, or the zero StatusType when the
// header is absent.
func (o mustReplicateOptions) ReplicationStatus() (s replication.StatusType) {
	raw, present := o.meta[xhttp.AmzBucketReplicationStatus]
	if !present {
		return s
	}
	return replication.StatusType(raw)
}
2022-01-02 09:15:06 -08:00
2021-06-01 19:59:11 -07:00
// isExistingObjectReplication reports whether the operation type is
// existing-object replication.
func (o mustReplicateOptions) isExistingObjectReplication() bool {
	switch o.opType {
	case replication.ExistingObjectReplicationType:
		return true
	default:
		return false
	}
}
// isMetadataReplication reports whether the operation type is metadata
// replication.
func (o mustReplicateOptions) isMetadataReplication() bool {
	switch o.opType {
	case replication.MetadataReplicationType:
		return true
	default:
		return false
	}
}
2022-01-02 09:15:06 -08:00
2021-09-18 16:31:35 -04:00
func getMustReplicateOptions ( o ObjectInfo , op replication . Type , opts ObjectOptions ) mustReplicateOptions {
2021-06-01 19:59:11 -07:00
if ! op . Valid ( ) {
op = replication . ObjectReplicationType
if o . metadataOnly {
op = replication . MetadataReplicationType
}
}
meta := cloneMSS ( o . UserDefined )
if o . UserTags != "" {
meta [ xhttp . AmzObjectTagging ] = o . UserTags
}
2021-09-18 16:31:35 -04:00
2021-06-01 19:59:11 -07:00
return mustReplicateOptions {
2021-09-18 16:31:35 -04:00
meta : meta ,
status : o . ReplicationStatus ,
opType : op ,
replicationRequest : opts . ReplicationRequest ,
2021-06-01 19:59:11 -07:00
}
}
2021-04-29 19:01:43 -07:00
2021-01-11 22:36:51 -08:00
// mustReplicate returns 2 booleans - true if object meets replication criteria and true if replication is to be done in
// a synchronous manner.
2021-09-18 16:31:35 -04:00
func mustReplicate ( ctx context . Context , bucket , object string , mopts mustReplicateOptions ) ( dsc ReplicateDecision ) {
2022-05-31 02:57:57 -07:00
// object layer not initialized we return with no decision.
if newObjectLayerFn ( ) == nil {
return
}
2022-05-06 19:05:28 -07:00
// Disable server-side replication on object prefixes which are excluded
// from versioning via the MinIO bucket versioning extension.
2022-08-24 13:46:29 -07:00
if ! globalBucketVersioningSys . PrefixEnabled ( bucket , object ) {
2022-05-06 19:05:28 -07:00
return
}
2022-05-31 02:57:57 -07:00
2021-06-01 19:59:11 -07:00
replStatus := mopts . ReplicationStatus ( )
if replStatus == replication . Replica && ! mopts . isMetadataReplication ( ) {
2021-09-18 16:31:35 -04:00
return
}
if mopts . replicationRequest { // incoming replication request on target cluster
return
2020-07-21 17:49:56 -07:00
}
2020-07-30 19:55:22 -07:00
cfg , err := getReplicationConfig ( ctx , bucket )
2020-07-21 17:49:56 -07:00
if err != nil {
2021-09-18 16:31:35 -04:00
return
2020-07-21 17:49:56 -07:00
}
opts := replication . ObjectOpts {
2021-06-01 19:59:11 -07:00
Name : object ,
SSEC : crypto . SSEC . IsEncrypted ( mopts . meta ) ,
Replica : replStatus == replication . Replica ,
ExistingObject : mopts . isExistingObjectReplication ( ) ,
2020-07-21 17:49:56 -07:00
}
2021-06-01 19:59:11 -07:00
tagStr , ok := mopts . meta [ xhttp . AmzObjectTagging ]
2020-07-21 17:49:56 -07:00
if ok {
opts . UserTags = tagStr
}
2021-09-18 16:31:35 -04:00
tgtArns := cfg . FilterTargetArns ( opts )
for _ , tgtArn := range tgtArns {
tgt := globalBucketTargetSys . GetRemoteTargetClient ( ctx , tgtArn )
// the target online status should not be used here while deciding
// whether to replicate as the target could be temporarily down
opts . TargetArn = tgtArn
replicate := cfg . Replicate ( opts )
var synchronous bool
if tgt != nil {
synchronous = tgt . replicateSync
}
dsc . Set ( newReplicateTargetDecision ( tgtArn , replicate , synchronous ) )
2021-01-11 22:36:51 -08:00
}
2021-09-18 16:31:35 -04:00
return dsc
2021-01-11 22:36:51 -08:00
}
// Standard headers that needs to be extracted from User metadata.
var standardHeaders = [ ] string {
2021-01-27 11:22:34 -08:00
xhttp . ContentType ,
xhttp . CacheControl ,
xhttp . ContentEncoding ,
xhttp . ContentLanguage ,
xhttp . ContentDisposition ,
2021-01-11 22:36:51 -08:00
xhttp . AmzStorageClass ,
xhttp . AmzObjectTagging ,
xhttp . AmzBucketReplicationStatus ,
2021-01-27 11:22:34 -08:00
xhttp . AmzObjectLockMode ,
xhttp . AmzObjectLockRetainUntilDate ,
xhttp . AmzObjectLockLegalHold ,
xhttp . AmzTagCount ,
xhttp . AmzServerSideEncryption ,
2020-07-21 17:49:56 -07:00
}
2020-11-19 18:43:58 -08:00
// hasReplicationRules reports whether at least one of the objects being
// deleted is covered by an active rule of the bucket's replication
// configuration. It returns false when the configuration cannot be obtained.
func hasReplicationRules(ctx context.Context, bucket string, objects []ObjectToDelete) bool {
	cfg, err := getReplicationConfig(ctx, bucket)
	if err != nil {
		return false
	}
	if cfg == nil {
		return false
	}

	for i := range objects {
		if cfg.HasActiveRules(objects[i].ObjectName, true) {
			return true
		}
	}
	return false
}
2021-01-11 22:36:51 -08:00
// isStandardHeader returns true if header is a supported header and not a custom header
2021-02-03 20:41:33 -08:00
func isStandardHeader ( matchHeaderKey string ) bool {
return equals ( matchHeaderKey , standardHeaders ... )
2021-01-11 22:36:51 -08:00
}
2020-11-19 18:43:58 -08:00
// returns whether object version is a deletemarker and if object qualifies for replication
2021-09-18 16:31:35 -04:00
func checkReplicateDelete ( ctx context . Context , bucket string , dobj ObjectToDelete , oi ObjectInfo , delOpts ObjectOptions , gerr error ) ( dsc ReplicateDecision ) {
2020-11-19 18:43:58 -08:00
rcfg , err := getReplicationConfig ( ctx , bucket )
if err != nil || rcfg == nil {
2021-09-18 16:31:35 -04:00
return
}
// If incoming request is a replication request, it does not need to be re-replicated.
if delOpts . ReplicationRequest {
return
2020-11-19 18:43:58 -08:00
}
2022-05-06 19:05:28 -07:00
// Skip replication if this object's prefix is excluded from being
// versioned.
2022-05-07 22:06:44 -07:00
if ! delOpts . Versioned {
2022-05-06 19:05:28 -07:00
return
}
2021-02-18 16:35:37 -08:00
opts := replication . ObjectOpts {
Name : dobj . ObjectName ,
SSEC : crypto . SSEC . IsEncrypted ( oi . UserDefined ) ,
UserTags : oi . UserTags ,
DeleteMarker : oi . DeleteMarker ,
VersionID : dobj . VersionID ,
2021-03-13 10:28:35 -08:00
OpType : replication . DeleteReplicationType ,
2021-02-18 16:35:37 -08:00
}
2021-09-18 16:31:35 -04:00
tgtArns := rcfg . FilterTargetArns ( opts )
if len ( tgtArns ) > 0 {
dsc . targetsMap = make ( map [ string ] replicateTargetDecision , len ( tgtArns ) )
var sync , replicate bool
for _ , tgtArn := range tgtArns {
opts . TargetArn = tgtArn
replicate = rcfg . Replicate ( opts )
2022-12-28 22:48:33 -08:00
// when incoming delete is removal of a delete marker(a.k.a versioned delete),
2021-09-18 16:31:35 -04:00
// GetObjectInfo returns extra information even though it returns errFileNotFound
if gerr != nil {
validReplStatus := false
switch oi . TargetReplicationStatus ( tgtArn ) {
case replication . Pending , replication . Completed , replication . Failed :
validReplStatus = true
}
if oi . DeleteMarker && ( validReplStatus || replicate ) {
dsc . Set ( newReplicateTargetDecision ( tgtArn , replicate , sync ) )
continue
} else {
// can be the case that other cluster is down and duplicate `mc rm --vid`
// is issued - this still needs to be replicated back to the other target
replicate = oi . VersionPurgeStatus == Pending || oi . VersionPurgeStatus == Failed
dsc . Set ( newReplicateTargetDecision ( tgtArn , replicate , sync ) )
continue
}
}
tgt := globalBucketTargetSys . GetRemoteTargetClient ( ctx , tgtArn )
// the target online status should not be used here while deciding
// whether to replicate deletes as the target could be temporarily down
tgtDsc := newReplicateTargetDecision ( tgtArn , false , false )
if tgt != nil {
tgtDsc = newReplicateTargetDecision ( tgtArn , replicate , tgt . replicateSync )
}
dsc . Set ( tgtDsc )
}
}
return dsc
2020-11-19 18:43:58 -08:00
}
// replicate deletes to the designated replication target if replication configuration
// has delete marker replication or delete replication (MinIO extension to allow deletes where version id
// is specified) enabled.
// Similar to bucket replication for PUT operation, soft delete (a.k.a setting delete marker) and
// permanent deletes (by specifying a version ID in the delete operation) have three states "Pending", "Complete"
// and "Failed" to mark the status of the replication of "DELETE" operation. All failed operations can
// then be retried by healing. In the case of permanent deletes, until the replication is completed on the
// target cluster, the object version is marked deleted on the source and hidden from listing. It is permanently
// deleted from the source when the VersionPurgeStatus changes to "Complete", i.e after replication succeeds
// on target.
2022-07-12 10:43:32 -07:00
func replicateDelete ( ctx context . Context , dobj DeletedObjectReplicationInfo , objectAPI ObjectLayer ) {
2021-09-18 16:31:35 -04:00
var replicationStatus replication . StatusType
2020-11-19 18:43:58 -08:00
bucket := dobj . Bucket
2021-02-03 20:41:33 -08:00
versionID := dobj . DeleteMarkerVersionID
if versionID == "" {
versionID = dobj . VersionID
}
2021-07-01 14:02:44 -07:00
defer func ( ) {
2021-09-18 16:31:35 -04:00
replStatus := string ( replicationStatus )
2022-10-24 19:35:07 +01:00
auditLogInternal ( context . Background ( ) , AuditLogOptions {
2022-07-12 10:43:32 -07:00
Event : dobj . EventType ,
2021-07-01 14:02:44 -07:00
APIName : ReplicateDeleteAPI ,
2022-10-24 19:35:07 +01:00
Bucket : bucket ,
Object : dobj . ObjectName ,
2021-07-01 14:02:44 -07:00
VersionID : versionID ,
Status : replStatus ,
} )
} ( )
2020-11-19 18:43:58 -08:00
rcfg , err := getReplicationConfig ( ctx , bucket )
if err != nil || rcfg == nil {
2023-08-08 13:27:40 -07:00
logger . LogOnceIf ( ctx , fmt . Errorf ( "unable to obtain replication config for bucket: %s: err: %s" , bucket , err ) , bucket )
2021-02-03 20:41:33 -08:00
sendEvent ( eventArgs {
BucketName : bucket ,
Object : ObjectInfo {
Bucket : bucket ,
Name : dobj . ObjectName ,
VersionID : versionID ,
DeleteMarker : dobj . DeleteMarker ,
} ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-02-03 20:41:33 -08:00
EventName : event . ObjectReplicationNotTracked ,
} )
2020-11-19 18:43:58 -08:00
return
}
2023-08-08 13:27:40 -07:00
dsc , err := parseReplicateDecision ( ctx , bucket , dobj . ReplicationState . ReplicateDecisionStr )
2021-09-18 16:31:35 -04:00
if err != nil {
2023-08-08 13:27:40 -07:00
logger . LogOnceIf ( ctx , fmt . Errorf ( "unable to parse replication decision parameters for bucket: %s, err: %s, decision: %s" ,
bucket , err , dobj . ReplicationState . ReplicateDecisionStr ) , dobj . ReplicationState . ReplicateDecisionStr )
2021-02-03 20:41:33 -08:00
sendEvent ( eventArgs {
BucketName : bucket ,
Object : ObjectInfo {
Bucket : bucket ,
Name : dobj . ObjectName ,
VersionID : versionID ,
DeleteMarker : dobj . DeleteMarker ,
} ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-02-03 20:41:33 -08:00
EventName : event . ObjectReplicationNotTracked ,
} )
2020-11-19 18:43:58 -08:00
return
}
2021-02-03 20:41:33 -08:00
2021-08-23 17:16:18 +02:00
// Lock the object name before starting replication operation.
// Use separate lock that doesn't collide with regular objects.
lk := objectAPI . NewNSLock ( bucket , "/[replicate]/" + dobj . ObjectName )
lkctx , err := lk . GetLock ( ctx , globalOperationTimeout )
if err != nil {
2022-08-22 16:53:06 -07:00
globalReplicationPool . queueMRFSave ( dobj . ToMRFEntry ( ) )
2021-08-23 17:16:18 +02:00
sendEvent ( eventArgs {
BucketName : bucket ,
Object : ObjectInfo {
Bucket : bucket ,
Name : dobj . ObjectName ,
VersionID : versionID ,
DeleteMarker : dobj . DeleteMarker ,
} ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-08-23 17:16:18 +02:00
EventName : event . ObjectReplicationNotTracked ,
} )
return
}
ctx = lkctx . Context ( )
2022-12-24 04:49:07 +01:00
defer lk . Unlock ( lkctx )
2020-11-19 18:43:58 -08:00
2021-09-18 16:31:35 -04:00
var wg sync . WaitGroup
var rinfos replicatedInfos
rinfos . Targets = make ( [ ] replicatedTargetInfo , len ( dsc . targetsMap ) )
idx := - 1
2023-08-08 13:27:40 -07:00
for _ , tgtEntry := range dsc . targetsMap {
2021-09-18 16:31:35 -04:00
idx ++
2023-08-08 13:27:40 -07:00
if tgtEntry . Tgt == nil {
2021-09-18 16:31:35 -04:00
continue
2020-11-19 18:43:58 -08:00
}
2023-08-08 13:27:40 -07:00
if ! tgtEntry . Replicate {
2021-09-18 16:31:35 -04:00
continue
2020-11-19 18:43:58 -08:00
}
2021-09-18 16:31:35 -04:00
// if dobj.TargetArn is not empty string, this is a case of specific target being re-synced.
2023-08-08 13:27:40 -07:00
if dobj . TargetArn != "" && dobj . TargetArn != tgtEntry . Arn {
2021-09-18 16:31:35 -04:00
continue
}
wg . Add ( 1 )
go func ( index int , tgt * TargetClient ) {
defer wg . Done ( )
2023-08-08 13:27:40 -07:00
rinfos . Targets [ index ] = replicateDeleteToTarget ( ctx , dobj , tgt )
} ( idx , tgtEntry . Tgt )
2020-11-19 18:43:58 -08:00
}
2021-09-18 16:31:35 -04:00
wg . Wait ( )
replicationStatus = rinfos . ReplicationStatus ( )
prevStatus := dobj . DeleteMarkerReplicationStatus ( )
2021-04-03 09:03:42 -07:00
if dobj . VersionID != "" {
2021-09-18 16:31:35 -04:00
prevStatus = replication . StatusType ( dobj . VersionPurgeStatus ( ) )
replicationStatus = replication . StatusType ( rinfos . VersionPurgeStatus ( ) )
2021-04-03 09:03:42 -07:00
}
2021-09-18 16:31:35 -04:00
2021-04-04 15:34:33 -07:00
// to decrement pending count later.
2021-09-18 16:31:35 -04:00
for _ , rinfo := range rinfos . Targets {
if rinfo . ReplicationStatus != rinfo . PrevReplicationStatus {
2021-11-17 21:10:57 +01:00
globalReplicationStats . Update ( dobj . Bucket , rinfo . Arn , 0 , 0 , replicationStatus ,
2021-09-18 16:31:35 -04:00
prevStatus , replication . DeleteReplicationType )
}
}
2021-01-25 14:04:41 -08:00
2022-01-02 09:15:06 -08:00
eventName := event . ObjectReplicationComplete
2021-09-18 16:31:35 -04:00
if replicationStatus == replication . Failed {
2020-11-21 23:48:50 -08:00
eventName = event . ObjectReplicationFailed
2022-08-22 16:53:06 -07:00
globalReplicationPool . queueMRFSave ( dobj . ToMRFEntry ( ) )
2020-11-21 23:48:50 -08:00
}
2021-09-18 16:31:35 -04:00
drs := getReplicationState ( rinfos , dobj . ReplicationState , dobj . VersionID )
2022-07-21 11:05:44 -07:00
if replicationStatus != prevStatus {
drs . ReplicationTimeStamp = UTCNow ( )
}
2021-02-03 20:41:33 -08:00
dobjInfo , err := objectAPI . DeleteObject ( ctx , bucket , dobj . ObjectName , ObjectOptions {
2021-09-18 16:31:35 -04:00
VersionID : versionID ,
MTime : dobj . DeleteMarkerMTime . Time ,
DeleteReplication : drs ,
2022-05-06 19:05:28 -07:00
Versioned : globalBucketVersioningSys . PrefixEnabled ( bucket , dobj . ObjectName ) ,
2022-08-24 13:46:29 -07:00
// Objects matching prefixes should not leave delete markers,
// dramatically reduces namespace pollution while keeping the
// benefits of replication, make sure to apply version suspension
// only at bucket level instead.
VersionSuspended : globalBucketVersioningSys . Suspended ( bucket ) ,
2021-01-25 14:04:41 -08:00
} )
2021-02-09 15:11:43 -08:00
if err != nil && ! isErrVersionNotFound ( err ) { // VersionNotFound would be reported by pool that object version is missing on.
2021-02-03 20:41:33 -08:00
sendEvent ( eventArgs {
BucketName : bucket ,
Object : ObjectInfo {
Bucket : bucket ,
Name : dobj . ObjectName ,
VersionID : versionID ,
DeleteMarker : dobj . DeleteMarker ,
} ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-02-03 20:41:33 -08:00
EventName : eventName ,
} )
} else {
sendEvent ( eventArgs {
BucketName : bucket ,
Object : dobjInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-02-03 20:41:33 -08:00
EventName : eventName ,
} )
2020-11-19 18:43:58 -08:00
}
}
2022-03-16 16:59:43 -07:00
func replicateDeleteToTarget ( ctx context . Context , dobj DeletedObjectReplicationInfo , tgt * TargetClient ) ( rinfo replicatedTargetInfo ) {
2021-09-18 16:31:35 -04:00
versionID := dobj . DeleteMarkerVersionID
if versionID == "" {
versionID = dobj . VersionID
}
rinfo = dobj . ReplicationState . targetState ( tgt . ARN )
rinfo . OpType = dobj . OpType
defer func ( ) {
if rinfo . ReplicationStatus == replication . Completed && tgt . ResetID != "" && dobj . OpType == replication . ExistingObjectReplicationType {
rinfo . ResyncTimestamp = fmt . Sprintf ( "%s;%s" , UTCNow ( ) . Format ( http . TimeFormat ) , tgt . ResetID )
}
} ( )
if dobj . VersionID == "" && rinfo . PrevReplicationStatus == replication . Completed && dobj . OpType != replication . ExistingObjectReplicationType {
rinfo . ReplicationStatus = rinfo . PrevReplicationStatus
return
}
if dobj . VersionID != "" && rinfo . VersionPurgeStatus == Complete {
return
}
2022-08-16 17:46:22 -07:00
if globalBucketTargetSys . isOffline ( tgt . EndpointURL ( ) ) {
2023-07-11 23:17:45 +03:00
logger . LogOnceIf ( ctx , fmt . Errorf ( "remote target is offline for bucket:%s arn:%s" , dobj . Bucket , tgt . ARN ) , "replication-target-offline-delete-" + tgt . ARN )
2021-09-18 16:31:35 -04:00
sendEvent ( eventArgs {
BucketName : dobj . Bucket ,
Object : ObjectInfo {
Bucket : dobj . Bucket ,
Name : dobj . ObjectName ,
VersionID : dobj . VersionID ,
DeleteMarker : dobj . DeleteMarker ,
} ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-09-18 16:31:35 -04:00
EventName : event . ObjectReplicationNotTracked ,
} )
if dobj . VersionID == "" {
rinfo . ReplicationStatus = replication . Failed
} else {
rinfo . VersionPurgeStatus = Failed
}
return
}
2021-12-16 15:34:55 -08:00
// early return if already replicated delete marker for existing object replication/ healing delete markers
2022-10-13 16:43:36 -07:00
if dobj . DeleteMarkerVersionID != "" {
2022-12-05 20:18:50 +01:00
toi , err := tgt . StatObject ( ctx , tgt . Bucket , dobj . ObjectName , minio . StatObjectOptions {
2021-09-18 16:31:35 -04:00
VersionID : versionID ,
2022-12-05 20:18:50 +01:00
Internal : minio . AdvancedGetOptions {
2022-10-21 14:45:06 -07:00
ReplicationProxyRequest : "false" ,
IsReplicationReadyForDeleteMarker : true ,
2022-01-02 09:15:06 -08:00
} ,
2022-10-13 16:43:36 -07:00
} )
2023-07-10 10:57:56 -04:00
switch {
case isErrMethodNotAllowed ( ErrorRespToObjectError ( err , dobj . Bucket , dobj . ObjectName ) ) :
// delete marker already replicated
if dobj . VersionID == "" && rinfo . VersionPurgeStatus . Empty ( ) {
2021-09-18 16:31:35 -04:00
rinfo . ReplicationStatus = replication . Completed
2021-12-16 15:34:55 -08:00
return
2021-09-18 16:31:35 -04:00
}
2023-07-10 10:57:56 -04:00
case isErrObjectNotFound ( ErrorRespToObjectError ( err , dobj . Bucket , dobj . ObjectName ) ) :
// version being purged is already not found on target.
if ! rinfo . VersionPurgeStatus . Empty ( ) {
rinfo . VersionPurgeStatus = Complete
return
}
default :
2023-03-08 07:03:29 -08:00
// mark delete marker replication as failed if target cluster not ready to receive
// this request yet (object version not replicated yet)
if err != nil && ! toi . ReplicationReady {
rinfo . ReplicationStatus = replication . Failed
return
}
2022-10-13 16:43:36 -07:00
}
2021-09-18 16:31:35 -04:00
}
2022-12-05 20:18:50 +01:00
rmErr := tgt . RemoveObject ( ctx , tgt . Bucket , dobj . ObjectName , minio . RemoveObjectOptions {
2021-09-18 16:31:35 -04:00
VersionID : versionID ,
2022-12-05 20:18:50 +01:00
Internal : minio . AdvancedRemoveOptions {
2021-09-18 16:31:35 -04:00
ReplicationDeleteMarker : dobj . DeleteMarkerVersionID != "" ,
ReplicationMTime : dobj . DeleteMarkerMTime . Time ,
2022-12-05 20:18:50 +01:00
ReplicationStatus : minio . ReplicationStatusReplica ,
2021-09-18 16:31:35 -04:00
ReplicationRequest : true , // always set this to distinguish between `mc mirror` replication and serverside
} ,
} )
if rmErr != nil {
if dobj . VersionID == "" {
rinfo . ReplicationStatus = replication . Failed
} else {
rinfo . VersionPurgeStatus = Failed
}
logger . LogIf ( ctx , fmt . Errorf ( "Unable to replicate delete marker to %s/%s(%s): %s" , tgt . Bucket , dobj . ObjectName , versionID , rmErr ) )
} else {
if dobj . VersionID == "" {
rinfo . ReplicationStatus = replication . Completed
} else {
rinfo . VersionPurgeStatus = Complete
}
}
return
}
func getCopyObjMetadata ( oi ObjectInfo , sc string ) map [ string ] string {
2020-11-19 11:50:22 -08:00
meta := make ( map [ string ] string , len ( oi . UserDefined ) )
for k , v := range oi . UserDefined {
2023-07-06 16:02:08 -07:00
if stringsHasPrefixFold ( k , ReservedMetadataPrefixLower ) {
2020-11-19 11:50:22 -08:00
continue
}
2021-02-03 20:41:33 -08:00
if equals ( k , xhttp . AmzBucketReplicationStatus ) {
continue
}
// https://github.com/google/security-research/security/advisories/GHSA-76wf-9vgp-pj7w
if equals ( k , xhttp . AmzMetaUnencryptedContentLength , xhttp . AmzMetaUnencryptedContentMD5 ) {
2020-11-19 11:50:22 -08:00
continue
}
meta [ k ] = v
}
2021-02-03 20:41:33 -08:00
2020-11-19 11:50:22 -08:00
if oi . ContentEncoding != "" {
meta [ xhttp . ContentEncoding ] = oi . ContentEncoding
}
2021-02-03 20:41:33 -08:00
2020-11-19 11:50:22 -08:00
if oi . ContentType != "" {
meta [ xhttp . ContentType ] = oi . ContentType
}
2021-02-03 20:41:33 -08:00
if oi . UserTags != "" {
meta [ xhttp . AmzObjectTagging ] = oi . UserTags
2020-11-19 11:50:22 -08:00
meta [ xhttp . AmzTagDirective ] = "REPLACE"
}
2021-02-03 20:41:33 -08:00
2020-11-19 11:50:22 -08:00
if sc == "" {
sc = oi . StorageClass
}
2021-04-19 10:30:42 -07:00
// drop non standard storage classes for tiering from replication
if sc != "" && ( sc == storageclass . RRS || sc == storageclass . STANDARD ) {
2021-02-03 20:41:33 -08:00
meta [ xhttp . AmzStorageClass ] = sc
2020-11-19 11:50:22 -08:00
}
2021-04-19 10:30:42 -07:00
2020-11-19 11:50:22 -08:00
meta [ xhttp . MinIOSourceETag ] = oi . ETag
2022-10-27 18:46:52 +02:00
meta [ xhttp . MinIOSourceMTime ] = oi . ModTime . UTC ( ) . Format ( time . RFC3339Nano )
2020-11-19 11:50:22 -08:00
meta [ xhttp . AmzBucketReplicationStatus ] = replication . Replica . String ( )
return meta
}
2021-02-08 18:12:28 -08:00
// caseInsensitiveMap is a string map whose Lookup tolerates the common
// spellings of a header key.
type caseInsensitiveMap map[string]string

// Lookup finds the entry for key, trying in order the key exactly as given,
// its lower-cased form and its canonical HTTP header form. The value of the
// first spelling present in the map is returned together with true; when none
// is present (or the map is empty) it returns "" and false. Keys stored under
// any other casing are not found.
func (m caseInsensitiveMap) Lookup(key string) (string, bool) {
	if len(m) == 0 {
		return "", false
	}
	if v, ok := m[key]; ok {
		return v, true
	}
	if v, ok := m[strings.ToLower(key)]; ok {
		return v, true
	}
	if v, ok := m[http.CanonicalHeaderKey(key)]; ok {
		return v, true
	}
	return "", false
}
2022-12-05 20:18:50 +01:00
func putReplicationOpts ( ctx context . Context , sc string , objInfo ObjectInfo ) ( putOpts minio . PutObjectOptions , err error ) {
2020-07-21 17:49:56 -07:00
meta := make ( map [ string ] string )
for k , v := range objInfo . UserDefined {
2023-07-06 16:02:08 -07:00
if stringsHasPrefixFold ( k , ReservedMetadataPrefixLower ) {
2020-07-21 17:49:56 -07:00
continue
}
2021-01-11 22:36:51 -08:00
if isStandardHeader ( k ) {
2020-08-12 17:32:24 -07:00
continue
}
2020-07-21 17:49:56 -07:00
meta [ k ] = v
}
2021-02-03 20:41:33 -08:00
2021-04-19 10:30:42 -07:00
if sc == "" && ( objInfo . StorageClass == storageclass . STANDARD || objInfo . StorageClass == storageclass . RRS ) {
2020-08-05 20:01:20 -07:00
sc = objInfo . StorageClass
}
2022-12-05 20:18:50 +01:00
putOpts = minio . PutObjectOptions {
2020-10-06 08:37:09 -07:00
UserMetadata : meta ,
ContentType : objInfo . ContentType ,
ContentEncoding : objInfo . ContentEncoding ,
StorageClass : sc ,
2022-12-05 20:18:50 +01:00
Internal : minio . AdvancedPutOptions {
2021-03-03 11:13:31 -08:00
SourceVersionID : objInfo . VersionID ,
2022-12-05 20:18:50 +01:00
ReplicationStatus : minio . ReplicationStatusReplica ,
2021-03-03 11:13:31 -08:00
SourceMTime : objInfo . ModTime ,
SourceETag : objInfo . ETag ,
ReplicationRequest : true , // always set this to distinguish between `mc mirror` replication and serverside
2020-10-06 08:37:09 -07:00
} ,
2020-07-21 17:49:56 -07:00
}
2021-02-03 20:41:33 -08:00
if objInfo . UserTags != "" {
tag , _ := tags . ParseObjectTags ( objInfo . UserTags )
if tag != nil {
putOpts . UserTags = tag . ToMap ( )
2021-09-18 16:31:35 -04:00
// set tag timestamp in opts
tagTimestamp := objInfo . ModTime
if tagTmstampStr , ok := objInfo . UserDefined [ ReservedMetadataPrefixLower + TaggingTimestamp ] ; ok {
tagTimestamp , err = time . Parse ( time . RFC3339Nano , tagTmstampStr )
if err != nil {
return putOpts , err
}
}
putOpts . Internal . TaggingTimestamp = tagTimestamp
2021-02-03 20:41:33 -08:00
}
}
2021-02-08 18:12:28 -08:00
lkMap := caseInsensitiveMap ( objInfo . UserDefined )
if lang , ok := lkMap . Lookup ( xhttp . ContentLanguage ) ; ok {
2021-01-27 11:22:34 -08:00
putOpts . ContentLanguage = lang
}
2021-02-08 18:12:28 -08:00
if disp , ok := lkMap . Lookup ( xhttp . ContentDisposition ) ; ok {
2021-01-27 11:22:34 -08:00
putOpts . ContentDisposition = disp
}
2021-02-08 18:12:28 -08:00
if cc , ok := lkMap . Lookup ( xhttp . CacheControl ) ; ok {
2021-01-27 11:22:34 -08:00
putOpts . CacheControl = cc
}
2021-02-08 18:12:28 -08:00
if mode , ok := lkMap . Lookup ( xhttp . AmzObjectLockMode ) ; ok {
2022-12-05 20:18:50 +01:00
rmode := minio . RetentionMode ( mode )
2020-07-21 17:49:56 -07:00
putOpts . Mode = rmode
}
2021-02-08 18:12:28 -08:00
if retainDateStr , ok := lkMap . Lookup ( xhttp . AmzObjectLockRetainUntilDate ) ; ok {
2022-12-12 10:28:30 -08:00
rdate , err := amztime . ISO8601Parse ( retainDateStr )
2020-07-21 17:49:56 -07:00
if err != nil {
2022-12-12 10:28:30 -08:00
return putOpts , err
2020-07-21 17:49:56 -07:00
}
putOpts . RetainUntilDate = rdate
2021-09-18 16:31:35 -04:00
// set retention timestamp in opts
retTimestamp := objInfo . ModTime
if retainTmstampStr , ok := objInfo . UserDefined [ ReservedMetadataPrefixLower + ObjectLockRetentionTimestamp ] ; ok {
retTimestamp , err = time . Parse ( time . RFC3339Nano , retainTmstampStr )
if err != nil {
return putOpts , err
}
}
putOpts . Internal . RetentionTimestamp = retTimestamp
2020-07-21 17:49:56 -07:00
}
2021-02-08 18:12:28 -08:00
if lhold , ok := lkMap . Lookup ( xhttp . AmzObjectLockLegalHold ) ; ok {
2022-12-05 20:18:50 +01:00
putOpts . LegalHold = minio . LegalHoldStatus ( lhold )
2021-09-18 16:31:35 -04:00
// set legalhold timestamp in opts
lholdTimestamp := objInfo . ModTime
if lholdTmstampStr , ok := objInfo . UserDefined [ ReservedMetadataPrefixLower + ObjectLockLegalHoldTimestamp ] ; ok {
lholdTimestamp , err = time . Parse ( time . RFC3339Nano , lholdTmstampStr )
if err != nil {
return putOpts , err
}
}
putOpts . Internal . LegalholdTimestamp = lholdTimestamp
2020-07-21 17:49:56 -07:00
}
if crypto . S3 . IsEncrypted ( objInfo . UserDefined ) {
putOpts . ServerSideEncryption = encrypt . NewSSE ( )
}
return
}
2020-11-19 11:50:22 -08:00
// replicationAction names how much of an object has to be sent to a
// replication target, as decided by comparing source and target metadata.
type replicationAction string
const (
// replicateMetadata: object data matches on the target, only metadata
// (content type/encoding, tags or compared headers) differs.
replicateMetadata replicationAction = "metadata"
// replicateNone: nothing needs to be sent.
replicateNone replicationAction = "none"
// replicateAll: the object (data and metadata) must be replicated in full.
replicateAll replicationAction = "all"
)
2021-02-03 20:41:33 -08:00
// equals reports whether k1 is equal, ignoring letter case, to at least one
// of keys. It returns false when no keys are given.
func equals(k1 string, keys ...string) bool {
	for i := range keys {
		if strings.EqualFold(keys[i], k1) {
			return true
		}
	}
	return false
}
2020-11-19 11:50:22 -08:00
// returns replicationAction by comparing metadata between source and target
2021-09-28 13:26:12 -04:00
func getReplicationAction ( oi1 ObjectInfo , oi2 minio . ObjectInfo , opType replication . Type ) replicationAction {
// Avoid resyncing null versions created prior to enabling replication if target has a newer copy
if opType == replication . ExistingObjectReplicationType &&
oi1 . ModTime . Unix ( ) > oi2 . LastModified . Unix ( ) && oi1 . VersionID == nullVersionID {
return replicateNone
}
2023-03-07 07:43:38 -08:00
sz , _ := oi1 . GetActualSize ( )
2020-11-19 11:50:22 -08:00
// needs full replication
if oi1 . ETag != oi2 . ETag ||
oi1 . VersionID != oi2 . VersionID ||
2023-03-07 07:43:38 -08:00
sz != oi2 . Size ||
2021-01-27 11:22:34 -08:00
oi1 . DeleteMarker != oi2 . IsDeleteMarker ||
2021-02-03 20:41:33 -08:00
oi1 . ModTime . Unix ( ) != oi2 . LastModified . Unix ( ) {
2020-11-19 11:50:22 -08:00
return replicateAll
}
2021-02-03 20:41:33 -08:00
2021-01-27 11:22:34 -08:00
if oi1 . ContentType != oi2 . ContentType {
2020-11-19 11:50:22 -08:00
return replicateMetadata
}
2021-02-03 20:41:33 -08:00
2020-11-19 11:50:22 -08:00
if oi1 . ContentEncoding != "" {
2021-01-27 11:22:34 -08:00
enc , ok := oi2 . Metadata [ xhttp . ContentEncoding ]
2021-02-03 20:41:33 -08:00
if ! ok {
enc , ok = oi2 . Metadata [ strings . ToLower ( xhttp . ContentEncoding ) ]
if ! ok {
return replicateMetadata
}
}
if strings . Join ( enc , "," ) != oi1 . ContentEncoding {
2020-11-19 11:50:22 -08:00
return replicateMetadata
}
}
2021-02-03 20:41:33 -08:00
t , _ := tags . ParseObjectTags ( oi1 . UserTags )
2022-01-10 19:06:10 -08:00
if ! reflect . DeepEqual ( oi2 . UserTags , t . ToMap ( ) ) || ( oi2 . UserTagCount != len ( t . ToMap ( ) ) ) {
2020-11-19 11:50:22 -08:00
return replicateMetadata
}
2021-02-03 20:41:33 -08:00
// Compare only necessary headers
compareKeys := [ ] string {
"Expires" ,
"Cache-Control" ,
"Content-Language" ,
"Content-Disposition" ,
"X-Amz-Object-Lock-Mode" ,
"X-Amz-Object-Lock-Retain-Until-Date" ,
"X-Amz-Object-Lock-Legal-Hold" ,
"X-Amz-Website-Redirect-Location" ,
"X-Amz-Meta-" ,
}
// compare metadata on both maps to see if meta is identical
compareMeta1 := make ( map [ string ] string )
for k , v := range oi1 . UserDefined {
var found bool
for _ , prefix := range compareKeys {
2023-07-06 16:02:08 -07:00
if ! stringsHasPrefixFold ( k , prefix ) {
2021-02-03 20:41:33 -08:00
continue
}
found = true
break
}
if found {
compareMeta1 [ strings . ToLower ( k ) ] = v
2021-01-27 11:22:34 -08:00
}
}
2021-02-03 20:41:33 -08:00
compareMeta2 := make ( map [ string ] string )
for k , v := range oi2 . Metadata {
var found bool
for _ , prefix := range compareKeys {
2023-07-06 16:02:08 -07:00
if ! stringsHasPrefixFold ( k , prefix ) {
2021-02-03 20:41:33 -08:00
continue
}
found = true
break
2021-01-27 11:22:34 -08:00
}
2021-02-03 20:41:33 -08:00
if found {
compareMeta2 [ strings . ToLower ( k ) ] = strings . Join ( v , "," )
2020-11-19 11:50:22 -08:00
}
}
2021-02-03 20:41:33 -08:00
if ! reflect . DeepEqual ( compareMeta1 , compareMeta2 ) {
2020-11-19 11:50:22 -08:00
return replicateMetadata
}
2021-02-03 20:41:33 -08:00
2020-11-19 11:50:22 -08:00
return replicateNone
}
2020-07-21 17:49:56 -07:00
// replicateObject replicates the specified version of the object to destination bucket
// The source object is then updated to reflect the replication status.
2022-07-12 10:43:32 -07:00
func replicateObject ( ctx context . Context , ri ReplicateObjectInfo , objectAPI ObjectLayer ) {
2021-07-01 14:02:44 -07:00
var replicationStatus replication . StatusType
defer func ( ) {
if replicationStatus . Empty ( ) {
// replication status is empty means
// replication was not attempted for some
// reason, notify the state of the object
// on disk.
replicationStatus = ri . ReplicationStatus
}
2022-10-24 19:35:07 +01:00
auditLogInternal ( ctx , AuditLogOptions {
2022-07-12 10:43:32 -07:00
Event : ri . EventType ,
2021-07-01 14:02:44 -07:00
APIName : ReplicateObjectAPI ,
2022-10-24 19:35:07 +01:00
Bucket : ri . Bucket ,
Object : ri . Name ,
2021-07-01 14:02:44 -07:00
VersionID : ri . VersionID ,
Status : replicationStatus . String ( ) ,
} )
} ( )
2021-04-15 16:32:00 -07:00
objInfo := ri . ObjectInfo
2020-09-16 16:04:55 -07:00
bucket := objInfo . Bucket
object := objInfo . Name
2020-07-30 19:55:22 -07:00
cfg , err := getReplicationConfig ( ctx , bucket )
2020-07-21 17:49:56 -07:00
if err != nil {
2023-07-11 23:17:45 +03:00
logger . LogOnceIf ( ctx , err , "get-replication-config-" + bucket )
2021-02-03 20:41:33 -08:00
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-02-03 20:41:33 -08:00
} )
2020-07-21 17:49:56 -07:00
return
}
2021-09-18 16:31:35 -04:00
tgtArns := cfg . FilterTargetArns ( replication . ObjectOpts {
2022-01-19 10:45:42 -08:00
Name : object ,
SSEC : crypto . SSEC . IsEncrypted ( objInfo . UserDefined ) ,
UserTags : objInfo . UserTags ,
2021-09-18 16:31:35 -04:00
} )
// Lock the object name before starting replication.
// Use separate lock that doesn't collide with regular objects.
lk := objectAPI . NewNSLock ( bucket , "/[replicate]/" + object )
lkctx , err := lk . GetLock ( ctx , globalOperationTimeout )
if err != nil {
2021-02-03 20:41:33 -08:00
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-02-03 20:41:33 -08:00
} )
2022-08-22 16:53:06 -07:00
globalReplicationPool . queueMRFSave ( ri . ToMRFEntry ( ) )
2020-07-21 17:49:56 -07:00
return
}
2021-09-18 16:31:35 -04:00
ctx = lkctx . Context ( )
2022-12-24 04:49:07 +01:00
defer lk . Unlock ( lkctx )
2021-09-18 16:31:35 -04:00
var wg sync . WaitGroup
var rinfos replicatedInfos
rinfos . Targets = make ( [ ] replicatedTargetInfo , len ( tgtArns ) )
for i , tgtArn := range tgtArns {
tgt := globalBucketTargetSys . GetRemoteTargetClient ( ctx , tgtArn )
if tgt == nil {
2023-08-08 13:27:40 -07:00
logger . LogOnceIf ( ctx , fmt . Errorf ( "failed to get target for bucket:%s arn:%s" , bucket , tgtArn ) , tgtArn )
2021-09-18 16:31:35 -04:00
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-09-18 16:31:35 -04:00
} )
continue
}
wg . Add ( 1 )
go func ( index int , tgt * TargetClient ) {
defer wg . Done ( )
2022-09-14 18:44:04 -07:00
if ri . OpType == replication . ObjectReplicationType {
// all incoming calls go through optimized path.
rinfos . Targets [ index ] = ri . replicateObject ( ctx , objectAPI , tgt )
} else {
rinfos . Targets [ index ] = ri . replicateAll ( ctx , objectAPI , tgt )
}
2021-09-18 16:31:35 -04:00
} ( i , tgt )
}
wg . Wait ( )
2023-05-16 15:35:08 -07:00
replicationStatus = rinfos . ReplicationStatus ( ) // used in defer function
2021-09-18 16:31:35 -04:00
// FIXME: add support for missing replication events
// - event.ObjectReplicationMissedThreshold
// - event.ObjectReplicationReplicatedAfterThreshold
2022-01-02 09:15:06 -08:00
eventName := event . ObjectReplicationComplete
2023-05-16 15:35:08 -07:00
if replicationStatus == replication . Failed {
2021-09-18 16:31:35 -04:00
eventName = event . ObjectReplicationFailed
}
newReplStatusInternal := rinfos . ReplicationStatusInternal ( )
// Note that internal replication status(es) may match for previously replicated objects - in such cases
// metadata should be updated with last resync timestamp.
if objInfo . ReplicationStatusInternal != newReplStatusInternal || rinfos . ReplicationResynced ( ) {
popts := ObjectOptions {
2021-10-30 08:22:04 -07:00
MTime : objInfo . ModTime ,
VersionID : objInfo . VersionID ,
2023-07-10 10:57:56 -04:00
EvalMetadataFn : func ( oi * ObjectInfo , gerr error ) ( dsc ReplicateDecision , err error ) {
2021-10-30 08:22:04 -07:00
oi . UserDefined [ ReservedMetadataPrefixLower + ReplicationStatus ] = newReplStatusInternal
oi . UserDefined [ ReservedMetadataPrefixLower + ReplicationTimestamp ] = UTCNow ( ) . Format ( time . RFC3339Nano )
oi . UserDefined [ xhttp . AmzBucketReplicationStatus ] = string ( rinfos . ReplicationStatus ( ) )
for _ , rinfo := range rinfos . Targets {
if rinfo . ResyncTimestamp != "" {
oi . UserDefined [ targetResetHeader ( rinfo . Arn ) ] = rinfo . ResyncTimestamp
}
}
if objInfo . UserTags != "" {
oi . UserDefined [ xhttp . AmzObjectTagging ] = objInfo . UserTags
}
2023-07-10 10:57:56 -04:00
return dsc , nil
2021-10-30 08:22:04 -07:00
} ,
2021-09-18 16:31:35 -04:00
}
2021-10-30 08:22:04 -07:00
2023-03-31 17:37:29 +01:00
_ , _ = objectAPI . PutObjectMetadata ( ctx , bucket , object , popts )
2021-09-18 16:31:35 -04:00
opType := replication . MetadataReplicationType
if rinfos . Action ( ) == replicateAll {
opType = replication . ObjectReplicationType
}
for _ , rinfo := range rinfos . Targets {
if rinfo . ReplicationStatus != rinfo . PrevReplicationStatus {
2021-11-17 21:10:57 +01:00
globalReplicationStats . Update ( bucket , rinfo . Arn , rinfo . Size , rinfo . Duration , rinfo . ReplicationStatus , rinfo . PrevReplicationStatus , opType )
2021-09-18 16:31:35 -04:00
}
}
}
sendEvent ( eventArgs {
EventName : eventName ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-09-18 16:31:35 -04:00
} )
// re-queue failures once more - keep a retry count to avoid flooding the queue if
// the target site is down. Leave it to scanner to catch up instead.
2022-08-22 16:53:06 -07:00
if rinfos . ReplicationStatus ( ) != replication . Completed {
2021-09-18 16:31:35 -04:00
ri . OpType = replication . HealReplicationType
2022-07-12 10:43:32 -07:00
ri . EventType = ReplicateMRF
2021-09-18 16:31:35 -04:00
ri . ReplicationStatusInternal = rinfos . ReplicationStatusInternal ( )
ri . RetryCount ++
2022-08-22 16:53:06 -07:00
globalReplicationPool . queueMRFSave ( ri . ToMRFEntry ( ) )
2021-09-18 16:31:35 -04:00
}
}
2022-09-14 18:44:04 -07:00
// replicateObject replicates object data for specified version of the object to destination bucket
2021-09-18 16:31:35 -04:00
// The source object is then updated to reflect the replication status.
2022-09-14 18:44:04 -07:00
func ( ri ReplicateObjectInfo ) replicateObject ( ctx context . Context , objectAPI ObjectLayer , tgt * TargetClient ) ( rinfo replicatedTargetInfo ) {
2021-11-17 21:10:57 +01:00
startTime := time . Now ( )
2021-09-22 13:48:45 -04:00
objInfo := ri . ObjectInfo . Clone ( )
2021-09-18 16:31:35 -04:00
bucket := objInfo . Bucket
object := objInfo . Name
sz , _ := objInfo . GetActualSize ( )
2022-09-14 18:44:04 -07:00
rAction := replicateAll
2021-09-18 16:31:35 -04:00
rinfo = replicatedTargetInfo {
Size : sz ,
Arn : tgt . ARN ,
PrevReplicationStatus : objInfo . TargetReplicationStatus ( tgt . ARN ) ,
ReplicationStatus : replication . Failed ,
OpType : ri . OpType ,
ReplicationAction : rAction ,
}
2022-02-10 10:16:52 -08:00
2021-09-18 16:31:35 -04:00
if ri . ObjectInfo . TargetReplicationStatus ( tgt . ARN ) == replication . Completed && ! ri . ExistingObjResync . Empty ( ) && ! ri . ExistingObjResync . mustResyncTarget ( tgt . ARN ) {
rinfo . ReplicationStatus = replication . Completed
rinfo . ReplicationResynced = true
2021-09-08 18:34:50 -04:00
return
}
2022-09-14 18:44:04 -07:00
2022-08-16 17:46:22 -07:00
if globalBucketTargetSys . isOffline ( tgt . EndpointURL ( ) ) {
2023-07-13 02:51:33 -04:00
logger . LogOnceIf ( ctx , fmt . Errorf ( "remote target is offline for bucket:%s arn:%s retry:%d" , bucket , tgt . ARN , ri . RetryCount ) , "replication-target-offline" + tgt . ARN )
2021-08-23 17:16:18 +02:00
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-08-23 17:16:18 +02:00
} )
return
}
2022-06-06 15:14:56 -07:00
versioned := globalBucketVersioningSys . PrefixEnabled ( bucket , object )
versionSuspended := globalBucketVersioningSys . PrefixSuspended ( bucket , object )
2023-04-17 12:16:37 -07:00
gr , err := objectAPI . GetObjectNInfo ( ctx , bucket , object , nil , http . Header { } , ObjectOptions {
2022-06-06 15:14:56 -07:00
VersionID : objInfo . VersionID ,
Versioned : versioned ,
VersionSuspended : versionSuspended ,
2020-09-15 20:44:48 -07:00
} )
2020-07-21 17:49:56 -07:00
if err != nil {
2022-10-08 00:11:41 +01:00
if ! isErrVersionNotFound ( err ) && ! isErrObjectNotFound ( err ) {
2022-08-19 16:21:05 -07:00
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2022-08-19 16:21:05 -07:00
} )
2023-08-08 13:27:40 -07:00
logger . LogOnceIf ( ctx , fmt . Errorf ( "unable to read source object %s/%s(%s): %w" , bucket , object , objInfo . VersionID , err ) , object + ":" + objInfo . VersionID )
2022-08-19 16:21:05 -07:00
}
2020-07-21 17:49:56 -07:00
return
}
2022-09-14 18:44:04 -07:00
defer gr . Close ( )
objInfo = gr . ObjInfo
2023-06-01 18:52:55 -07:00
// make sure we have the latest metadata for metrics calculation
rinfo . PrevReplicationStatus = objInfo . TargetReplicationStatus ( tgt . ARN )
2022-09-14 18:44:04 -07:00
size , err := objInfo . GetActualSize ( )
if err != nil {
logger . LogIf ( ctx , err )
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2022-09-14 18:44:04 -07:00
} )
return
}
if tgt . Bucket == "" {
2023-06-01 18:52:55 -07:00
logger . LogIf ( ctx , fmt . Errorf ( "unable to replicate object %s(%s), bucket is empty" , objInfo . Name , objInfo . VersionID ) )
2022-09-14 18:44:04 -07:00
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2022-09-14 18:44:04 -07:00
} )
return rinfo
}
2021-06-28 23:58:08 -07:00
defer func ( ) {
2022-09-14 18:44:04 -07:00
if rinfo . ReplicationStatus == replication . Completed && ri . OpType == replication . ExistingObjectReplicationType && tgt . ResetID != "" {
rinfo . ResyncTimestamp = fmt . Sprintf ( "%s;%s" , UTCNow ( ) . Format ( http . TimeFormat ) , tgt . ResetID )
rinfo . ReplicationResynced = true
2021-06-28 23:58:08 -07:00
}
2022-09-14 18:44:04 -07:00
rinfo . Duration = time . Since ( startTime )
2021-06-28 23:58:08 -07:00
} ( )
2022-09-14 18:44:04 -07:00
rinfo . ReplicationStatus = replication . Completed
rinfo . Size = size
rinfo . ReplicationAction = rAction
// use core client to avoid doing multipart on PUT
2022-12-05 20:18:50 +01:00
c := & minio . Core { Client : tgt . Client }
2022-09-14 18:44:04 -07:00
putOpts , err := putReplicationOpts ( ctx , tgt . StorageClass , objInfo )
if err != nil {
logger . LogIf ( ctx , fmt . Errorf ( "failed to get target for replication bucket:%s err:%w" , bucket , err ) )
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2022-09-14 18:44:04 -07:00
} )
return
}
var headerSize int
for k , v := range putOpts . Header ( ) {
headerSize += len ( k ) + len ( v )
}
opts := & bandwidth . MonitorReaderOptions {
Bucket : objInfo . Bucket ,
2023-01-19 05:22:16 -08:00
TargetARN : tgt . ARN ,
2022-09-14 18:44:04 -07:00
HeaderSize : headerSize ,
}
newCtx := ctx
2023-01-19 05:22:16 -08:00
if globalBucketMonitor . IsThrottled ( bucket , tgt . ARN ) {
2022-09-14 18:44:04 -07:00
var cancel context . CancelFunc
newCtx , cancel = context . WithTimeout ( ctx , throttleDeadline )
defer cancel ( )
}
r := bandwidth . NewMonitoredReader ( newCtx , globalBucketMonitor , gr , opts )
if objInfo . isMultipart ( ) {
if err := replicateObjectWithMultipart ( ctx , c , tgt . Bucket , object ,
r , objInfo , putOpts ) ; err != nil {
2023-02-02 05:52:02 -08:00
if minio . ToErrorResponse ( err ) . Code != "PreconditionFailed" {
2022-09-14 18:44:04 -07:00
rinfo . ReplicationStatus = replication . Failed
2023-06-01 18:52:55 -07:00
logger . LogIf ( ctx , fmt . Errorf ( "unable to replicate for object %s/%s(%s): %s" , bucket , objInfo . Name , objInfo . VersionID , err ) )
2022-09-14 18:44:04 -07:00
}
}
} else {
if _ , err = c . PutObject ( ctx , tgt . Bucket , object , r , size , "" , "" , putOpts ) ; err != nil {
2023-02-02 05:52:02 -08:00
if minio . ToErrorResponse ( err ) . Code != "PreconditionFailed" {
2022-09-14 18:44:04 -07:00
rinfo . ReplicationStatus = replication . Failed
2023-06-01 18:52:55 -07:00
logger . LogIf ( ctx , fmt . Errorf ( "unable to replicate for object %s/%s(%s): %s" , bucket , objInfo . Name , objInfo . VersionID , err ) )
2022-09-14 18:44:04 -07:00
}
}
}
return
}
// replicateAll replicates metadata for specified version of the object to destination bucket
// if the destination version is missing it automatically does fully copy as well.
// The source object is then updated to reflect the replication status.
func ( ri ReplicateObjectInfo ) replicateAll ( ctx context . Context , objectAPI ObjectLayer , tgt * TargetClient ) ( rinfo replicatedTargetInfo ) {
startTime := time . Now ( )
objInfo := ri . ObjectInfo . Clone ( )
bucket := objInfo . Bucket
object := objInfo . Name
sz , _ := objInfo . GetActualSize ( )
// set defaults for replication action based on operation being performed - actual
// replication action can only be determined after stat on remote. This default is
// needed for updating replication metrics correctly when target is offline.
rAction := replicateMetadata
rinfo = replicatedTargetInfo {
Size : sz ,
Arn : tgt . ARN ,
PrevReplicationStatus : objInfo . TargetReplicationStatus ( tgt . ARN ) ,
ReplicationStatus : replication . Failed ,
OpType : ri . OpType ,
ReplicationAction : rAction ,
}
if globalBucketTargetSys . isOffline ( tgt . EndpointURL ( ) ) {
2023-07-13 02:51:33 -04:00
logger . LogOnceIf ( ctx , fmt . Errorf ( "remote target is offline for bucket:%s arn:%s retry:%d" , bucket , tgt . ARN , ri . RetryCount ) , "replication-target-offline-heal" + tgt . ARN )
2022-09-14 18:44:04 -07:00
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2022-09-14 18:44:04 -07:00
} )
return
}
versioned := globalBucketVersioningSys . PrefixEnabled ( bucket , object )
versionSuspended := globalBucketVersioningSys . PrefixSuspended ( bucket , object )
2023-04-17 12:16:37 -07:00
gr , err := objectAPI . GetObjectNInfo ( ctx , bucket , object , nil , http . Header { } , ObjectOptions {
2022-09-14 18:44:04 -07:00
VersionID : objInfo . VersionID ,
Versioned : versioned ,
VersionSuspended : versionSuspended ,
} )
if err != nil {
2022-10-08 00:11:41 +01:00
if ! isErrVersionNotFound ( err ) && ! isErrObjectNotFound ( err ) {
2022-09-14 18:44:04 -07:00
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2022-09-14 18:44:04 -07:00
} )
2023-06-01 18:52:55 -07:00
logger . LogIf ( ctx , fmt . Errorf ( "unable to update replicate metadata for %s/%s(%s): %w" , bucket , object , objInfo . VersionID , err ) )
2022-09-14 18:44:04 -07:00
}
return
}
defer gr . Close ( )
2021-02-03 20:41:33 -08:00
2020-09-16 16:04:55 -07:00
objInfo = gr . ObjInfo
2023-06-01 18:52:55 -07:00
// make sure we have the latest metadata for metrics calculation
rinfo . PrevReplicationStatus = objInfo . TargetReplicationStatus ( tgt . ARN )
2022-09-14 18:44:04 -07:00
2023-03-07 07:43:38 -08:00
// use latest ObjectInfo to check if previous replication attempt succeeded
if objInfo . TargetReplicationStatus ( tgt . ARN ) == replication . Completed && ! ri . ExistingObjResync . Empty ( ) && ! ri . ExistingObjResync . mustResyncTarget ( tgt . ARN ) {
rinfo . ReplicationStatus = replication . Completed
rinfo . ReplicationResynced = true
return
}
2022-09-14 18:44:04 -07:00
size , err := objInfo . GetActualSize ( )
2020-07-21 17:49:56 -07:00
if err != nil {
logger . LogIf ( ctx , err )
2021-02-03 20:41:33 -08:00
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-02-03 20:41:33 -08:00
} )
2020-07-21 17:49:56 -07:00
return
}
2021-09-18 16:31:35 -04:00
if tgt . Bucket == "" {
2023-03-07 07:43:38 -08:00
logger . LogIf ( ctx , fmt . Errorf ( "unable to replicate object %s(%s), bucket is empty" , objInfo . Name , objInfo . VersionID ) )
2021-02-03 20:41:33 -08:00
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-02-03 20:41:33 -08:00
} )
2021-09-18 16:31:35 -04:00
return rinfo
2020-07-21 17:49:56 -07:00
}
2022-02-10 10:16:52 -08:00
defer func ( ) {
if rinfo . ReplicationStatus == replication . Completed && ri . OpType == replication . ExistingObjectReplicationType && tgt . ResetID != "" {
rinfo . ResyncTimestamp = fmt . Sprintf ( "%s;%s" , UTCNow ( ) . Format ( http . TimeFormat ) , tgt . ResetID )
rinfo . ReplicationResynced = true
}
rinfo . Duration = time . Since ( startTime )
} ( )
2020-09-16 16:04:55 -07:00
2021-09-18 16:31:35 -04:00
rAction = replicateAll
2022-12-05 20:18:50 +01:00
oi , cerr := tgt . StatObject ( ctx , tgt . Bucket , object , minio . StatObjectOptions {
2021-01-27 11:22:34 -08:00
VersionID : objInfo . VersionID ,
2022-12-05 20:18:50 +01:00
Internal : minio . AdvancedGetOptions {
2021-01-27 11:22:34 -08:00
ReplicationProxyRequest : "false" ,
2022-01-02 09:15:06 -08:00
} ,
} )
2021-09-18 16:31:35 -04:00
if cerr == nil {
2021-09-28 13:26:12 -04:00
rAction = getReplicationAction ( objInfo , oi , ri . OpType )
2021-09-18 16:31:35 -04:00
rinfo . ReplicationStatus = replication . Completed
if rAction == replicateNone {
2021-09-28 13:26:12 -04:00
if ri . OpType == replication . ExistingObjectReplicationType &&
objInfo . ModTime . Unix ( ) > oi . LastModified . Unix ( ) && objInfo . VersionID == nullVersionID {
2023-03-07 07:43:38 -08:00
logger . LogIf ( ctx , fmt . Errorf ( "unable to replicate %s/%s (null). Newer version exists on target" , bucket , object ) )
2021-09-28 13:26:12 -04:00
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-09-28 13:26:12 -04:00
} )
}
2020-07-21 17:49:56 -07:00
// object with same VersionID already exists, replication kicked off by
2021-04-03 09:03:42 -07:00
// PutObject might have completed
2021-09-18 16:31:35 -04:00
if objInfo . TargetReplicationStatus ( tgt . ARN ) == replication . Pending || objInfo . TargetReplicationStatus ( tgt . ARN ) == replication . Failed || ri . OpType == replication . ExistingObjectReplicationType {
2021-07-01 14:02:44 -07:00
// if metadata is not updated for some reason after replication, such as
// 503 encountered while updating metadata - make sure to set ReplicationStatus
// as Completed.
//
// Note: Replication Stats would have been updated despite metadata update failure.
2022-02-10 10:16:52 -08:00
rinfo . ReplicationAction = rAction
rinfo . ReplicationStatus = replication . Completed
2021-04-29 16:46:26 -07:00
}
2020-07-21 17:49:56 -07:00
return
}
}
2023-06-17 07:30:53 -07:00
// if target returns error other than NoSuchKey, defer replication attempt
2023-06-18 18:20:15 -07:00
if cerr != nil {
errResp := minio . ToErrorResponse ( cerr )
switch errResp . Code {
case "NoSuchKey" , "NoSuchVersion" , "SlowDownRead" :
rAction = replicateAll
default :
logger . LogIf ( ctx , fmt . Errorf ( "unable to replicate %s/%s (%s). Target (%s) returned %s error on HEAD" ,
bucket , object , objInfo . VersionID , tgt . EndpointURL ( ) , cerr ) )
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
} )
return
}
2023-06-17 07:30:53 -07:00
}
2021-09-18 16:31:35 -04:00
rinfo . ReplicationStatus = replication . Completed
rinfo . Size = size
rinfo . ReplicationAction = rAction
2021-02-20 00:22:17 -08:00
// use core client to avoid doing multipart on PUT
2022-12-05 20:18:50 +01:00
c := & minio . Core { Client : tgt . Client }
2021-09-18 16:31:35 -04:00
if rAction != replicateAll {
2020-11-19 11:50:22 -08:00
// replicate metadata for object tagging/copy with metadata replacement
2022-12-05 20:18:50 +01:00
srcOpts := minio . CopySrcOptions {
2021-09-18 16:31:35 -04:00
Bucket : tgt . Bucket ,
2021-02-10 17:25:04 -08:00
Object : object ,
2021-04-03 09:03:42 -07:00
VersionID : objInfo . VersionID ,
}
2022-12-05 20:18:50 +01:00
dstOpts := minio . PutObjectOptions {
Internal : minio . AdvancedPutOptions {
2021-03-03 11:13:31 -08:00
SourceVersionID : objInfo . VersionID ,
ReplicationRequest : true , // always set this to distinguish between `mc mirror` replication and serverside
2022-01-02 09:15:06 -08:00
} ,
}
2021-09-18 16:31:35 -04:00
if _ , err = c . CopyObject ( ctx , tgt . Bucket , object , tgt . Bucket , object , getCopyObjMetadata ( objInfo , tgt . StorageClass ) , srcOpts , dstOpts ) ; err != nil {
rinfo . ReplicationStatus = replication . Failed
2023-03-07 07:43:38 -08:00
logger . LogIf ( ctx , fmt . Errorf ( "unable to replicate metadata for object %s/%s(%s): %s" , bucket , objInfo . Name , objInfo . VersionID , err ) )
2021-01-06 16:13:10 -08:00
}
} else {
2021-09-18 16:31:35 -04:00
var putOpts minio . PutObjectOptions
putOpts , err = putReplicationOpts ( ctx , tgt . StorageClass , objInfo )
2021-02-08 16:19:05 -08:00
if err != nil {
2021-09-18 16:31:35 -04:00
logger . LogIf ( ctx , fmt . Errorf ( "failed to get target for replication bucket:%s err:%w" , bucket , err ) )
2021-02-08 18:12:28 -08:00
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 10:20:53 -07:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-02-08 18:12:28 -08:00
} )
2021-02-08 16:19:05 -08:00
return
}
2021-01-06 16:13:10 -08:00
var headerSize int
for k , v := range putOpts . Header ( ) {
headerSize += len ( k ) + len ( v )
}
2021-01-08 10:12:26 -08:00
2021-04-05 16:07:53 -07:00
opts := & bandwidth . MonitorReaderOptions {
2021-06-24 18:29:30 -07:00
Bucket : objInfo . Bucket ,
2023-01-19 05:22:16 -08:00
TargetARN : tgt . ARN ,
2021-06-24 18:29:30 -07:00
HeaderSize : headerSize ,
2021-04-05 16:07:53 -07:00
}
2021-07-28 15:20:01 -07:00
newCtx := ctx
2023-01-19 05:22:16 -08:00
if globalBucketMonitor . IsThrottled ( bucket , tgt . ARN ) {
2021-07-28 15:20:01 -07:00
var cancel context . CancelFunc
newCtx , cancel = context . WithTimeout ( ctx , throttleDeadline )
defer cancel ( )
}
2021-06-24 18:29:30 -07:00
r := bandwidth . NewMonitoredReader ( newCtx , globalBucketMonitor , gr , opts )
2021-09-08 22:25:23 -07:00
if objInfo . isMultipart ( ) {
2021-09-18 16:31:35 -04:00
if err := replicateObjectWithMultipart ( ctx , c , tgt . Bucket , object ,
2021-07-28 22:11:55 -07:00
r , objInfo , putOpts ) ; err != nil {
2023-02-02 05:52:02 -08:00
if minio . ToErrorResponse ( err ) . Code != "PreconditionFailed" {
2023-01-20 18:03:04 -08:00
rinfo . ReplicationStatus = replication . Failed
2023-03-07 07:43:38 -08:00
logger . LogIf ( ctx , fmt . Errorf ( "unable to replicate for object %s/%s(%s): %s" , bucket , objInfo . Name , objInfo . VersionID , err ) )
2023-01-20 18:03:04 -08:00
} else {
rinfo . ReplicationStatus = replication . Completed
}
2021-06-30 07:44:24 -07:00
}
} else {
2021-09-18 16:31:35 -04:00
if _ , err = c . PutObject ( ctx , tgt . Bucket , object , r , size , "" , "" , putOpts ) ; err != nil {
2023-02-02 05:52:02 -08:00
if minio . ToErrorResponse ( err ) . Code != "PreconditionFailed" {
2023-01-20 18:03:04 -08:00
rinfo . ReplicationStatus = replication . Failed
2023-03-07 07:43:38 -08:00
logger . LogIf ( ctx , fmt . Errorf ( "unable to replicate for object %s/%s(%s): %s" , bucket , objInfo . Name , objInfo . VersionID , err ) )
2023-01-20 18:03:04 -08:00
} else {
rinfo . ReplicationStatus = replication . Completed
}
2021-06-30 07:44:24 -07:00
}
2021-01-06 16:13:10 -08:00
}
2020-07-21 17:49:56 -07:00
}
2021-09-18 16:31:35 -04:00
return
2020-07-21 17:49:56 -07:00
}
2020-08-12 17:32:24 -07:00
2022-12-05 20:18:50 +01:00
func replicateObjectWithMultipart ( ctx context . Context , c * minio . Core , bucket , object string , r io . Reader , objInfo ObjectInfo , opts minio . PutObjectOptions ) ( err error ) {
var uploadedParts [ ] minio . CompletePart
2023-06-02 14:38:09 -07:00
// new multipart must not set mtime as it may lead to erroneous cleanups at various intervals.
opts . Internal . SourceMTime = time . Time { } // this value is saved properly in CompleteMultipartUpload()
2023-08-05 12:27:07 -07:00
nctx , cancel := context . WithTimeout ( ctx , 5 * time . Minute )
defer cancel ( )
uploadID , err := c . NewMultipartUpload ( nctx , bucket , object , opts )
2021-06-30 07:44:24 -07:00
if err != nil {
2021-07-28 22:11:55 -07:00
return err
2021-06-30 07:44:24 -07:00
}
2021-07-28 22:11:55 -07:00
defer func ( ) {
if err != nil {
// block and abort remote upload upon failure.
2022-01-11 22:32:29 -08:00
attempts := 1
for attempts <= 3 {
2023-08-05 12:27:07 -07:00
actx , acancel := context . WithTimeout ( ctx , time . Minute )
aerr := c . AbortMultipartUpload ( actx , bucket , object , uploadID )
2022-01-11 22:32:29 -08:00
if aerr == nil {
2023-08-05 12:27:07 -07:00
acancel ( )
2022-01-11 22:32:29 -08:00
return
}
2023-08-05 12:27:07 -07:00
acancel ( )
logger . LogIf ( actx ,
fmt . Errorf ( "trying %s: Unable to cleanup failed multipart replication %s on remote %s/%s: %w - this may consume space on remote cluster" ,
2022-01-11 22:32:29 -08:00
humanize . Ordinal ( attempts ) , uploadID , bucket , object , aerr ) )
attempts ++
time . Sleep ( time . Second )
2021-07-28 22:11:55 -07:00
}
}
} ( )
2021-06-30 07:44:24 -07:00
var (
hr * hash . Reader
2022-12-05 20:18:50 +01:00
pInfo minio . ObjectPart
2021-06-30 07:44:24 -07:00
)
2021-07-28 22:11:55 -07:00
2021-06-30 07:44:24 -07:00
for _ , partInfo := range objInfo . Parts {
2023-05-16 13:14:37 -07:00
hr , err = hash . NewReader ( io . LimitReader ( r , partInfo . ActualSize ) , partInfo . ActualSize , "" , "" , partInfo . ActualSize )
2021-06-30 07:44:24 -07:00
if err != nil {
2021-07-28 22:11:55 -07:00
return err
2021-06-30 07:44:24 -07:00
}
2023-03-28 03:45:24 -07:00
popts := minio . PutObjectPartOptions {
SSE : opts . ServerSideEncryption ,
}
pInfo , err = c . PutObjectPart ( ctx , bucket , object , uploadID , partInfo . Number , hr , partInfo . ActualSize , popts )
2021-06-30 07:44:24 -07:00
if err != nil {
2021-07-28 22:11:55 -07:00
return err
2021-06-30 07:44:24 -07:00
}
2021-08-24 17:41:05 -04:00
if pInfo . Size != partInfo . ActualSize {
return fmt . Errorf ( "Part size mismatch: got %d, want %d" , pInfo . Size , partInfo . ActualSize )
2021-06-30 07:44:24 -07:00
}
2022-12-05 20:18:50 +01:00
uploadedParts = append ( uploadedParts , minio . CompletePart {
2021-06-30 07:44:24 -07:00
PartNumber : pInfo . PartNumber ,
ETag : pInfo . ETag ,
} )
}
2023-08-05 12:27:07 -07:00
cctx , ccancel := context . WithTimeout ( ctx , 10 * time . Minute )
defer ccancel ( )
_ , err = c . CompleteMultipartUpload ( cctx , bucket , object , uploadID , uploadedParts , minio . PutObjectOptions {
2022-12-05 20:18:50 +01:00
Internal : minio . AdvancedPutOptions {
2021-07-28 22:11:55 -07:00
SourceMTime : objInfo . ModTime ,
// always set this to distinguish between `mc mirror` replication and serverside
ReplicationRequest : true ,
2022-01-02 09:15:06 -08:00
} ,
} )
2021-07-28 22:11:55 -07:00
return err
2021-06-30 07:44:24 -07:00
}
2020-08-12 17:32:24 -07:00
// filterReplicationStatusMetadata returns metadata without the
// xhttp.AmzBucketReplicationStatus entry, for use with COPY.
// The input map is never modified: when the key is absent the same map is
// returned, otherwise a fresh map without that key is returned.
func filterReplicationStatusMetadata(metadata map[string]string) map[string]string {
	if _, present := metadata[xhttp.AmzBucketReplicationStatus]; !present {
		// Nothing to strip, hand back the caller's map untouched.
		return metadata
	}
	filtered := make(map[string]string, len(metadata))
	for k, v := range metadata {
		if k == xhttp.AmzBucketReplicationStatus {
			continue
		}
		filtered[k] = v
	}
	return filtered
}
2020-09-16 16:04:55 -07:00
2021-06-01 19:59:11 -07:00
// DeletedObjectReplicationInfo has info on deleted object
type DeletedObjectReplicationInfo struct {
2020-11-19 18:43:58 -08:00
DeletedObject
2021-09-18 16:31:35 -04:00
Bucket string
2022-07-12 10:43:32 -07:00
EventType string
2021-09-18 16:31:35 -04:00
OpType replication . Type
ResetID string
TargetArn string
2020-11-19 18:43:58 -08:00
}
2022-08-22 16:53:06 -07:00
// ToMRFEntry builds the MRF entry for this delete. The delete marker
// version ID is used when set; otherwise the object version ID is used.
func (di DeletedObjectReplicationInfo) ToMRFEntry() MRFReplicateEntry {
	entry := MRFReplicateEntry{
		Bucket:    di.Bucket,
		Object:    di.ObjectName,
		versionID: di.VersionID,
	}
	if di.DeleteMarkerVersionID != "" {
		entry.versionID = di.DeleteMarkerVersionID
	}
	return entry
}
2021-07-01 14:02:44 -07:00
// API names used for replication operations.
const (
	// ReplicateObjectAPI is the API name for object replication.
	ReplicateObjectAPI = "ReplicateObject"
	// ReplicateDeleteAPI is the API name for delete replication.
	ReplicateDeleteAPI = "ReplicateDelete"
)
2021-06-28 23:58:08 -07:00
// Audit trail event names for replication.
const (
	// ReplicateQueued - audit trail for replication being queued
	ReplicateQueued = "replicate:queue"

	// ReplicateExisting - audit trail for replication of existing objects
	ReplicateExisting = "replicate:existing"
	// ReplicateExistingDelete - audit trail for delete replication triggered for existing delete markers
	ReplicateExistingDelete = "replicate:existing:delete"

	// ReplicateMRF - audit trail for replication from the Most Recent Failures (MRF) queue
	ReplicateMRF = "replicate:mrf"

	// ReplicateIncoming - audit trail for inline replication
	ReplicateIncoming = "replicate:incoming"
	// ReplicateIncomingDelete - audit trail for inline replication of deletes
	ReplicateIncomingDelete = "replicate:incoming:delete"

	// ReplicateHeal - audit trail for healing of failed/pending replications
	ReplicateHeal = "replicate:heal"
	// ReplicateHealDelete - audit trail for healing of failed/pending delete replications
	ReplicateHealDelete = "replicate:heal:delete"
)
2020-09-21 13:43:29 -07:00
var (
2021-04-03 09:03:42 -07:00
globalReplicationPool * ReplicationPool
globalReplicationStats * ReplicationStats
2020-09-21 13:43:29 -07:00
)
2020-09-16 16:04:55 -07:00
2021-03-09 02:56:42 -08:00
// ReplicationPool describes replication pool
type ReplicationPool struct {
2022-11-18 00:20:09 +01:00
// atomic ops:
2022-11-17 16:35:02 +01:00
activeWorkers int32
activeMRFWorkers int32
2022-11-18 00:20:09 +01:00
objLayer ObjectLayer
ctx context . Context
priority string
mu sync . RWMutex
resyncer * replicationResyncer
// workers:
workers [ ] chan ReplicationWorkerOperation
2023-07-25 20:02:02 -07:00
lrgworkers [ ] chan ReplicationWorkerOperation
2022-11-18 00:20:09 +01:00
existingWorkers chan ReplicationWorkerOperation
// mrf:
mrfWorkerKillCh chan struct { }
mrfReplicaCh chan ReplicationWorkerOperation
mrfSaveCh chan MRFReplicateEntry
2022-12-22 14:25:13 -08:00
mrfStopCh chan struct { }
2022-11-18 00:20:09 +01:00
mrfWorkerSize int
saveStateCh chan struct { }
}
// ReplicationWorkerOperation is a shared interface of replication operations.
type ReplicationWorkerOperation interface {
ToMRFEntry ( ) MRFReplicateEntry
2021-03-09 02:56:42 -08:00
}
2022-09-24 16:20:28 -07:00
// Worker pool sizing, per node.
const (
	// WorkerMaxLimit is the number of workers used in "fast" mode.
	WorkerMaxLimit = 500
	// WorkerMinLimit is the number of workers used in "slow" mode.
	WorkerMinLimit = 50
	// WorkerAutoDefault is the default number of workers in "auto" mode.
	WorkerAutoDefault = 100

	// MRFWorkerMaxLimit is the number of MRF workers used in "fast" mode.
	MRFWorkerMaxLimit = 8
	// MRFWorkerMinLimit is the number of MRF workers used in "slow" mode.
	MRFWorkerMinLimit = 2
	// MRFWorkerAutoDefault is the default number of MRF workers in "auto" mode.
	MRFWorkerAutoDefault = 4

	// LargeWorkerCount is the fixed number of queues reserved for large uploads (>= 128MiB).
	LargeWorkerCount = 10
)
2021-03-09 02:56:42 -08:00
// NewReplicationPool creates a pool of replication workers of specified size
2021-04-23 21:58:45 -07:00
func NewReplicationPool ( ctx context . Context , o ObjectLayer , opts replicationPoolOpts ) * ReplicationPool {
2022-09-24 16:20:28 -07:00
var workers , failedWorkers int
priority := "auto"
if opts . Priority != "" {
priority = opts . Priority
}
switch priority {
case "fast" :
workers = WorkerMaxLimit
failedWorkers = MRFWorkerMaxLimit
case "slow" :
workers = WorkerMinLimit
failedWorkers = MRFWorkerMinLimit
default :
workers = WorkerAutoDefault
failedWorkers = MRFWorkerAutoDefault
}
2022-11-18 00:20:09 +01:00
2021-03-09 02:56:42 -08:00
pool := & ReplicationPool {
2022-11-18 00:20:09 +01:00
workers : make ( [ ] chan ReplicationWorkerOperation , 0 , workers ) ,
2023-07-25 20:02:02 -07:00
lrgworkers : make ( [ ] chan ReplicationWorkerOperation , 0 , LargeWorkerCount ) ,
2022-11-18 00:20:09 +01:00
existingWorkers : make ( chan ReplicationWorkerOperation , 100000 ) ,
2023-07-25 20:02:02 -07:00
2022-11-18 00:20:09 +01:00
mrfReplicaCh : make ( chan ReplicationWorkerOperation , 100000 ) ,
mrfWorkerKillCh : make ( chan struct { } , failedWorkers ) ,
resyncer : newresyncer ( ) ,
mrfSaveCh : make ( chan MRFReplicateEntry , 100000 ) ,
2022-12-22 14:25:13 -08:00
mrfStopCh : make ( chan struct { } , 1 ) ,
2022-11-18 00:20:09 +01:00
saveStateCh : make ( chan struct { } , 1 ) ,
ctx : ctx ,
objLayer : o ,
priority : priority ,
}
2023-07-25 20:02:02 -07:00
pool . AddLargeWorkers ( )
2022-11-18 00:20:09 +01:00
pool . ResizeWorkers ( workers , 0 )
2022-09-24 16:20:28 -07:00
pool . ResizeFailedWorkers ( failedWorkers )
2022-11-18 00:20:09 +01:00
go pool . AddWorker ( pool . existingWorkers , nil )
2022-11-14 07:16:40 -08:00
go pool . resyncer . PersistToDisk ( ctx , o )
2022-08-22 16:53:06 -07:00
go pool . processMRF ( )
go pool . persistMRF ( )
2022-09-12 12:40:02 -07:00
go pool . saveStatsToDisk ( )
2021-03-09 02:56:42 -08:00
return pool
2020-09-16 16:04:55 -07:00
}
2021-04-03 09:03:42 -07:00
// AddMRFWorker adds a pending/failed replication worker to handle requests that could not be queued
// to the other workers
func ( p * ReplicationPool ) AddMRFWorker ( ) {
for {
select {
case <- p . ctx . Done ( ) :
return
case oi , ok := <- p . mrfReplicaCh :
if ! ok {
return
}
2022-11-18 00:20:09 +01:00
switch v := oi . ( type ) {
case ReplicateObjectInfo :
atomic . AddInt32 ( & p . activeMRFWorkers , 1 )
replicateObject ( p . ctx , v , p . objLayer )
atomic . AddInt32 ( & p . activeMRFWorkers , - 1 )
default :
logger . LogOnceIf ( p . ctx , fmt . Errorf ( "unknown mrf replication type: %T" , oi ) , "unknown-mrf-replicate-type" )
}
2021-05-28 13:28:37 -07:00
case <- p . mrfWorkerKillCh :
return
2021-04-03 09:03:42 -07:00
}
}
}
2022-11-18 00:20:09 +01:00
// AddWorker adds a replication worker to the pool.
// An optional pointer to a tracker that will be atomically
// incremented when operations are running can be provided.
func ( p * ReplicationPool ) AddWorker ( input <- chan ReplicationWorkerOperation , opTracker * int32 ) {
2021-03-09 02:56:42 -08:00
for {
select {
case <- p . ctx . Done ( ) :
return
2022-11-18 00:20:09 +01:00
case oi , ok := <- input :
2021-03-09 02:56:42 -08:00
if ! ok {
return
}
2022-11-18 00:20:09 +01:00
switch v := oi . ( type ) {
case ReplicateObjectInfo :
if opTracker != nil {
atomic . AddInt32 ( opTracker , 1 )
}
replicateObject ( p . ctx , v , p . objLayer )
if opTracker != nil {
atomic . AddInt32 ( opTracker , - 1 )
}
case DeletedObjectReplicationInfo :
if opTracker != nil {
atomic . AddInt32 ( opTracker , 1 )
}
replicateDelete ( p . ctx , v , p . objLayer )
if opTracker != nil {
atomic . AddInt32 ( opTracker , - 1 )
}
default :
logger . LogOnceIf ( p . ctx , fmt . Errorf ( "unknown replication type: %T" , oi ) , "unknown-replicate-type" )
2021-06-01 19:59:11 -07:00
}
}
}
}
2023-07-25 20:02:02 -07:00
// AddLargeWorkers creates the LargeWorkerCount queues that receive large
// uploads and starts one worker goroutine per queue. A second goroutine
// closes all of these queues once the pool context is done.
func (p *ReplicationPool) AddLargeWorkers() {
	for i := 0; i < LargeWorkerCount; i++ {
		input := make(chan ReplicationWorkerOperation, 100000)
		p.lrgworkers = append(p.lrgworkers, input)
		// Each queue needs a consumer, otherwise objects queued here by
		// queueReplicaTask are never replicated. The tracker is nil so these
		// workers do not count towards ActiveWorkers.
		go p.AddWorker(input, nil)
	}
	go func() {
		<-p.ctx.Done()
		for i := 0; i < LargeWorkerCount; i++ {
			close(p.lrgworkers[i])
		}
	}()
}
2022-09-24 16:20:28 -07:00
// ActiveWorkers reports how many workers are currently handling replication
// traffic, read atomically from the activeWorkers counter.
func (p *ReplicationPool) ActiveWorkers() int {
	active := atomic.LoadInt32(&p.activeWorkers)
	return int(active)
}
// ActiveMRFWorkers reports how many workers are currently handling
// replication failures, read atomically from the activeMRFWorkers counter.
func (p *ReplicationPool) ActiveMRFWorkers() int {
	active := atomic.LoadInt32(&p.activeMRFWorkers)
	return int(active)
}
2022-11-18 00:20:09 +01:00
// ResizeWorkers sets replication workers pool to new size.
// checkOld can be set to an expected value.
// If the worker count changed
func ( p * ReplicationPool ) ResizeWorkers ( n , checkOld int ) {
2021-03-09 02:56:42 -08:00
p . mu . Lock ( )
defer p . mu . Unlock ( )
2022-11-18 00:20:09 +01:00
if ( checkOld > 0 && len ( p . workers ) != checkOld ) || n == len ( p . workers ) || n < 1 {
// Either already satisfied or worker count changed while we waited for the lock.
return
}
for len ( p . workers ) < n {
input := make ( chan ReplicationWorkerOperation , 10000 )
p . workers = append ( p . workers , input )
go p . AddWorker ( input , & p . activeWorkers )
2021-03-09 02:56:42 -08:00
}
2022-11-18 00:20:09 +01:00
for len ( p . workers ) > n {
worker := p . workers [ len ( p . workers ) - 1 ]
p . workers = p . workers [ : len ( p . workers ) - 1 ]
close ( worker )
2021-04-23 21:58:45 -07:00
}
}
2022-09-24 16:20:28 -07:00
// ResizeWorkerPriority sets replication failed workers pool size
func ( p * ReplicationPool ) ResizeWorkerPriority ( pri string ) {
var workers , mrfWorkers int
p . mu . Lock ( )
switch pri {
case "fast" :
workers = WorkerMaxLimit
mrfWorkers = MRFWorkerMaxLimit
case "slow" :
workers = WorkerMinLimit
mrfWorkers = MRFWorkerMinLimit
default :
workers = WorkerAutoDefault
mrfWorkers = MRFWorkerAutoDefault
2022-11-18 00:20:09 +01:00
if len ( p . workers ) < WorkerAutoDefault {
workers = int ( math . Min ( float64 ( len ( p . workers ) + 1 ) , WorkerAutoDefault ) )
2022-09-24 16:20:28 -07:00
}
if p . mrfWorkerSize < MRFWorkerAutoDefault {
mrfWorkers = int ( math . Min ( float64 ( p . mrfWorkerSize + 1 ) , MRFWorkerAutoDefault ) )
}
}
p . priority = pri
p . mu . Unlock ( )
2022-11-18 00:20:09 +01:00
p . ResizeWorkers ( workers , 0 )
2022-09-24 16:20:28 -07:00
p . ResizeFailedWorkers ( mrfWorkers )
}
2021-04-23 21:58:45 -07:00
// ResizeFailedWorkers sets replication failed workers pool size
func ( p * ReplicationPool ) ResizeFailedWorkers ( n int ) {
p . mu . Lock ( )
defer p . mu . Unlock ( )
for p . mrfWorkerSize < n {
p . mrfWorkerSize ++
go p . AddMRFWorker ( )
}
for p . mrfWorkerSize > n {
p . mrfWorkerSize --
go func ( ) { p . mrfWorkerKillCh <- struct { } { } } ( )
2021-03-09 02:56:42 -08:00
}
}
2023-07-25 20:02:02 -07:00
// minLargeObjSize is the size (128MiB) from which queueReplicaTask sends an
// object to the large-upload queues.
const minLargeObjSize = 128 * humanize.MiByte
2022-11-18 00:20:09 +01:00
// getWorkerCh gets a worker channel deterministically based on bucket and object names.
// Must be able to grab read lock from p.
2023-07-25 20:02:02 -07:00
func ( p * ReplicationPool ) getWorkerCh ( bucket , object string , sz int64 ) chan <- ReplicationWorkerOperation {
2022-11-18 00:20:09 +01:00
h := xxh3 . HashString ( bucket + object )
p . mu . RLock ( )
defer p . mu . RUnlock ( )
if len ( p . workers ) == 0 {
return nil
}
return p . workers [ h % uint64 ( len ( p . workers ) ) ]
}
2021-04-29 18:20:39 -07:00
func ( p * ReplicationPool ) queueReplicaTask ( ri ReplicateObjectInfo ) {
if p == nil {
return
}
2023-07-25 20:02:02 -07:00
// if object is large, queue it to a static set of large workers
if ri . Size >= int64 ( minLargeObjSize ) {
h := xxh3 . HashString ( ri . Bucket + ri . Name )
select {
case <- p . ctx . Done ( ) :
case p . lrgworkers [ h % LargeWorkerCount ] <- ri :
default :
globalReplicationPool . queueMRFSave ( ri . ToMRFEntry ( ) )
}
return
}
2022-11-18 00:20:09 +01:00
var ch , healCh chan <- ReplicationWorkerOperation
2021-06-01 19:59:11 -07:00
switch ri . OpType {
case replication . ExistingObjectReplicationType :
2022-11-18 00:20:09 +01:00
ch = p . existingWorkers
2021-06-28 23:58:08 -07:00
case replication . HealReplicationType :
2022-08-22 16:53:06 -07:00
ch = p . mrfReplicaCh
2023-07-25 20:02:02 -07:00
healCh = p . getWorkerCh ( ri . Name , ri . Bucket , ri . Size )
2021-06-01 19:59:11 -07:00
default :
2023-07-25 20:02:02 -07:00
ch = p . getWorkerCh ( ri . Name , ri . Bucket , ri . Size )
2021-06-01 19:59:11 -07:00
}
2022-11-18 00:20:09 +01:00
if ch == nil && healCh == nil {
return
}
2021-04-29 18:20:39 -07:00
select {
2022-11-18 00:20:09 +01:00
case <- p . ctx . Done ( ) :
2022-08-22 16:53:06 -07:00
case healCh <- ri :
2021-06-01 19:59:11 -07:00
case ch <- ri :
2021-04-29 18:20:39 -07:00
default :
2022-09-12 12:40:02 -07:00
globalReplicationPool . queueMRFSave ( ri . ToMRFEntry ( ) )
2022-09-24 16:20:28 -07:00
p . mu . RLock ( )
2022-11-17 16:35:02 +01:00
prio := p . priority
p . mu . RUnlock ( )
switch prio {
2022-09-24 16:20:28 -07:00
case "fast" :
logger . LogOnceIf ( GlobalContext , fmt . Errorf ( "WARNING: Unable to keep up with incoming traffic" ) , string ( replicationSubsystem ) )
case "slow" :
logger . LogOnceIf ( GlobalContext , fmt . Errorf ( "WARNING: Unable to keep up with incoming traffic - we recommend increasing replication priority with `mc admin config set api replication_priority=auto`" ) , string ( replicationSubsystem ) )
default :
if p . ActiveWorkers ( ) < WorkerMaxLimit {
2022-11-17 16:35:02 +01:00
p . mu . RLock ( )
2022-11-18 00:20:09 +01:00
workers := int ( math . Min ( float64 ( len ( p . workers ) + 1 ) , WorkerMaxLimit ) )
existing := len ( p . workers )
2022-11-17 16:35:02 +01:00
p . mu . RUnlock ( )
2022-11-18 00:20:09 +01:00
p . ResizeWorkers ( workers , existing )
2022-09-24 16:20:28 -07:00
}
if p . ActiveMRFWorkers ( ) < MRFWorkerMaxLimit {
2022-11-17 16:35:02 +01:00
p . mu . RLock ( )
2022-09-24 16:20:28 -07:00
workers := int ( math . Min ( float64 ( p . mrfWorkerSize + 1 ) , MRFWorkerMaxLimit ) )
2022-11-17 16:35:02 +01:00
p . mu . RUnlock ( )
2022-09-24 16:20:28 -07:00
p . ResizeFailedWorkers ( workers )
}
}
2021-04-29 18:20:39 -07:00
}
}
2021-09-18 16:31:35 -04:00
// queueReplicateDeletesWrapper queues doi on the global replication pool once
// for every target in existingObjectResync that is marked for replication,
// setting the ResetID and TargetArn of each queued copy to that target's.
func queueReplicateDeletesWrapper(doi DeletedObjectReplicationInfo, existingObjectResync ResyncDecision) {
	for arn, decision := range existingObjectResync.targets {
		if !decision.Replicate {
			continue
		}
		doi.ResetID = decision.ResetID
		doi.TargetArn = arn
		globalReplicationPool.queueReplicaDeleteTask(doi)
	}
}
2021-06-01 19:59:11 -07:00
func ( p * ReplicationPool ) queueReplicaDeleteTask ( doi DeletedObjectReplicationInfo ) {
2021-03-09 02:56:42 -08:00
if p == nil {
return
2020-09-21 13:43:29 -07:00
}
2022-11-18 00:20:09 +01:00
var ch chan <- ReplicationWorkerOperation
2021-06-01 19:59:11 -07:00
switch doi . OpType {
case replication . ExistingObjectReplicationType :
2022-11-18 00:20:09 +01:00
ch = p . existingWorkers
2021-06-28 23:58:08 -07:00
case replication . HealReplicationType :
2021-07-01 14:02:44 -07:00
fallthrough
2021-06-01 19:59:11 -07:00
default :
2023-07-25 20:02:02 -07:00
ch = p . getWorkerCh ( doi . Bucket , doi . ObjectName , 0 )
2021-06-01 19:59:11 -07:00
}
2021-03-09 02:56:42 -08:00
select {
2022-11-18 00:20:09 +01:00
case <- p . ctx . Done ( ) :
2021-06-01 19:59:11 -07:00
case ch <- doi :
2021-03-09 02:56:42 -08:00
default :
2022-09-12 12:40:02 -07:00
globalReplicationPool . queueMRFSave ( doi . ToMRFEntry ( ) )
2022-09-24 16:20:28 -07:00
p . mu . RLock ( )
2022-11-17 16:35:02 +01:00
prio := p . priority
p . mu . RUnlock ( )
switch prio {
2022-09-24 16:20:28 -07:00
case "fast" :
logger . LogOnceIf ( GlobalContext , fmt . Errorf ( "WARNING: Unable to keep up with incoming deletes" ) , string ( replicationSubsystem ) )
case "slow" :
logger . LogOnceIf ( GlobalContext , fmt . Errorf ( "WARNING: Unable to keep up with incoming deletes - we recommend increasing replication priority with `mc admin config set api replication_priority=auto`" ) , string ( replicationSubsystem ) )
default :
if p . ActiveWorkers ( ) < WorkerMaxLimit {
2022-11-17 16:35:02 +01:00
p . mu . RLock ( )
2022-11-18 00:20:09 +01:00
workers := int ( math . Min ( float64 ( len ( p . workers ) + 1 ) , WorkerMaxLimit ) )
existing := len ( p . workers )
2022-11-17 16:35:02 +01:00
p . mu . RUnlock ( )
2022-11-18 00:20:09 +01:00
p . ResizeWorkers ( workers , existing )
2022-09-24 16:20:28 -07:00
}
}
2021-03-09 02:56:42 -08:00
}
}
2021-04-23 21:58:45 -07:00
// replicationPoolOpts holds the options accepted by NewReplicationPool.
type replicationPoolOpts struct {
	// Priority is "fast", "slow" or auto; empty is treated as auto.
	Priority string
}
2021-03-09 02:56:42 -08:00
func initBackgroundReplication ( ctx context . Context , objectAPI ObjectLayer ) {
2021-04-23 21:58:45 -07:00
globalReplicationPool = NewReplicationPool ( ctx , objectAPI , replicationPoolOpts {
2022-09-24 16:20:28 -07:00
Priority : globalAPIConfig . getReplicationPriority ( ) ,
2021-04-23 21:58:45 -07:00
} )
2021-04-03 09:03:42 -07:00
globalReplicationStats = NewReplicationStats ( ctx , objectAPI )
2021-10-21 21:52:55 -04:00
go globalReplicationStats . loadInitialReplicationMetrics ( ctx )
2020-09-21 13:43:29 -07:00
}
2021-01-11 22:36:51 -08:00
2022-03-08 13:58:55 -08:00
// proxyResult is the outcome of trying to proxy a request to a replication
// target.
type proxyResult struct {
	// Proxy is true when a target was found that can serve the request.
	Proxy bool
	// Err is set when the lookup on a target failed with an invalid range.
	Err error
}
2021-01-11 22:36:51 -08:00
// get Reader from replication target if active-active replication is in place and
// this node returns a 404
2022-12-05 20:18:50 +01:00
func proxyGetToReplicationTarget ( ctx context . Context , bucket , object string , rs * HTTPRangeSpec , _ http . Header , opts ObjectOptions , proxyTargets * madmin . BucketTargets ) ( gr * GetObjectReader , proxy proxyResult , err error ) {
2022-03-08 13:58:55 -08:00
tgt , oi , proxy := proxyHeadToRepTarget ( ctx , bucket , object , rs , opts , proxyTargets )
if ! proxy . Proxy {
return nil , proxy , nil
2021-01-11 22:36:51 -08:00
}
2022-03-08 13:58:55 -08:00
fn , _ , _ , err := NewGetObjectReader ( nil , oi , opts )
2021-01-11 22:36:51 -08:00
if err != nil {
2022-03-08 13:58:55 -08:00
return nil , proxy , err
2021-01-11 22:36:51 -08:00
}
2022-12-05 20:18:50 +01:00
gopts := minio . GetObjectOptions {
2021-01-11 22:36:51 -08:00
VersionID : opts . VersionID ,
ServerSideEncryption : opts . ServerSideEncryption ,
2022-12-05 20:18:50 +01:00
Internal : minio . AdvancedGetOptions {
2021-01-27 11:22:34 -08:00
ReplicationProxyRequest : "true" ,
2021-01-11 22:36:51 -08:00
} ,
2022-03-08 13:58:55 -08:00
PartNumber : opts . PartNumber ,
2021-01-11 22:36:51 -08:00
}
// get correct offsets for encrypted object
2022-03-08 13:58:55 -08:00
if rs != nil {
h , err := rs . ToHeader ( )
if err != nil {
return nil , proxy , err
2021-01-11 22:36:51 -08:00
}
2022-03-08 13:58:55 -08:00
gopts . Set ( xhttp . Range , h )
2021-01-11 22:36:51 -08:00
}
2021-02-03 20:41:33 -08:00
// Make sure to match ETag when proxying.
if err = gopts . SetMatchETag ( oi . ETag ) ; err != nil {
2022-03-08 13:58:55 -08:00
return nil , proxy , err
2021-02-03 20:41:33 -08:00
}
2022-12-05 20:18:50 +01:00
c := minio . Core { Client : tgt . Client }
2023-01-27 10:24:51 -08:00
obj , _ , h , err := c . GetObject ( ctx , tgt . Bucket , object , gopts )
2021-01-11 22:36:51 -08:00
if err != nil {
2022-03-08 13:58:55 -08:00
return nil , proxy , err
2021-01-11 22:36:51 -08:00
}
closeReader := func ( ) { obj . Close ( ) }
2021-06-24 09:44:00 -07:00
reader , err := fn ( obj , h , closeReader )
2021-01-11 22:36:51 -08:00
if err != nil {
2022-03-08 13:58:55 -08:00
return nil , proxy , err
2021-01-11 22:36:51 -08:00
}
2021-02-10 17:25:04 -08:00
reader . ObjInfo = oi . Clone ( )
2022-03-08 13:58:55 -08:00
if rs != nil {
contentSize , err := parseSizeFromContentRange ( h )
if err != nil {
return nil , proxy , err
}
reader . ObjInfo . Size = contentSize
}
return reader , proxyResult { Proxy : true } , nil
2021-01-11 22:36:51 -08:00
}
2022-05-08 16:50:31 -07:00
func getProxyTargets ( ctx context . Context , bucket , object string , opts ObjectOptions ) ( tgts * madmin . BucketTargets ) {
if opts . VersionSuspended {
return & madmin . BucketTargets { }
}
2023-01-27 01:29:32 -08:00
if opts . ProxyRequest || ( opts . ProxyHeaderSet && ! opts . ProxyRequest ) {
2022-10-13 16:43:36 -07:00
return & madmin . BucketTargets { }
}
2021-01-11 22:36:51 -08:00
cfg , err := getReplicationConfig ( ctx , bucket )
2021-09-18 16:31:35 -04:00
if err != nil || cfg == nil {
return & madmin . BucketTargets { }
}
topts := replication . ObjectOpts { Name : object }
tgtArns := cfg . FilterTargetArns ( topts )
tgts = & madmin . BucketTargets { Targets : make ( [ ] madmin . BucketTarget , len ( tgtArns ) ) }
for i , tgtArn := range tgtArns {
tgt := globalBucketTargetSys . GetRemoteBucketTargetByArn ( ctx , bucket , tgtArn )
tgts . Targets [ i ] = tgt
2021-01-11 22:36:51 -08:00
}
2021-09-18 16:31:35 -04:00
return tgts
2021-01-11 22:36:51 -08:00
}
2021-01-27 11:22:34 -08:00
2022-03-08 13:58:55 -08:00
func proxyHeadToRepTarget ( ctx context . Context , bucket , object string , rs * HTTPRangeSpec , opts ObjectOptions , proxyTargets * madmin . BucketTargets ) ( tgt * TargetClient , oi ObjectInfo , proxy proxyResult ) {
2021-01-11 22:36:51 -08:00
// this option is set when active-active replication is in place between site A -> B,
// and site B does not have the object yet.
2021-01-27 11:22:34 -08:00
if opts . ProxyRequest || ( opts . ProxyHeaderSet && ! opts . ProxyRequest ) { // true only when site B sets MinIOSourceProxyRequest header
2022-03-08 13:58:55 -08:00
return nil , oi , proxy
2021-01-11 22:36:51 -08:00
}
2021-09-18 16:31:35 -04:00
for _ , t := range proxyTargets . Targets {
tgt = globalBucketTargetSys . GetRemoteTargetClient ( ctx , t . Arn )
2022-08-16 17:46:22 -07:00
if tgt == nil || globalBucketTargetSys . isOffline ( tgt . EndpointURL ( ) ) {
2021-09-18 16:31:35 -04:00
continue
}
// if proxying explicitly disabled on remote target
if tgt . disableProxy {
continue
}
2021-01-11 22:36:51 -08:00
2022-12-05 20:18:50 +01:00
gopts := minio . GetObjectOptions {
2021-09-18 16:31:35 -04:00
VersionID : opts . VersionID ,
ServerSideEncryption : opts . ServerSideEncryption ,
2022-12-05 20:18:50 +01:00
Internal : minio . AdvancedGetOptions {
2021-09-18 16:31:35 -04:00
ReplicationProxyRequest : "true" ,
} ,
2022-03-08 13:58:55 -08:00
PartNumber : opts . PartNumber ,
2021-09-18 16:31:35 -04:00
}
2022-03-08 13:58:55 -08:00
if rs != nil {
h , err := rs . ToHeader ( )
if err != nil {
logger . LogIf ( ctx , fmt . Errorf ( "Invalid range header for %s/%s(%s) - %w" , bucket , object , opts . VersionID , err ) )
continue
}
gopts . Set ( xhttp . Range , h )
}
2021-09-18 16:31:35 -04:00
objInfo , err := tgt . StatObject ( ctx , t . TargetBucket , object , gopts )
if err != nil {
2022-03-08 13:58:55 -08:00
if isErrInvalidRange ( ErrorRespToObjectError ( err , bucket , object ) ) {
return nil , oi , proxyResult { Err : err }
}
2021-09-18 16:31:35 -04:00
continue
}
tags , _ := tags . MapToObjectTags ( objInfo . UserTags )
oi = ObjectInfo {
Bucket : bucket ,
Name : object ,
ModTime : objInfo . LastModified ,
Size : objInfo . Size ,
ETag : objInfo . ETag ,
VersionID : objInfo . VersionID ,
IsLatest : objInfo . IsLatest ,
DeleteMarker : objInfo . IsDeleteMarker ,
ContentType : objInfo . ContentType ,
Expires : objInfo . Expires ,
StorageClass : objInfo . StorageClass ,
ReplicationStatusInternal : objInfo . ReplicationStatus ,
UserTags : tags . String ( ) ,
2023-06-17 07:30:53 -07:00
ReplicationStatus : replication . StatusType ( objInfo . ReplicationStatus ) ,
2021-09-18 16:31:35 -04:00
}
oi . UserDefined = make ( map [ string ] string , len ( objInfo . Metadata ) )
for k , v := range objInfo . Metadata {
oi . UserDefined [ k ] = v [ 0 ]
}
ce , ok := oi . UserDefined [ xhttp . ContentEncoding ]
if ! ok {
ce , ok = oi . UserDefined [ strings . ToLower ( xhttp . ContentEncoding ) ]
}
if ok {
oi . ContentEncoding = ce
}
2022-03-08 13:58:55 -08:00
return tgt , oi , proxyResult { Proxy : true }
2021-01-11 22:36:51 -08:00
}
2022-03-08 13:58:55 -08:00
return nil , oi , proxy
2021-01-11 22:36:51 -08:00
}
// get object info from replication target if active-active replication is in place and
// this node returns a 404
2022-03-08 13:58:55 -08:00
func proxyHeadToReplicationTarget ( ctx context . Context , bucket , object string , rs * HTTPRangeSpec , opts ObjectOptions , proxyTargets * madmin . BucketTargets ) ( oi ObjectInfo , proxy proxyResult ) {
_ , oi , proxy = proxyHeadToRepTarget ( ctx , bucket , object , rs , opts , proxyTargets )
2021-09-18 16:31:35 -04:00
return oi , proxy
2021-01-11 22:36:51 -08:00
}
2021-09-18 16:31:35 -04:00
func scheduleReplication ( ctx context . Context , objInfo ObjectInfo , o ObjectLayer , dsc ReplicateDecision , opType replication . Type ) {
2022-07-12 10:43:32 -07:00
ri := ReplicateObjectInfo { ObjectInfo : objInfo , OpType : opType , Dsc : dsc , EventType : ReplicateIncoming }
2021-09-18 16:31:35 -04:00
if dsc . Synchronous ( ) {
2022-07-12 10:43:32 -07:00
replicateObject ( ctx , ri , o )
2021-01-11 22:36:51 -08:00
} else {
2022-07-12 10:43:32 -07:00
globalReplicationPool . queueReplicaTask ( ri )
2021-04-03 09:03:42 -07:00
}
if sz , err := objInfo . GetActualSize ( ) ; err == nil {
2021-09-18 16:31:35 -04:00
for arn := range dsc . targetsMap {
2021-11-17 21:10:57 +01:00
globalReplicationStats . Update ( objInfo . Bucket , arn , sz , 0 , objInfo . ReplicationStatus , replication . StatusType ( "" ) , opType )
2021-09-18 16:31:35 -04:00
}
2021-01-11 22:36:51 -08:00
}
}
2021-09-18 16:31:35 -04:00
func scheduleReplicationDelete ( ctx context . Context , dv DeletedObjectReplicationInfo , o ObjectLayer ) {
2021-04-29 18:20:39 -07:00
globalReplicationPool . queueReplicaDeleteTask ( dv )
2021-09-18 16:31:35 -04:00
for arn := range dv . ReplicationState . Targets {
2021-11-17 21:10:57 +01:00
globalReplicationStats . Update ( dv . Bucket , arn , 0 , 0 , replication . Pending , replication . StatusType ( "" ) , replication . DeleteReplicationType )
2021-09-18 16:31:35 -04:00
}
for arn := range dv . ReplicationState . PurgeTargets {
2021-11-17 21:10:57 +01:00
globalReplicationStats . Update ( dv . Bucket , arn , 0 , 0 , replication . Pending , replication . StatusType ( "" ) , replication . DeleteReplicationType )
2021-09-18 16:31:35 -04:00
}
2021-01-11 22:36:51 -08:00
}
2021-06-01 19:59:11 -07:00
type replicationConfig struct {
2021-09-18 16:31:35 -04:00
Config * replication . Config
remotes * madmin . BucketTargets
2021-06-01 19:59:11 -07:00
}
// Empty reports whether no replication configuration is set, i.e. whether
// c.Config is nil.
func (c replicationConfig) Empty() bool {
	return c.Config == nil
}
2022-01-02 09:15:06 -08:00
2021-06-01 19:59:11 -07:00
// Replicate reports the result of evaluating opts against the underlying
// replication configuration. It dereferences c.Config without checking
// Empty() first.
func (c replicationConfig) Replicate(opts replication.ObjectOpts) bool {
	decision := c.Config.Replicate(opts)
	return decision
}
// Resync returns true if replication reset is requested
2021-09-18 16:31:35 -04:00
func ( c replicationConfig ) Resync ( ctx context . Context , oi ObjectInfo , dsc * ReplicateDecision , tgtStatuses map [ string ] replication . StatusType ) ( r ResyncDecision ) {
2021-06-01 19:59:11 -07:00
if c . Empty ( ) {
2021-09-18 16:31:35 -04:00
return
2021-06-01 19:59:11 -07:00
}
2021-09-18 16:31:35 -04:00
// Now overlay existing object replication choices for target
2021-06-01 19:59:11 -07:00
if oi . DeleteMarker {
2021-09-18 16:31:35 -04:00
opts := replication . ObjectOpts {
2021-06-01 19:59:11 -07:00
Name : oi . Name ,
SSEC : crypto . SSEC . IsEncrypted ( oi . UserDefined ) ,
UserTags : oi . UserTags ,
DeleteMarker : oi . DeleteMarker ,
VersionID : oi . VersionID ,
OpType : replication . DeleteReplicationType ,
2022-01-02 09:15:06 -08:00
ExistingObject : true ,
}
2021-09-18 16:31:35 -04:00
tgtArns := c . Config . FilterTargetArns ( opts )
// indicates no matching target with Existing object replication enabled.
if len ( tgtArns ) == 0 {
return
2021-06-01 19:59:11 -07:00
}
2021-09-18 16:31:35 -04:00
for _ , t := range tgtArns {
opts . TargetArn = t
// Update replication decision for target based on existing object replciation rule.
dsc . Set ( newReplicateTargetDecision ( t , c . Replicate ( opts ) , false ) )
}
return c . resync ( oi , dsc , tgtStatuses )
}
// Ignore previous replication status when deciding if object can be re-replicated
objInfo := oi . Clone ( )
objInfo . ReplicationStatusInternal = ""
objInfo . VersionPurgeStatusInternal = ""
objInfo . ReplicationStatus = ""
objInfo . VersionPurgeStatus = ""
2022-07-28 13:43:02 -07:00
delete ( objInfo . UserDefined , xhttp . AmzBucketReplicationStatus )
2021-09-18 16:31:35 -04:00
resyncdsc := mustReplicate ( ctx , oi . Bucket , oi . Name , getMustReplicateOptions ( objInfo , replication . ExistingObjectReplicationType , ObjectOptions { } ) )
dsc = & resyncdsc
return c . resync ( oi , dsc , tgtStatuses )
2021-06-01 19:59:11 -07:00
}
// wrapper function for testability. Returns true if a new reset is requested on
// already replicated objects OR object qualifies for existing object replication
// and no reset requested.
2021-09-18 16:31:35 -04:00
func ( c replicationConfig ) resync ( oi ObjectInfo , dsc * ReplicateDecision , tgtStatuses map [ string ] replication . StatusType ) ( r ResyncDecision ) {
r = ResyncDecision {
targets : make ( map [ string ] ResyncTargetDecision ) ,
}
if c . remotes == nil {
return
}
for _ , tgt := range c . remotes . Targets {
d , ok := dsc . targetsMap [ tgt . Arn ]
if ! ok {
continue
}
if ! d . Replicate {
continue
}
r . targets [ d . Arn ] = resyncTarget ( oi , tgt . Arn , tgt . ResetID , tgt . ResetBeforeDate , tgtStatuses [ tgt . Arn ] )
}
return
}
// targetResetHeader returns the metadata key under which the replication reset
// state for the given target ARN is looked up: the reserved lower-case prefix
// followed by ReplicationReset, a dash and the ARN.
func targetResetHeader(arn string) string {
	prefix := ReservedMetadataPrefixLower + ReplicationReset
	return fmt.Sprintf("%s-%s", prefix, arn)
}
func resyncTarget ( oi ObjectInfo , arn string , resetID string , resetBeforeDate time . Time , tgtStatus replication . StatusType ) ( rd ResyncTargetDecision ) {
rd = ResyncTargetDecision {
ResetID : resetID ,
ResetBeforeDate : resetBeforeDate ,
}
rs , ok := oi . UserDefined [ targetResetHeader ( arn ) ]
if ! ok {
2021-11-16 09:28:29 -08:00
rs , ok = oi . UserDefined [ xhttp . MinIOReplicationResetStatus ] // for backward compatibility
2021-06-01 19:59:11 -07:00
}
if ! ok { // existing object replication is enabled and object version is unreplicated so far.
2021-09-18 16:31:35 -04:00
if resetID != "" && oi . ModTime . Before ( resetBeforeDate ) { // trigger replication if `mc replicate reset` requested
rd . Replicate = true
return
2021-06-01 19:59:11 -07:00
}
2021-09-18 16:31:35 -04:00
// For existing object reset - this condition is needed
rd . Replicate = tgtStatus == ""
return
2021-06-01 19:59:11 -07:00
}
2021-09-18 16:31:35 -04:00
if resetID == "" || resetBeforeDate . Equal ( timeSentinel ) { // no reset in progress
return
2021-06-01 19:59:11 -07:00
}
2021-09-18 16:31:35 -04:00
2021-06-01 19:59:11 -07:00
// if already replicated, return true if a new reset was requested.
splits := strings . SplitN ( rs , ";" , 2 )
2021-09-18 16:31:35 -04:00
if len ( splits ) != 2 {
return
}
newReset := splits [ 1 ] != resetID
if ! newReset && tgtStatus == replication . Completed {
2021-06-01 19:59:11 -07:00
// already replicated and no reset requested
2021-09-18 16:31:35 -04:00
return
2021-06-01 19:59:11 -07:00
}
2021-09-18 16:31:35 -04:00
rd . Replicate = newReset && oi . ModTime . Before ( resetBeforeDate )
return
2021-06-01 19:59:11 -07:00
}
2021-11-19 14:46:14 -08:00
2022-07-12 10:43:32 -07:00
// resyncTimeInterval is the period of the timer used by PersistToDisk.
const resyncTimeInterval = 1 * time.Minute
2022-02-10 10:16:52 -08:00
2022-11-14 07:16:40 -08:00
// PersistToDisk persists in-memory resync metadata stats to disk at periodic intervals
func ( s * replicationResyncer ) PersistToDisk ( ctx context . Context , objectAPI ObjectLayer ) {
2022-02-10 10:16:52 -08:00
resyncTimer := time . NewTimer ( resyncTimeInterval )
defer resyncTimer . Stop ( )
2022-09-02 00:53:36 +01:00
// For each bucket name, store the last timestamp of the
// successful save of replication status in the backend disks.
lastResyncStatusSave := make ( map [ string ] time . Time )
2022-02-10 10:16:52 -08:00
for {
select {
case <- resyncTimer . C :
2022-11-14 07:16:40 -08:00
s . RLock ( )
for bucket , brs := range s . statusMap {
2022-02-10 10:16:52 -08:00
var updt bool
2022-09-02 00:53:36 +01:00
// Save the replication status if one resync to any bucket target is still not finished
2022-02-10 10:16:52 -08:00
for _ , st := range brs . TargetsMap {
2022-11-14 07:16:40 -08:00
if st . LastUpdate . Equal ( timeSentinel ) {
2022-02-10 10:16:52 -08:00
updt = true
break
}
}
2022-09-02 00:53:36 +01:00
// Save the replication status if a new stats update is found and not saved in the backend yet
if brs . LastUpdate . After ( lastResyncStatusSave [ bucket ] ) {
updt = true
}
2022-02-10 10:16:52 -08:00
if updt {
if err := saveResyncStatus ( ctx , bucket , brs , objectAPI ) ; err != nil {
2022-08-04 16:10:08 -07:00
logger . LogIf ( ctx , fmt . Errorf ( "Could not save resync metadata to drive for %s - %w" , bucket , err ) )
2022-09-02 00:53:36 +01:00
} else {
lastResyncStatusSave [ bucket ] = brs . LastUpdate
2022-02-10 10:16:52 -08:00
}
}
}
2022-11-14 07:16:40 -08:00
s . RUnlock ( )
2022-05-17 22:42:59 -07:00
resyncTimer . Reset ( resyncTimeInterval )
2022-02-10 10:16:52 -08:00
case <- ctx . Done ( ) :
// server could be restarting - need
// to exit immediately
return
}
}
}
2023-02-24 12:07:34 -08:00
const (
	// resyncWorkerCnt limits the number of bucket resyncs in progress at any given time.
	resyncWorkerCnt = 10
	// resyncParallelRoutines is the number of parallel resync ops per bucket.
	resyncParallelRoutines = 10
)
2022-11-14 07:16:40 -08:00
// newresyncer returns a replicationResyncer with an empty status map and
// cancel/worker channels buffered to resyncWorkerCnt. The worker channel is
// pre-filled with one token per worker; resyncBucket takes a token before it
// starts and puts it back when it finishes.
func newresyncer() *replicationResyncer {
	rs := &replicationResyncer{
		statusMap:      make(map[string]BucketReplicationResyncStatus),
		workerSize:     resyncWorkerCnt,
		resyncCancelCh: make(chan struct{}, resyncWorkerCnt),
		workerCh:       make(chan struct{}, resyncWorkerCnt),
	}
	for remaining := rs.workerSize; remaining > 0; remaining-- {
		rs.workerCh <- struct{}{}
	}
	return rs
}
2023-02-24 12:07:34 -08:00
// markStatus marks the status of replication resync on the remote target
// opts.arn for bucket opts.bucket, and refreshes the LastUpdate timestamps of
// both the target entry and the bucket entry.
//
// If the bucket has no in-memory resync state (deleteResyncMetadata removes
// it), the call is a no-op. Previously this case wrote into a nil TargetsMap
// and panicked.
func (s *replicationResyncer) markStatus(status ResyncStatusType, opts resyncOpts) {
	s.Lock()
	defer s.Unlock()

	m, ok := s.statusMap[opts.bucket]
	if !ok {
		// Resync state for this bucket is gone - nothing to mark.
		return
	}
	if m.TargetsMap == nil {
		m.TargetsMap = make(map[string]TargetReplicationResyncStatus)
	}

	now := UTCNow()
	st := m.TargetsMap[opts.arn]
	st.LastUpdate = now
	st.ResyncStatus = status
	m.TargetsMap[opts.arn] = st
	m.LastUpdate = now
	s.statusMap[opts.bucket] = m
}
// incStats updates replication resync stats for the bucket's remote target
// opts.arn: it records ts.Object as the last processed object and adds the
// counters and sizes in ts to the running totals.
//
// If the bucket has no in-memory resync state (deleteResyncMetadata removes
// it), the call is a no-op. Previously this case wrote into a nil TargetsMap
// and panicked.
func (s *replicationResyncer) incStats(ts TargetReplicationResyncStatus, opts resyncOpts) {
	s.Lock()
	defer s.Unlock()

	m, ok := s.statusMap[opts.bucket]
	if !ok {
		// Resync state for this bucket is gone - nothing to update.
		return
	}
	if m.TargetsMap == nil {
		m.TargetsMap = make(map[string]TargetReplicationResyncStatus)
	}

	st := m.TargetsMap[opts.arn]
	st.Object = ts.Object
	st.ReplicatedCount += ts.ReplicatedCount
	st.FailedCount += ts.FailedCount
	st.ReplicatedSize += ts.ReplicatedSize
	st.FailedSize += ts.FailedSize
	m.TargetsMap[opts.arn] = st
	m.LastUpdate = UTCNow()
	s.statusMap[opts.bucket] = m
}
2022-02-10 10:16:52 -08:00
// resyncBucket resyncs all qualifying objects as per replication rules for the target
// ARN
2022-11-14 07:16:40 -08:00
func ( s * replicationResyncer ) resyncBucket ( ctx context . Context , objectAPI ObjectLayer , heal bool , opts resyncOpts ) {
select {
case <- s . workerCh : // block till a worker is available
case <- ctx . Done ( ) :
return
}
2022-02-10 10:16:52 -08:00
resyncStatus := ResyncFailed
defer func ( ) {
2023-02-24 12:07:34 -08:00
s . markStatus ( resyncStatus , opts )
2022-11-14 07:16:40 -08:00
globalSiteResyncMetrics . incBucket ( opts , resyncStatus )
s . workerCh <- struct { } { }
2022-02-10 10:16:52 -08:00
} ( )
// Allocate new results channel to receive ObjectInfo.
objInfoCh := make ( chan ObjectInfo )
2022-11-14 07:16:40 -08:00
cfg , err := getReplicationConfig ( ctx , opts . bucket )
2022-02-10 10:16:52 -08:00
if err != nil {
2023-08-01 11:51:15 -07:00
logger . LogIf ( ctx , fmt . Errorf ( "replication resync of %s for arn %s failed with %w" , opts . bucket , opts . arn , err ) )
2022-02-10 10:16:52 -08:00
return
}
2022-11-14 07:16:40 -08:00
tgts , err := globalBucketTargetSys . ListBucketTargets ( ctx , opts . bucket )
2022-02-10 10:16:52 -08:00
if err != nil {
2023-08-01 11:51:15 -07:00
logger . LogIf ( ctx , fmt . Errorf ( "replication resync of %s for arn %s failed %w" , opts . bucket , opts . arn , err ) )
2022-02-10 10:16:52 -08:00
return
}
rcfg := replicationConfig {
Config : cfg ,
remotes : tgts ,
}
tgtArns := cfg . FilterTargetArns (
replication . ObjectOpts {
OpType : replication . ResyncReplicationType ,
2022-11-14 07:16:40 -08:00
TargetArn : opts . arn ,
2022-02-10 10:16:52 -08:00
} )
if len ( tgtArns ) != 1 {
2023-08-01 11:51:15 -07:00
logger . LogIf ( ctx , fmt . Errorf ( "replication resync failed for %s - arn specified %s is missing in the replication config" , opts . bucket , opts . arn ) )
2022-02-10 10:16:52 -08:00
return
}
2022-11-14 07:16:40 -08:00
tgt := globalBucketTargetSys . GetRemoteTargetClient ( ctx , opts . arn )
2022-02-10 10:16:52 -08:00
if tgt == nil {
2023-08-01 11:51:15 -07:00
logger . LogIf ( ctx , fmt . Errorf ( "replication resync failed for %s - target could not be created for arn %s" , opts . bucket , opts . arn ) )
2022-02-10 10:16:52 -08:00
return
}
2022-11-14 07:16:40 -08:00
// mark resync status as resync started
if ! heal {
2023-02-24 12:07:34 -08:00
s . markStatus ( ResyncStarted , opts )
2022-11-14 07:16:40 -08:00
}
2023-02-24 12:07:34 -08:00
2022-08-18 17:49:08 -07:00
// Walk through all object versions - Walk() is always in ascending order needed to ensure
// delete marker replicated to target after object version is first created.
2022-11-14 07:16:40 -08:00
if err := objectAPI . Walk ( ctx , opts . bucket , "" , objInfoCh , ObjectOptions { } ) ; err != nil {
2022-02-10 10:16:52 -08:00
logger . LogIf ( ctx , err )
return
}
2022-11-14 07:16:40 -08:00
s . RLock ( )
m := s . statusMap [ opts . bucket ]
st := m . TargetsMap [ opts . arn ]
s . RUnlock ( )
2022-02-10 10:16:52 -08:00
var lastCheckpoint string
if st . ResyncStatus == ResyncStarted || st . ResyncStatus == ResyncFailed {
lastCheckpoint = st . Object
}
2023-08-01 11:51:15 -07:00
workers := make ( [ ] chan ReplicateObjectInfo , resyncParallelRoutines )
resultCh := make ( chan TargetReplicationResyncStatus , 1 )
defer close ( resultCh )
var wg sync . WaitGroup
for i := 0 ; i < resyncParallelRoutines ; i ++ {
wg . Add ( 1 )
workers [ i ] = make ( chan ReplicateObjectInfo , 100 )
i := i
go func ( ctx context . Context , idx int ) {
defer wg . Done ( )
for roi := range workers [ idx ] {
select {
case <- ctx . Done ( ) :
return
case <- s . resyncCancelCh :
default :
}
traceFn := s . trace ( tgt . ResetID , fmt . Sprintf ( "%s/%s (%s)" , opts . bucket , roi . Name , roi . VersionID ) )
if roi . DeleteMarker || ! roi . VersionPurgeStatus . Empty ( ) {
versionID := ""
dmVersionID := ""
if roi . VersionPurgeStatus . Empty ( ) {
dmVersionID = roi . VersionID
} else {
versionID = roi . VersionID
}
doi := DeletedObjectReplicationInfo {
DeletedObject : DeletedObject {
ObjectName : roi . Name ,
DeleteMarkerVersionID : dmVersionID ,
VersionID : versionID ,
ReplicationState : roi . getReplicationState ( ) ,
DeleteMarkerMTime : DeleteMarkerMTime { roi . ModTime } ,
DeleteMarker : roi . DeleteMarker ,
} ,
Bucket : roi . Bucket ,
OpType : replication . ExistingObjectReplicationType ,
EventType : ReplicateExistingDelete ,
}
replicateDelete ( ctx , doi , objectAPI )
} else {
roi . OpType = replication . ExistingObjectReplicationType
roi . EventType = ReplicateExisting
replicateObject ( ctx , roi , objectAPI )
}
_ , err = tgt . StatObject ( ctx , tgt . Bucket , roi . Name , minio . StatObjectOptions {
VersionID : roi . VersionID ,
Internal : minio . AdvancedGetOptions {
ReplicationProxyRequest : "false" ,
} ,
} )
st := TargetReplicationResyncStatus {
Object : roi . Name ,
Bucket : roi . Bucket ,
}
if err != nil {
if roi . DeleteMarker && isErrMethodNotAllowed ( ErrorRespToObjectError ( err , opts . bucket , roi . Name ) ) {
st . ReplicatedCount ++
} else {
st . FailedCount ++
}
} else {
st . ReplicatedCount ++
st . ReplicatedSize += roi . Size
}
traceFn ( err )
select {
case <- ctx . Done ( ) :
return
case <- s . resyncCancelCh :
return
case resultCh <- st :
}
}
} ( ctx , i )
}
2022-02-10 10:16:52 -08:00
for obj := range objInfoCh {
2022-11-14 07:16:40 -08:00
select {
case <- s . resyncCancelCh :
resyncStatus = ResyncCanceled
return
2023-02-24 12:07:34 -08:00
case <- ctx . Done ( ) :
return
2022-11-14 07:16:40 -08:00
default :
}
2022-02-10 10:16:52 -08:00
if heal && lastCheckpoint != "" && lastCheckpoint != obj . Name {
continue
}
lastCheckpoint = ""
2023-08-01 11:51:15 -07:00
roi := getHealReplicateObjectInfo ( obj , rcfg )
if ! roi . ExistingObjResync . mustResync ( ) {
continue
}
select {
case <- s . resyncCancelCh :
return
case <- ctx . Done ( ) :
return
default :
h := xxh3 . HashString ( roi . Bucket + roi . Name )
workers [ h % uint64 ( resyncParallelRoutines ) ] <- roi
}
2022-02-10 10:16:52 -08:00
}
2023-08-01 11:51:15 -07:00
for i := 0 ; i < resyncParallelRoutines ; i ++ {
close ( workers [ i ] )
}
go func ( ) {
for r := range resultCh {
s . incStats ( r , opts )
globalSiteResyncMetrics . updateMetric ( r , opts . resyncID )
}
} ( )
wg . Wait ( )
2022-02-10 10:16:52 -08:00
resyncStatus = ResyncCompleted
}
// start replication resync for the remote target ARN specified
2022-11-14 07:16:40 -08:00
func ( s * replicationResyncer ) start ( ctx context . Context , objAPI ObjectLayer , opts resyncOpts ) error {
if opts . bucket == "" {
2022-02-10 10:16:52 -08:00
return fmt . Errorf ( "bucket name is empty" )
}
2022-11-14 07:16:40 -08:00
if opts . arn == "" {
2022-02-10 10:16:52 -08:00
return fmt . Errorf ( "target ARN specified for resync is empty" )
}
// Check if the current bucket has quota restrictions, if not skip it
2022-11-14 07:16:40 -08:00
cfg , err := getReplicationConfig ( ctx , opts . bucket )
2022-02-10 10:16:52 -08:00
if err != nil {
return err
}
tgtArns := cfg . FilterTargetArns (
replication . ObjectOpts {
OpType : replication . ResyncReplicationType ,
2022-11-14 07:16:40 -08:00
TargetArn : opts . arn ,
2022-02-10 10:16:52 -08:00
} )
if len ( tgtArns ) == 0 {
2022-11-14 07:16:40 -08:00
return fmt . Errorf ( "arn %s specified for resync not found in replication config" , opts . arn )
2022-02-10 10:16:52 -08:00
}
2022-11-14 07:16:40 -08:00
globalReplicationPool . resyncer . RLock ( )
data , ok := globalReplicationPool . resyncer . statusMap [ opts . bucket ]
globalReplicationPool . resyncer . RUnlock ( )
if ! ok {
data , err = loadBucketResyncMetadata ( ctx , opts . bucket , objAPI )
if err != nil {
return err
}
2022-02-10 10:16:52 -08:00
}
// validate if resync is in progress for this arn
for tArn , st := range data . TargetsMap {
2022-11-14 07:16:40 -08:00
if opts . arn == tArn && ( st . ResyncStatus == ResyncStarted || st . ResyncStatus == ResyncPending ) {
return fmt . Errorf ( "Resync of bucket %s is already in progress for remote bucket %s" , opts . bucket , opts . arn )
2022-02-10 10:16:52 -08:00
}
}
status := TargetReplicationResyncStatus {
2022-11-14 07:16:40 -08:00
ResyncID : opts . resyncID ,
ResyncBeforeDate : opts . resyncBefore ,
2022-02-10 10:16:52 -08:00
StartTime : UTCNow ( ) ,
2022-11-14 07:16:40 -08:00
ResyncStatus : ResyncPending ,
Bucket : opts . bucket ,
2022-02-10 10:16:52 -08:00
}
2022-11-14 07:16:40 -08:00
data . TargetsMap [ opts . arn ] = status
if err = saveResyncStatus ( ctx , opts . bucket , data , objAPI ) ; err != nil {
2022-02-10 10:16:52 -08:00
return err
}
2022-11-14 07:16:40 -08:00
globalReplicationPool . resyncer . Lock ( )
defer globalReplicationPool . resyncer . Unlock ( )
brs , ok := globalReplicationPool . resyncer . statusMap [ opts . bucket ]
2022-02-10 10:16:52 -08:00
if ! ok {
brs = BucketReplicationResyncStatus {
Version : resyncMetaVersion ,
TargetsMap : make ( map [ string ] TargetReplicationResyncStatus ) ,
}
}
2022-11-14 07:16:40 -08:00
brs . TargetsMap [ opts . arn ] = status
globalReplicationPool . resyncer . statusMap [ opts . bucket ] = brs
go globalReplicationPool . resyncer . resyncBucket ( GlobalContext , objAPI , false , opts )
2022-02-10 10:16:52 -08:00
return nil
}
2022-11-14 07:16:40 -08:00
// trace captures the current time and returns a function to be called when
// the traced operation on path has finished. The returned function publishes a
// replication resync trace entry carrying the elapsed time and err, but only
// when somebody is subscribed to resync traces.
func (s *replicationResyncer) trace(resyncID string, path string) func(err error) {
	begin := time.Now()
	return func(err error) {
		elapsed := time.Since(begin)
		if globalTrace.NumSubscribers(madmin.TraceReplicationResync) <= 0 {
			return
		}
		globalTrace.Publish(replicationResyncTrace(resyncID, begin, elapsed, path, err))
	}
}
// replicationResyncTrace builds the trace entry for one resync operation. The
// function name embeds resyncID, and Error is left empty when err is nil.
func replicationResyncTrace(resyncID string, startTime time.Time, duration time.Duration, path string, err error) madmin.TraceInfo {
	info := madmin.TraceInfo{
		TraceType: madmin.TraceReplicationResync,
		Time:      startTime,
		NodeName:  globalLocalNodeName,
		FuncName:  fmt.Sprintf("replication.(resyncID=%s)", resyncID),
		Duration:  duration,
		Path:      path,
	}
	if err != nil {
		info.Error = err.Error()
	}
	return info
}
2022-02-10 10:16:52 -08:00
// delete resync metadata from replication resync state in memory
func ( p * ReplicationPool ) deleteResyncMetadata ( ctx context . Context , bucket string ) {
if p == nil {
return
}
2022-11-14 07:16:40 -08:00
p . resyncer . Lock ( )
delete ( p . resyncer . statusMap , bucket )
defer p . resyncer . Unlock ( )
globalSiteResyncMetrics . deleteBucket ( bucket )
2022-02-10 10:16:52 -08:00
}
// initResync - initializes bucket replication resync for all buckets.
func ( p * ReplicationPool ) initResync ( ctx context . Context , buckets [ ] BucketInfo , objAPI ObjectLayer ) error {
if objAPI == nil {
return errServerNotInitialized
}
// Load bucket metadata sys in background
2022-11-14 07:16:40 -08:00
go p . startResyncRoutine ( ctx , buckets , objAPI )
2022-02-10 10:16:52 -08:00
return nil
}
2022-11-14 07:16:40 -08:00
// startResyncRoutine keeps calling loadResync until it succeeds, sleeping a
// random duration of at least one second and below one minute between
// attempts. After a successful load it blocks until ctx is done.
func (p *ReplicationPool) startResyncRoutine(ctx context.Context, buckets []BucketInfo, objAPI ObjectLayer) {
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
	// Run the replication resync in a loop
	for {
		err := p.loadResync(ctx, buckets, objAPI)
		if err == nil {
			<-ctx.Done()
			return
		}

		backoff := time.Duration(rng.Float64() * float64(time.Minute))
		if backoff < time.Second {
			// Make sure to sleep at least a second to avoid high CPU ticks.
			backoff = time.Second
		}
		time.Sleep(backoff)
	}
}
2022-02-10 10:16:52 -08:00
// Loads bucket replication resync statuses into memory.
2022-11-14 07:16:40 -08:00
func ( p * ReplicationPool ) loadResync ( ctx context . Context , buckets [ ] BucketInfo , objAPI ObjectLayer ) error {
// Make sure only one node running resync on the cluster.
2022-12-01 21:10:09 +01:00
ctx , cancel := globalLeaderLock . GetLock ( ctx )
defer cancel ( )
2022-02-10 10:16:52 -08:00
for index := range buckets {
2023-07-14 04:00:29 -07:00
bucket := buckets [ index ] . Name
meta , err := loadBucketResyncMetadata ( ctx , bucket , objAPI )
2022-02-10 10:16:52 -08:00
if err != nil {
2022-07-13 16:29:10 -07:00
if ! errors . Is ( err , errVolumeNotFound ) {
2022-02-10 10:16:52 -08:00
logger . LogIf ( ctx , err )
}
2022-07-13 16:29:10 -07:00
continue
2022-02-10 10:16:52 -08:00
}
2022-11-14 07:16:40 -08:00
p . resyncer . Lock ( )
2023-07-14 04:00:29 -07:00
p . resyncer . statusMap [ bucket ] = meta
2022-11-14 07:16:40 -08:00
p . resyncer . Unlock ( )
2023-07-14 04:00:29 -07:00
tgts := meta . cloneTgtStats ( )
2022-11-14 07:16:40 -08:00
for arn , st := range tgts {
switch st . ResyncStatus {
case ResyncFailed , ResyncStarted , ResyncPending :
go p . resyncer . resyncBucket ( ctx , objAPI , true , resyncOpts {
bucket : bucket ,
arn : arn ,
resyncID : st . ResyncID ,
resyncBefore : st . ResyncBeforeDate ,
} )
2022-02-10 10:16:52 -08:00
}
}
}
2022-11-14 07:16:40 -08:00
return nil
2022-02-10 10:16:52 -08:00
}
// loadBucketResyncMetadata loads the bucket's resync metadata from disk.
//
// A missing or empty config yields a freshly initialized status and a nil
// error. Otherwise the data must start with a 4-byte header (little-endian
// uint16 format followed by little-endian uint16 version) that matches
// resyncMetaFormat and resyncMetaVersion, followed by the msgp-encoded status
// whose Version must be resyncMetaVersionV1.
//
// The read is performed with GlobalContext, not with the ctx argument.
func loadBucketResyncMetadata(ctx context.Context, bucket string, objAPI ObjectLayer) (brs BucketReplicationResyncStatus, e error) {
	brs = newBucketResyncStatus(bucket)

	resyncDirPath := path.Join(bucketMetaPrefix, bucket, replicationDir)
	data, err := readConfig(GlobalContext, objAPI, pathJoin(resyncDirPath, resyncFileName))
	// errors.Is also recognizes a wrapped errConfigNotFound, matching how
	// sentinel errors are compared elsewhere in this file.
	if err != nil && !errors.Is(err, errConfigNotFound) {
		return brs, err
	}
	if len(data) == 0 {
		// Seems to be empty.
		return brs, nil
	}
	if len(data) <= 4 {
		// Header only (or less) - there is no payload to decode.
		return brs, fmt.Errorf("replication resync: no data")
	}

	// Read resync meta header
	if format := binary.LittleEndian.Uint16(data[0:2]); format != resyncMetaFormat {
		return brs, fmt.Errorf("resyncMeta: unknown format: %d", format)
	}
	if version := binary.LittleEndian.Uint16(data[2:4]); version != resyncMetaVersion {
		return brs, fmt.Errorf("resyncMeta: unknown version: %d", version)
	}

	// OK, parse data.
	if _, err = brs.UnmarshalMsg(data[4:]); err != nil {
		return brs, err
	}
	if brs.Version != resyncMetaVersionV1 {
		return brs, fmt.Errorf("unexpected resync meta version: %d", brs.Version)
	}
	return brs, nil
}
// saveResyncStatus saves the bucket's resync status to the resync file under
// the bucket's replication metadata directory. The payload is a 4-byte header
// (little-endian uint16 format, then little-endian uint16 version) followed by
// the msgp encoding of brs.
func saveResyncStatus(ctx context.Context, bucket string, brs BucketReplicationResyncStatus, objectAPI ObjectLayer) error {
	const headerLen = 4

	// Reserve room for the encoded status so MarshalMsg can append in place.
	header := make([]byte, headerLen, headerLen+brs.Msgsize())
	// Initialize the resync meta header.
	binary.LittleEndian.PutUint16(header[:2], resyncMetaFormat)
	binary.LittleEndian.PutUint16(header[2:headerLen], resyncMetaVersion)

	payload, err := brs.MarshalMsg(header)
	if err != nil {
		return err
	}

	target := path.Join(bucketMetaPrefix, bucket, replicationDir, resyncFileName)
	return saveConfig(ctx, objectAPI, target, payload)
}
2022-07-21 11:05:44 -07:00
2022-11-15 16:59:21 +01:00
// getReplicationDiff returns un-replicated objects in a channel.
// If a non-nil channel is returned it must be consumed fully or
// the provided context must be canceled.
func getReplicationDiff ( ctx context . Context , objAPI ObjectLayer , bucket string , opts madmin . ReplDiffOpts ) ( chan madmin . DiffInfo , error ) {
2022-07-21 11:05:44 -07:00
cfg , err := getReplicationConfig ( ctx , bucket )
if err != nil {
logger . LogIf ( ctx , err )
2022-11-15 16:59:21 +01:00
return nil , err
2022-07-21 11:05:44 -07:00
}
tgts , err := globalBucketTargetSys . ListBucketTargets ( ctx , bucket )
if err != nil {
logger . LogIf ( ctx , err )
2022-11-15 16:59:21 +01:00
return nil , err
}
objInfoCh := make ( chan ObjectInfo , 10 )
if err := objAPI . Walk ( ctx , bucket , opts . Prefix , objInfoCh , ObjectOptions { } ) ; err != nil {
logger . LogIf ( ctx , err )
return nil , err
2022-07-21 11:05:44 -07:00
}
rcfg := replicationConfig {
Config : cfg ,
remotes : tgts ,
}
2022-11-15 16:59:21 +01:00
diffCh := make ( chan madmin . DiffInfo , 4000 )
2022-07-21 11:05:44 -07:00
go func ( ) {
defer close ( diffCh )
for obj := range objInfoCh {
2022-11-15 16:59:21 +01:00
if contextCanceled ( ctx ) {
// Just consume input...
continue
}
2022-07-21 11:05:44 -07:00
// Ignore object prefixes which are excluded
// from versioning via the MinIO bucket versioning extension.
if globalBucketVersioningSys . PrefixSuspended ( bucket , obj . Name ) {
continue
}
roi := getHealReplicateObjectInfo ( obj , rcfg )
switch roi . ReplicationStatus {
case replication . Completed , replication . Replica :
if ! opts . Verbose {
continue
}
fallthrough
default :
// ignore pre-existing objects that don't satisfy replication rule(s)
if roi . ReplicationStatus . Empty ( ) && ! roi . ExistingObjResync . mustResync ( ) {
continue
}
tgtsMap := make ( map [ string ] madmin . TgtDiffInfo )
for arn , st := range roi . TargetStatuses {
if opts . ARN == "" || opts . ARN == arn {
if ! opts . Verbose && ( st == replication . Completed || st == replication . Replica ) {
continue
}
tgtsMap [ arn ] = madmin . TgtDiffInfo {
ReplicationStatus : st . String ( ) ,
}
}
}
for arn , st := range roi . TargetPurgeStatuses {
if opts . ARN == "" || opts . ARN == arn {
if ! opts . Verbose && st == Complete {
continue
}
t , ok := tgtsMap [ arn ]
if ! ok {
t = madmin . TgtDiffInfo { }
}
t . DeleteReplicationStatus = string ( st )
tgtsMap [ arn ] = t
}
}
select {
case diffCh <- madmin . DiffInfo {
Object : obj . Name ,
VersionID : obj . VersionID ,
LastModified : obj . ModTime ,
IsDeleteMarker : obj . DeleteMarker ,
ReplicationStatus : string ( roi . ReplicationStatus ) ,
DeleteReplicationStatus : string ( roi . VersionPurgeStatus ) ,
ReplicationTimestamp : roi . ReplicationTimestamp ,
Targets : tgtsMap ,
} :
case <- ctx . Done ( ) :
2022-11-15 16:59:21 +01:00
continue
2022-07-21 11:05:44 -07:00
}
}
}
} ( )
return diffCh , nil
}
2022-08-09 15:00:24 -07:00
// QueueReplicationHeal is a wrapper for queueReplicationHeal
2023-07-13 02:51:33 -04:00
func QueueReplicationHeal ( ctx context . Context , bucket string , oi ObjectInfo , retryCount int ) {
2022-09-01 23:22:11 +01:00
// un-versioned or a prefix
if oi . VersionID == "" || oi . ModTime . IsZero ( ) {
2022-08-09 15:00:24 -07:00
return
}
2022-09-13 21:23:33 -07:00
rcfg , _ := getReplicationConfig ( ctx , bucket )
2022-08-09 15:00:24 -07:00
tgts , _ := globalBucketTargetSys . ListBucketTargets ( ctx , bucket )
queueReplicationHeal ( ctx , bucket , oi , replicationConfig {
Config : rcfg ,
remotes : tgts ,
2023-07-13 02:51:33 -04:00
} , retryCount )
2022-08-09 15:00:24 -07:00
}
// queueReplicationHeal enqueues objects that failed replication OR eligible for resyncing through
// an ongoing resync operation or via existing objects replication configuration setting.
2023-07-13 02:51:33 -04:00
func queueReplicationHeal ( ctx context . Context , bucket string , oi ObjectInfo , rcfg replicationConfig , retryCount int ) ( roi ReplicateObjectInfo ) {
2022-09-01 23:22:11 +01:00
// un-versioned or a prefix
if oi . VersionID == "" || oi . ModTime . IsZero ( ) {
2022-08-09 15:00:24 -07:00
return roi
}
if rcfg . Config == nil || rcfg . remotes == nil {
return roi
}
roi = getHealReplicateObjectInfo ( oi , rcfg )
2023-07-13 02:51:33 -04:00
roi . RetryCount = uint32 ( retryCount )
2022-08-09 15:00:24 -07:00
if ! roi . Dsc . ReplicateAny ( ) {
return
}
// early return if replication already done, otherwise we need to determine if this
// version is an existing object that needs healing.
if oi . ReplicationStatus == replication . Completed && oi . VersionPurgeStatus . Empty ( ) && ! roi . ExistingObjResync . mustResync ( ) {
return
}
if roi . DeleteMarker || ! roi . VersionPurgeStatus . Empty ( ) {
versionID := ""
dmVersionID := ""
if roi . VersionPurgeStatus . Empty ( ) {
dmVersionID = roi . VersionID
} else {
versionID = roi . VersionID
}
dv := DeletedObjectReplicationInfo {
DeletedObject : DeletedObject {
ObjectName : roi . Name ,
DeleteMarkerVersionID : dmVersionID ,
VersionID : versionID ,
2023-07-10 10:57:56 -04:00
ReplicationState : roi . getReplicationState ( ) ,
2022-08-09 15:00:24 -07:00
DeleteMarkerMTime : DeleteMarkerMTime { roi . ModTime } ,
DeleteMarker : roi . DeleteMarker ,
} ,
Bucket : roi . Bucket ,
OpType : replication . HealReplicationType ,
EventType : ReplicateHealDelete ,
}
// heal delete marker replication failure or versioned delete replication failure
if roi . ReplicationStatus == replication . Pending ||
roi . ReplicationStatus == replication . Failed ||
roi . VersionPurgeStatus == Failed || roi . VersionPurgeStatus == Pending {
globalReplicationPool . queueReplicaDeleteTask ( dv )
return
}
// if replication status is Complete on DeleteMarker and existing object resync required
if roi . ExistingObjResync . mustResync ( ) && ( roi . ReplicationStatus == replication . Completed || roi . ReplicationStatus . Empty ( ) ) {
queueReplicateDeletesWrapper ( dv , roi . ExistingObjResync )
return
}
return
}
if roi . ExistingObjResync . mustResync ( ) {
roi . OpType = replication . ExistingObjectReplicationType
}
switch roi . ReplicationStatus {
case replication . Pending , replication . Failed :
roi . EventType = ReplicateHeal
globalReplicationPool . queueReplicaTask ( roi )
return
}
if roi . ExistingObjResync . mustResync ( ) {
roi . EventType = ReplicateExisting
globalReplicationPool . queueReplicaTask ( roi )
}
return
}
2022-08-22 16:53:06 -07:00
2023-07-13 02:51:33 -04:00
const (
	// mrfSaveInterval is the period of the timer that makes persistMRF
	// flush collected entries to disk.
	mrfSaveInterval  = 5 * time.Minute
	mrfQueueInterval = 6 * time.Minute

	// mrfRetryLimit is the max number of retries before letting scanner
	// catch up on this object version.
	mrfRetryLimit = 3
	// mrfMaxEntries is the number of in-memory entries at which persistMRF
	// saves without waiting for the timer.
	mrfMaxEntries = 1000000
)
2022-08-22 16:53:06 -07:00
func ( p * ReplicationPool ) persistMRF ( ) {
2022-10-12 15:47:41 -07:00
if ! p . initialized ( ) {
return
}
2022-08-22 16:53:06 -07:00
var mu sync . Mutex
entries := make ( map [ string ] MRFReplicateEntry )
2023-07-13 02:51:33 -04:00
mTimer := time . NewTimer ( mrfSaveInterval )
2022-08-22 16:53:06 -07:00
defer mTimer . Stop ( )
saveMRFToDisk := func ( drain bool ) {
mu . Lock ( )
defer mu . Unlock ( )
if len ( entries ) == 0 {
return
}
cctx := p . ctx
if drain {
cctx = context . Background ( )
// drain all mrf entries and save to disk
for e := range p . mrfSaveCh {
entries [ e . versionID ] = e
}
}
2023-07-13 02:51:33 -04:00
// queue all entries for healing before overwriting the node mrf file
p . queueMRFHeal ( )
2022-08-22 16:53:06 -07:00
if err := p . saveMRFEntries ( cctx , entries ) ; err != nil {
2023-07-13 02:51:33 -04:00
logger . LogOnceIf ( p . ctx , fmt . Errorf ( "unable to persist replication failures to disk:%w" , err ) , string ( replicationSubsystem ) )
2022-08-22 16:53:06 -07:00
}
entries = make ( map [ string ] MRFReplicateEntry )
}
for {
select {
case <- mTimer . C :
saveMRFToDisk ( false )
2023-07-13 02:51:33 -04:00
mTimer . Reset ( mrfSaveInterval )
2022-08-22 16:53:06 -07:00
case <- p . ctx . Done ( ) :
2022-12-22 14:25:13 -08:00
p . mrfStopCh <- struct { } { }
2022-08-22 16:53:06 -07:00
close ( p . mrfSaveCh )
saveMRFToDisk ( true )
return
2022-09-12 12:40:02 -07:00
case <- p . saveStateCh :
saveMRFToDisk ( true )
return
2022-08-22 16:53:06 -07:00
case e , ok := <- p . mrfSaveCh :
if ! ok {
return
}
var cnt int
mu . Lock ( )
entries [ e . versionID ] = e
cnt = len ( entries )
mu . Unlock ( )
2023-07-13 02:51:33 -04:00
if cnt >= mrfMaxEntries {
2022-08-22 16:53:06 -07:00
saveMRFToDisk ( true )
}
}
}
}
func ( p * ReplicationPool ) queueMRFSave ( entry MRFReplicateEntry ) {
2022-10-12 15:47:41 -07:00
if ! p . initialized ( ) {
2022-08-22 16:53:06 -07:00
return
}
2023-07-13 02:51:33 -04:00
if entry . RetryCount > mrfRetryLimit {
return
}
2022-08-22 16:53:06 -07:00
select {
case <- GlobalContext . Done ( ) :
return
2022-12-22 14:25:13 -08:00
case <- p . mrfStopCh :
return
2022-11-10 10:20:02 -08:00
default :
2022-12-22 14:25:13 -08:00
select {
case p . mrfSaveCh <- entry :
default :
}
2022-08-22 16:53:06 -07:00
}
}
2023-07-13 02:51:33 -04:00
// save mrf entries to nodenamehex.bin
2022-08-22 16:53:06 -07:00
func ( p * ReplicationPool ) saveMRFEntries ( ctx context . Context , entries map [ string ] MRFReplicateEntry ) error {
2022-10-12 15:47:41 -07:00
if ! p . initialized ( ) {
return nil
}
2022-08-22 16:53:06 -07:00
if len ( entries ) == 0 {
return nil
}
v := MRFReplicateEntries {
Entries : entries ,
Version : mrfMetaVersionV1 ,
}
data := make ( [ ] byte , 4 , v . Msgsize ( ) + 4 )
// Initialize the resync meta header.
2023-07-13 02:51:33 -04:00
binary . LittleEndian . PutUint16 ( data [ 0 : 2 ] , mrfMetaFormat )
binary . LittleEndian . PutUint16 ( data [ 2 : 4 ] , mrfMetaVersion )
2022-08-22 16:53:06 -07:00
buf , err := v . MarshalMsg ( data )
if err != nil {
return err
}
2023-07-13 02:51:33 -04:00
for _ , diskPath := range globalEndpoints . LocalDisksPaths ( ) {
// write to first drive
mrfDir := filepath . Join ( diskPath , minioMetaBucket , replicationMRFDir )
mrfFileName := filepath . Join ( mrfDir , globalLocalNodeNameHex + ".bin" )
if err := os . MkdirAll ( mrfDir , 0 o777 ) ; err != nil {
return err
}
file , err := OpenFile ( mrfFileName , os . O_CREATE | os . O_WRONLY | writeMode , 0 o666 )
if err != nil {
continue
}
defer file . Close ( )
if _ , err = file . Write ( buf ) ; err != nil {
return err
}
break
}
return nil
2022-08-22 16:53:06 -07:00
}
// load mrf entries from disk
func ( p * ReplicationPool ) loadMRF ( fileName string ) ( re MRFReplicateEntries , e error ) {
2022-10-12 15:47:41 -07:00
if ! p . initialized ( ) {
return re , nil
}
2023-07-13 02:51:33 -04:00
file , err := Open ( fileName )
if err != nil {
return re , err
}
defer file . Close ( )
2022-10-12 15:47:41 -07:00
2023-07-13 02:51:33 -04:00
data , err := io . ReadAll ( file )
if err != nil {
2022-08-22 16:53:06 -07:00
return re , err
}
if len ( data ) == 0 {
// Seems to be empty.
return re , nil
}
if len ( data ) <= 4 {
return re , fmt . Errorf ( "replication mrf: no data" )
}
// Read resync meta header
switch binary . LittleEndian . Uint16 ( data [ 0 : 2 ] ) {
case mrfMetaFormat :
default :
return re , fmt . Errorf ( "replication mrf: unknown format: %d" , binary . LittleEndian . Uint16 ( data [ 0 : 2 ] ) )
}
switch binary . LittleEndian . Uint16 ( data [ 2 : 4 ] ) {
case mrfMetaVersion :
default :
return re , fmt . Errorf ( "replication mrf: unknown version: %d" , binary . LittleEndian . Uint16 ( data [ 2 : 4 ] ) )
}
// OK, parse data.
if _ , err = re . UnmarshalMsg ( data [ 4 : ] ) ; err != nil {
return re , err
}
switch re . Version {
case mrfMetaVersionV1 :
default :
return re , fmt . Errorf ( "unexpected mrf meta version: %d" , re . Version )
}
return re , nil
}
func ( p * ReplicationPool ) processMRF ( ) {
2022-10-12 15:47:41 -07:00
if ! p . initialized ( ) {
2022-08-22 16:53:06 -07:00
return
}
2023-07-13 02:51:33 -04:00
pTimer := time . NewTimer ( mrfQueueInterval )
2022-08-22 16:53:06 -07:00
defer pTimer . Stop ( )
for {
select {
case <- pTimer . C :
// skip healing if all targets are offline
var offlineCnt int
tgts := globalBucketTargetSys . ListTargets ( p . ctx , "" , "" )
for _ , tgt := range tgts {
if globalBucketTargetSys . isOffline ( tgt . URL ( ) ) {
offlineCnt ++
}
}
if len ( tgts ) == offlineCnt {
2023-07-13 02:51:33 -04:00
pTimer . Reset ( mrfQueueInterval )
2022-08-22 16:53:06 -07:00
continue
}
2023-07-13 02:51:33 -04:00
if err := p . queueMRFHeal ( ) ; err != nil && ! osIsNotExist ( err ) {
2022-08-22 16:53:06 -07:00
logger . LogIf ( p . ctx , err )
}
2023-07-13 02:51:33 -04:00
pTimer . Reset ( mrfQueueInterval )
2022-08-22 16:53:06 -07:00
case <- p . ctx . Done ( ) :
return
}
}
}
// process sends error logs to the heal channel for an attempt to heal replication.
2023-07-13 02:51:33 -04:00
func ( p * ReplicationPool ) queueMRFHeal ( ) error {
2022-10-12 15:47:41 -07:00
if ! p . initialized ( ) {
2022-08-22 16:53:06 -07:00
return errServerNotInitialized
}
2023-07-13 02:51:33 -04:00
for _ , diskPath := range globalEndpoints . LocalDisksPaths ( ) {
fileName := filepath . Join ( diskPath , minioMetaBucket , replicationMRFDir , globalLocalNodeNameHex + ".bin" )
mrfRec , err := p . loadMRF ( fileName )
2022-08-22 16:53:06 -07:00
if err != nil {
2023-07-13 02:51:33 -04:00
return err
2022-08-22 16:53:06 -07:00
}
2023-07-13 02:51:33 -04:00
// finally delete the file after processing mrf entries
os . Remove ( fileName )
// queue replication heal in a goroutine to avoid holding up mrf save routine
go func ( mrfRec MRFReplicateEntries ) {
for vID , e := range mrfRec . Entries {
oi , err := p . objLayer . GetObjectInfo ( p . ctx , e . Bucket , e . Object , ObjectOptions {
VersionID : vID ,
} )
if err != nil {
continue
}
QueueReplicationHeal ( p . ctx , e . Bucket , oi , e . RetryCount )
}
} ( mrfRec )
break
2022-08-22 16:53:06 -07:00
}
2023-07-13 02:51:33 -04:00
2022-08-22 16:53:06 -07:00
return nil
}
2022-09-12 12:40:02 -07:00
// load replication stats from disk
func ( p * ReplicationPool ) loadStatsFromDisk ( ) ( rs map [ string ] BucketReplicationStats , e error ) {
2022-10-12 15:47:41 -07:00
if ! p . initialized ( ) {
return map [ string ] BucketReplicationStats { } , nil
}
2022-12-22 14:25:13 -08:00
data , err := readConfig ( p . ctx , p . objLayer , getReplicationStatsPath ( ) )
2022-09-12 12:40:02 -07:00
if err != nil {
2023-05-17 03:23:38 +08:00
if errors . Is ( err , errConfigNotFound ) {
2022-09-12 12:40:02 -07:00
return rs , nil
}
return rs , err
}
if len ( data ) <= 4 {
logger . LogIf ( p . ctx , fmt . Errorf ( "replication stats: no data" ) )
return map [ string ] BucketReplicationStats { } , nil
}
// Read repl stats meta header
switch binary . LittleEndian . Uint16 ( data [ 0 : 2 ] ) {
case replStatsMetaFormat :
default :
return rs , fmt . Errorf ( "replication stats: unknown format: %d" , binary . LittleEndian . Uint16 ( data [ 0 : 2 ] ) )
}
switch binary . LittleEndian . Uint16 ( data [ 2 : 4 ] ) {
case replStatsVersion :
default :
return rs , fmt . Errorf ( "replication stats: unknown version: %d" , binary . LittleEndian . Uint16 ( data [ 2 : 4 ] ) )
}
ss := BucketStatsMap { }
if _ , err = ss . UnmarshalMsg ( data [ 4 : ] ) ; err != nil {
return rs , err
}
rs = make ( map [ string ] BucketReplicationStats , len ( ss . Stats ) )
for bucket , st := range ss . Stats {
rs [ bucket ] = st . ReplicationStats
}
return rs , nil
}
2022-10-12 15:47:41 -07:00
// initialized reports whether the pool is non-nil and has an object layer set.
// It is safe to call on a nil receiver.
func (p *ReplicationPool) initialized() bool {
	return p != nil && p.objLayer != nil
}
2022-09-12 12:40:02 -07:00
func ( p * ReplicationPool ) saveStatsToDisk ( ) {
2022-10-12 15:47:41 -07:00
if ! p . initialized ( ) {
2022-09-12 12:40:02 -07:00
return
}
2022-12-22 14:25:13 -08:00
ctx , cancel := globalLeaderLock . GetLock ( p . ctx )
defer cancel ( )
2022-09-12 12:40:02 -07:00
sTimer := time . NewTimer ( replStatsSaveInterval )
defer sTimer . Stop ( )
for {
select {
case <- sTimer . C :
dui , err := loadDataUsageFromBackend ( GlobalContext , newObjectLayerFn ( ) )
if err == nil && ! dui . LastUpdate . IsZero ( ) {
globalReplicationStats . getAllLatest ( dui . BucketsUsage )
}
p . saveStats ( p . ctx )
sTimer . Reset ( replStatsSaveInterval )
2022-12-22 14:25:13 -08:00
case <- ctx . Done ( ) :
2022-09-12 12:40:02 -07:00
return
}
}
}
// save replication stats to .minio.sys/buckets/replication/node-name.stats
func ( p * ReplicationPool ) saveStats ( ctx context . Context ) error {
2022-10-12 15:47:41 -07:00
if ! p . initialized ( ) {
return nil
}
2022-11-04 17:59:14 +01:00
data , err := globalReplicationStats . serializeStats ( )
if data == nil {
2022-09-12 12:40:02 -07:00
return err
}
2022-12-22 14:25:13 -08:00
return saveConfig ( ctx , p . objLayer , getReplicationStatsPath ( ) , data )
2022-09-12 12:40:02 -07:00
}
2023-07-13 02:51:33 -04:00
// getMRF streams the MRF entries stored on this node's local drives.
//
// Entries are sent on the returned channel (buffer of 100) from a background
// goroutine, which closes the channel when it is finished. An empty bucket
// matches every entry; otherwise only entries of that bucket are sent.
// Streaming stops at the first drive whose MRF file fails to load, or when
// ctx is done. The returned error is always nil.
func (p *ReplicationPool) getMRF(ctx context.Context, bucket string) (ch chan madmin.ReplicationMRF, err error) {
	mrfCh := make(chan madmin.ReplicationMRF, 100)

	go func() {
		defer close(mrfCh)

		for _, diskPath := range globalEndpoints.LocalDisksPaths() {
			fileName := filepath.Join(diskPath, minioMetaBucket, replicationMRFDir, globalLocalNodeNameHex+".bin")
			rec, loadErr := p.loadMRF(fileName)
			if loadErr != nil {
				return
			}

			for versionID, entry := range rec.Entries {
				if bucket != "" && entry.Bucket != bucket {
					continue
				}
				item := madmin.ReplicationMRF{
					NodeName:   globalLocalNodeName,
					Object:     entry.Object,
					VersionID:  versionID,
					Bucket:     entry.Bucket,
					RetryCount: entry.RetryCount,
				}
				select {
				case <-ctx.Done():
					return
				case mrfCh <- item:
				}
			}
		}
	}()

	return mrfCh, nil
}