// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package cmd

import (
	"context"
	"encoding/base64"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"net/http"
	"net/url"
	"path"
	"reflect"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/minio/madmin-go/v3"
	"github.com/minio/minio-go/v7"
	"github.com/minio/minio-go/v7/pkg/encrypt"
	"github.com/minio/minio-go/v7/pkg/tags"
	"github.com/minio/minio/internal/amztime"
	"github.com/minio/minio/internal/bucket/bandwidth"
	objectlock "github.com/minio/minio/internal/bucket/object/lock"
	"github.com/minio/minio/internal/bucket/replication"
	"github.com/minio/minio/internal/config/storageclass"
	"github.com/minio/minio/internal/crypto"
	"github.com/minio/minio/internal/event"
	"github.com/minio/minio/internal/hash"
	xhttp "github.com/minio/minio/internal/http"
	xioutil "github.com/minio/minio/internal/ioutil"
	"github.com/minio/minio/internal/logger"
	"github.com/tinylib/msgp/msgp"
	"github.com/zeebo/xxh3"
	"golang.org/x/exp/maps"
	"golang.org/x/exp/slices"
)

const (
	throttleDeadline = 1 * time.Hour
	// ReplicationReset has reset id and timestamp of last reset operation
	ReplicationReset = "replication-reset"
	// ReplicationStatus has internal replication status - stringified representation of target's replication status for all replication
	// activity initiated from this cluster
	ReplicationStatus = "replication-status"
	// ReplicationTimestamp - the last time replication was initiated on this cluster for this object version
	ReplicationTimestamp = "replication-timestamp"
	// ReplicaStatus - this header is present if a replica was received by this cluster for this object version
	ReplicaStatus = "replica-status"
	// ReplicaTimestamp - the last time a replica was received by this cluster for this object version
	ReplicaTimestamp = "replica-timestamp"
	// TaggingTimestamp - the last time a tag metadata modification happened on this cluster for this object version
	TaggingTimestamp = "tagging-timestamp"
	// ObjectLockRetentionTimestamp - the last time an object lock metadata modification happened on this cluster for this object version
	ObjectLockRetentionTimestamp = "objectlock-retention-timestamp"
	// ObjectLockLegalHoldTimestamp - the last time a legal hold metadata modification happened on this cluster for this object version
	ObjectLockLegalHoldTimestamp = "objectlock-legalhold-timestamp"
	// ReplicationSsecChecksumHeader - the encrypted checksum of the SSE-C encrypted object.
	ReplicationSsecChecksumHeader = "X-Minio-Replication-Ssec-Crc"
)
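
// For illustration: these constants are metadata key suffixes stored under the
// MinIO reserved metadata prefix. For example, the last replication time for an
// object version oi would typically be read back as:
//
//	ts, ok := oi.UserDefined[ReservedMetadataPrefixLower+ReplicationTimestamp]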

// gets replication config associated to a given bucket name.
func getReplicationConfig(ctx context.Context, bucketName string) (rc *replication.Config, err error) {
	rCfg, _, err := globalBucketMetadataSys.GetReplicationConfig(ctx, bucketName)
	if err != nil && !errors.Is(err, BucketReplicationConfigNotFound{Bucket: bucketName}) {
		return rCfg, err
	}
	return rCfg, nil
}

// validateReplicationDestination returns an error if the replication destination bucket
// is missing or not configured.
// It also returns true if the replication destination is the same as this server.
func validateReplicationDestination(ctx context.Context, bucket string, rCfg *replication.Config, checkRemote bool) (bool, APIError) {
	var arns []string
	if rCfg.RoleArn != "" {
		arns = append(arns, rCfg.RoleArn)
	} else {
		for _, rule := range rCfg.Rules {
			arns = append(arns, rule.Destination.String())
		}
	}
	var sameTarget bool
	for _, arnStr := range arns {
		arn, err := madmin.ParseARN(arnStr)
		if err != nil {
			return sameTarget, errorCodes.ToAPIErrWithErr(ErrBucketRemoteArnInvalid, err)
		}
		if arn.Type != madmin.ReplicationService {
			return sameTarget, toAPIError(ctx, BucketRemoteArnTypeInvalid{Bucket: bucket})
		}
		clnt := globalBucketTargetSys.GetRemoteTargetClient(bucket, arnStr)
		if clnt == nil {
			return sameTarget, toAPIError(ctx, BucketRemoteTargetNotFound{Bucket: bucket})
		}
		if checkRemote { // validate remote bucket
			found, err := clnt.BucketExists(ctx, arn.Bucket)
			if err != nil {
				return sameTarget, errorCodes.ToAPIErrWithErr(ErrRemoteDestinationNotFoundError, err)
			}
			if !found {
				return sameTarget, errorCodes.ToAPIErrWithErr(ErrRemoteDestinationNotFoundError, BucketRemoteTargetNotFound{Bucket: arn.Bucket})
			}
			if ret, err := globalBucketObjectLockSys.Get(bucket); err == nil {
				if ret.LockEnabled {
					lock, _, _, _, err := clnt.GetObjectLockConfig(ctx, arn.Bucket)
					if err != nil {
						return sameTarget, errorCodes.ToAPIErrWithErr(ErrReplicationDestinationMissingLock, err)
					}
					if lock != objectlock.Enabled {
						return sameTarget, errorCodes.ToAPIErrWithErr(ErrReplicationDestinationMissingLock, nil)
					}
				}
			}
		}
		// validate replication ARN against target endpoint
		c := globalBucketTargetSys.GetRemoteTargetClient(bucket, arnStr)
		if c != nil {
			if err := checkRemoteEndpoint(ctx, c.EndpointURL()); err != nil {
				switch err.(type) {
				case BucketRemoteIdenticalToSource:
					return true, errorCodes.ToAPIErrWithErr(ErrBucketRemoteIdenticalToSource, fmt.Errorf("remote target endpoint %s is self referential", c.EndpointURL().String()))
				default:
				}
			}
			if c.EndpointURL().String() == clnt.EndpointURL().String() {
				selfTarget, _ := isLocalHost(clnt.EndpointURL().Hostname(), clnt.EndpointURL().Port(), globalMinioPort)
				if !sameTarget {
					sameTarget = selfTarget
				}
				continue
			}
		}
	}

	if len(arns) == 0 {
		return false, toAPIError(ctx, BucketRemoteTargetNotFound{Bucket: bucket})
	}
	return sameTarget, toAPIError(ctx, nil)
}

// performs an HTTP request to the remote endpoint to check if the deployment id of the
// remote endpoint is the same as the local cluster deployment id. This is to prevent
// replication to self, especially in the case of a load balancer in front of MinIO.
func checkRemoteEndpoint(ctx context.Context, epURL *url.URL) error {
	reqURL := &url.URL{
		Scheme: epURL.Scheme,
		Host:   epURL.Host,
		Path:   healthCheckPathPrefix + healthCheckReadinessPath,
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL.String(), nil)
	if err != nil {
		return err
	}

	client := &http.Client{
		Transport: globalRemoteTargetTransport,
		Timeout:   10 * time.Second,
	}

	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	// Drain the connection.
	xhttp.DrainBody(resp.Body)

	if resp != nil {
		amzid := resp.Header.Get(xhttp.AmzRequestHostID)
		if _, ok := globalNodeNamesHex[amzid]; ok {
			return BucketRemoteIdenticalToSource{
				Endpoint: epURL.String(),
			}
		}
	}
	return nil
}
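
// For illustration: assuming the usual MinIO health-check constants, for a
// hypothetical remote endpoint https://minio2.example.com:9000 the probe above
// issues a GET against
//
//	https://minio2.example.com:9000/minio/health/ready
//
// and a response whose xhttp.AmzRequestHostID header matches one of this
// cluster's own node names indicates a self-referential target.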

type mustReplicateOptions struct {
	meta               map[string]string
	status             replication.StatusType
	opType             replication.Type
	replicationRequest bool // incoming request is a replication request
}

func (o mustReplicateOptions) ReplicationStatus() (s replication.StatusType) {
	if rs, ok := o.meta[xhttp.AmzBucketReplicationStatus]; ok {
		return replication.StatusType(rs)
	}
	return s
}

func (o mustReplicateOptions) isExistingObjectReplication() bool {
	return o.opType == replication.ExistingObjectReplicationType
}

func (o mustReplicateOptions) isMetadataReplication() bool {
	return o.opType == replication.MetadataReplicationType
}

func (o ObjectInfo) getMustReplicateOptions(op replication.Type, opts ObjectOptions) mustReplicateOptions {
	return getMustReplicateOptions(o.UserDefined, o.UserTags, o.ReplicationStatus, op, opts)
}

func getMustReplicateOptions(userDefined map[string]string, userTags string, status replication.StatusType, op replication.Type, opts ObjectOptions) mustReplicateOptions {
	meta := cloneMSS(userDefined)
	if userTags != "" {
		meta[xhttp.AmzObjectTagging] = userTags
	}

	return mustReplicateOptions{
		meta:               meta,
		status:             status,
		opType:             op,
		replicationRequest: opts.ReplicationRequest,
	}
}

// mustReplicate returns a ReplicateDecision recording, for each matching target,
// whether the object meets the replication criteria and whether replication is to
// be performed synchronously.
func mustReplicate(ctx context.Context, bucket, object string, mopts mustReplicateOptions) (dsc ReplicateDecision) {
	// object layer not initialized, return with no decision.
	if newObjectLayerFn() == nil {
		return
	}

	// Disable server-side replication on object prefixes which are excluded
	// from versioning via the MinIO bucket versioning extension.
	if !globalBucketVersioningSys.PrefixEnabled(bucket, object) {
		return
	}

	replStatus := mopts.ReplicationStatus()
	if replStatus == replication.Replica && !mopts.isMetadataReplication() {
		return
	}

	if mopts.replicationRequest { // incoming replication request on target cluster
		return
	}

	cfg, err := getReplicationConfig(ctx, bucket)
	if err != nil {
		replLogOnceIf(ctx, err, bucket)
		return
	}
	if cfg == nil {
		return
	}

	opts := replication.ObjectOpts{
		Name:           object,
		SSEC:           crypto.SSEC.IsEncrypted(mopts.meta),
		Replica:        replStatus == replication.Replica,
		ExistingObject: mopts.isExistingObjectReplication(),
	}
	tagStr, ok := mopts.meta[xhttp.AmzObjectTagging]
	if ok {
		opts.UserTags = tagStr
	}
	tgtArns := cfg.FilterTargetArns(opts)
	for _, tgtArn := range tgtArns {
		tgt := globalBucketTargetSys.GetRemoteTargetClient(bucket, tgtArn)
		// the target online status should not be used here while deciding
		// whether to replicate as the target could be temporarily down
		opts.TargetArn = tgtArn
		replicate := cfg.Replicate(opts)
		var synchronous bool
		if tgt != nil {
			synchronous = tgt.replicateSync
		}
		dsc.Set(newReplicateTargetDecision(tgtArn, replicate, synchronous))
	}
	return dsc
}
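
// For illustration, a minimal sketch of how a caller consumes the decision
// (assumes the ReplicateDecision helpers defined elsewhere in this package,
// e.g. ReplicateAny):
//
//	dsc := mustReplicate(ctx, bucket, object, oi.getMustReplicateOptions(replication.ObjectReplicationType, opts))
//	if dsc.ReplicateAny() {
//		// queue this object version for replication to the matching targets
//	}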

// Standard headers that need to be extracted from User metadata.
var standardHeaders = []string{
	xhttp.ContentType,
	xhttp.CacheControl,
	xhttp.ContentEncoding,
	xhttp.ContentLanguage,
	xhttp.ContentDisposition,
	xhttp.AmzStorageClass,
	xhttp.AmzObjectTagging,
	xhttp.AmzBucketReplicationStatus,
	xhttp.AmzObjectLockMode,
	xhttp.AmzObjectLockRetainUntilDate,
	xhttp.AmzObjectLockLegalHold,
	xhttp.AmzTagCount,
	xhttp.AmzServerSideEncryption,
}

// returns true if any of the objects being deleted qualifies for replication.
func hasReplicationRules(ctx context.Context, bucket string, objects []ObjectToDelete) bool {
	c, err := getReplicationConfig(ctx, bucket)
	if err != nil || c == nil {
		replLogOnceIf(ctx, err, bucket)
		return false
	}
	for _, obj := range objects {
		if c.HasActiveRules(obj.ObjectName, true) {
			return true
		}
	}
	return false
}

// isStandardHeader returns true if header is a supported header and not a custom header
func isStandardHeader(matchHeaderKey string) bool {
	return equals(matchHeaderKey, standardHeaders...)
}
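
// For illustration: matching is case-insensitive via equals, so
// isStandardHeader("content-type") is true, while a custom key such as
// isStandardHeader("X-My-Header") is false.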

// returns whether the object version is a delete marker and whether the object qualifies for replication
func checkReplicateDelete(ctx context.Context, bucket string, dobj ObjectToDelete, oi ObjectInfo, delOpts ObjectOptions, gerr error) (dsc ReplicateDecision) {
	rcfg, err := getReplicationConfig(ctx, bucket)
	if err != nil || rcfg == nil {
		replLogOnceIf(ctx, err, bucket)
		return
	}
	// If incoming request is a replication request, it does not need to be re-replicated.
	if delOpts.ReplicationRequest {
		return
	}
	// Skip replication if this object's prefix is excluded from being
	// versioned.
	if !delOpts.Versioned {
		return
	}
	opts := replication.ObjectOpts{
		Name:         dobj.ObjectName,
		SSEC:         crypto.SSEC.IsEncrypted(oi.UserDefined),
		UserTags:     oi.UserTags,
		DeleteMarker: oi.DeleteMarker,
		VersionID:    dobj.VersionID,
		OpType:       replication.DeleteReplicationType,
	}
	tgtArns := rcfg.FilterTargetArns(opts)
	dsc.targetsMap = make(map[string]replicateTargetDecision, len(tgtArns))
	if len(tgtArns) == 0 {
		return dsc
	}
	var sync, replicate bool
	for _, tgtArn := range tgtArns {
		opts.TargetArn = tgtArn
		replicate = rcfg.Replicate(opts)
		// when incoming delete is removal of a delete marker (a.k.a. versioned delete),
		// GetObjectInfo returns extra information even though it returns errFileNotFound
		if gerr != nil {
			validReplStatus := false
			switch oi.TargetReplicationStatus(tgtArn) {
			case replication.Pending, replication.Completed, replication.Failed:
				validReplStatus = true
			}
			if oi.DeleteMarker && (validReplStatus || replicate) {
				dsc.Set(newReplicateTargetDecision(tgtArn, replicate, sync))
				continue
			}
			// can be the case that other cluster is down and duplicate `mc rm --vid`
			// is issued - this still needs to be replicated back to the other target
			if !oi.VersionPurgeStatus.Empty() {
				replicate = oi.VersionPurgeStatus == Pending || oi.VersionPurgeStatus == Failed
				dsc.Set(newReplicateTargetDecision(tgtArn, replicate, sync))
			}
			continue
		}
		tgt := globalBucketTargetSys.GetRemoteTargetClient(bucket, tgtArn)
		// the target online status should not be used here while deciding
		// whether to replicate deletes as the target could be temporarily down
		tgtDsc := newReplicateTargetDecision(tgtArn, false, false)
		if tgt != nil {
			tgtDsc = newReplicateTargetDecision(tgtArn, replicate, tgt.replicateSync)
		}
		dsc.Set(tgtDsc)
	}
	return dsc
}

// replicate deletes to the designated replication target if replication configuration
// has delete marker replication or delete replication (MinIO extension to allow deletes where version id
// is specified) enabled.
// Similar to bucket replication for the PUT operation, soft deletes (a.k.a. setting a delete marker) and
// permanent deletes (by specifying a version ID in the delete operation) have three states, "Pending", "Complete"
// and "Failed", to mark the status of the replication of the "DELETE" operation. All failed operations can
// then be retried by healing. In the case of permanent deletes, until the replication is completed on the
// target cluster, the object version is marked deleted on the source and hidden from listing. It is permanently
// deleted from the source when the VersionPurgeStatus changes to "Complete", i.e. after replication succeeds
// on the target.
func replicateDelete(ctx context.Context, dobj DeletedObjectReplicationInfo, objectAPI ObjectLayer) {
	var replicationStatus replication.StatusType
	bucket := dobj.Bucket
	versionID := dobj.DeleteMarkerVersionID
	if versionID == "" {
		versionID = dobj.VersionID
	}

	defer func() {
		replStatus := string(replicationStatus)
		auditLogInternal(context.Background(), AuditLogOptions{
			Event:     dobj.EventType,
			APIName:   ReplicateDeleteAPI,
			Bucket:    bucket,
			Object:    dobj.ObjectName,
			VersionID: versionID,
			Status:    replStatus,
		})
	}()

	rcfg, err := getReplicationConfig(ctx, bucket)
	if err != nil || rcfg == nil {
		replLogOnceIf(ctx, fmt.Errorf("unable to obtain replication config for bucket: %s: err: %s", bucket, err), bucket)
		sendEvent(eventArgs{
			BucketName: bucket,
			Object: ObjectInfo{
				Bucket:       bucket,
				Name:         dobj.ObjectName,
				VersionID:    versionID,
				DeleteMarker: dobj.DeleteMarker,
			},
			UserAgent: "Internal: [Replication]",
			Host:      globalLocalNodeName,
			EventName: event.ObjectReplicationNotTracked,
		})
		return
	}
	dsc, err := parseReplicateDecision(ctx, bucket, dobj.ReplicationState.ReplicateDecisionStr)
	if err != nil {
		replLogOnceIf(ctx, fmt.Errorf("unable to parse replication decision parameters for bucket: %s, err: %s, decision: %s",
			bucket, err, dobj.ReplicationState.ReplicateDecisionStr), dobj.ReplicationState.ReplicateDecisionStr)
		sendEvent(eventArgs{
			BucketName: bucket,
			Object: ObjectInfo{
				Bucket:       bucket,
				Name:         dobj.ObjectName,
				VersionID:    versionID,
				DeleteMarker: dobj.DeleteMarker,
			},
			UserAgent: "Internal: [Replication]",
			Host:      globalLocalNodeName,
			EventName: event.ObjectReplicationNotTracked,
		})
		return
	}

	// Lock the object name before starting replication operation.
	// Use separate lock that doesn't collide with regular objects.
	lk := objectAPI.NewNSLock(bucket, "/[replicate]/"+dobj.ObjectName)
	lkctx, err := lk.GetLock(ctx, globalOperationTimeout)
	if err != nil {
		globalReplicationPool.queueMRFSave(dobj.ToMRFEntry())
		sendEvent(eventArgs{
			BucketName: bucket,
			Object: ObjectInfo{
				Bucket:       bucket,
				Name:         dobj.ObjectName,
				VersionID:    versionID,
				DeleteMarker: dobj.DeleteMarker,
			},
			UserAgent: "Internal: [Replication]",
			Host:      globalLocalNodeName,
			EventName: event.ObjectReplicationNotTracked,
		})
		return
	}
	ctx = lkctx.Context()
	defer lk.Unlock(lkctx)

	rinfos := replicatedInfos{Targets: make([]replicatedTargetInfo, 0, len(dsc.targetsMap))}
	var wg sync.WaitGroup
	var mu sync.Mutex
	for _, tgtEntry := range dsc.targetsMap {
		if !tgtEntry.Replicate {
			continue
		}
		// if dobj.TargetArn is not empty string, this is a case of specific target being re-synced.
		if dobj.TargetArn != "" && dobj.TargetArn != tgtEntry.Arn {
			continue
		}
		tgtClnt := globalBucketTargetSys.GetRemoteTargetClient(bucket, tgtEntry.Arn)
		if tgtClnt == nil {
			// Skip stale targets if any and log them to be missing at least once.
			replLogOnceIf(ctx, fmt.Errorf("failed to get target for bucket:%s arn:%s", bucket, tgtEntry.Arn), tgtEntry.Arn)
			sendEvent(eventArgs{
				EventName:  event.ObjectReplicationNotTracked,
				BucketName: bucket,
				Object: ObjectInfo{
					Bucket:       bucket,
					Name:         dobj.ObjectName,
					VersionID:    versionID,
					DeleteMarker: dobj.DeleteMarker,
				},
				UserAgent: "Internal: [Replication]",
				Host:      globalLocalNodeName,
			})
			continue
		}
		wg.Add(1)
		go func(tgt *TargetClient) {
			defer wg.Done()
			tgtInfo := replicateDeleteToTarget(ctx, dobj, tgt)
			mu.Lock()
			rinfos.Targets = append(rinfos.Targets, tgtInfo)
			mu.Unlock()
		}(tgtClnt)
	}
	wg.Wait()

	replicationStatus = rinfos.ReplicationStatus()
	prevStatus := dobj.DeleteMarkerReplicationStatus()
	if dobj.VersionID != "" {
		prevStatus = replication.StatusType(dobj.VersionPurgeStatus())
		replicationStatus = replication.StatusType(rinfos.VersionPurgeStatus())
	}

	// to decrement pending count later.
	for _, rinfo := range rinfos.Targets {
		if rinfo.ReplicationStatus != rinfo.PrevReplicationStatus {
			globalReplicationStats.Update(dobj.Bucket, rinfo, replicationStatus,
				prevStatus)
		}
	}

	eventName := event.ObjectReplicationComplete
	if replicationStatus == replication.Failed {
		eventName = event.ObjectReplicationFailed
		globalReplicationPool.queueMRFSave(dobj.ToMRFEntry())
	}
	drs := getReplicationState(rinfos, dobj.ReplicationState, dobj.VersionID)
	if replicationStatus != prevStatus {
		drs.ReplicationTimeStamp = UTCNow()
	}

	dobjInfo, err := objectAPI.DeleteObject(ctx, bucket, dobj.ObjectName, ObjectOptions{
		VersionID:         versionID,
		MTime:             dobj.DeleteMarkerMTime.Time,
		DeleteReplication: drs,
		Versioned:         globalBucketVersioningSys.PrefixEnabled(bucket, dobj.ObjectName),
		// Objects matching prefixes should not leave delete markers,
		// dramatically reduces namespace pollution while keeping the
		// benefits of replication, make sure to apply version suspension
		// only at bucket level instead.
		VersionSuspended: globalBucketVersioningSys.Suspended(bucket),
	})
	if err != nil && !isErrVersionNotFound(err) { // VersionNotFound would be reported by pool that object version is missing on.
		sendEvent(eventArgs{
			BucketName: bucket,
			Object: ObjectInfo{
				Bucket:       bucket,
				Name:         dobj.ObjectName,
				VersionID:    versionID,
				DeleteMarker: dobj.DeleteMarker,
			},
			UserAgent: "Internal: [Replication]",
			Host:      globalLocalNodeName,
			EventName: eventName,
		})
	} else {
		sendEvent(eventArgs{
			BucketName: bucket,
			Object:     dobjInfo,
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
			EventName:  eventName,
		})
	}
}

func replicateDeleteToTarget(ctx context.Context, dobj DeletedObjectReplicationInfo, tgt *TargetClient) (rinfo replicatedTargetInfo) {
	versionID := dobj.DeleteMarkerVersionID
	if versionID == "" {
		versionID = dobj.VersionID
	}

	rinfo = dobj.ReplicationState.targetState(tgt.ARN)
	rinfo.OpType = dobj.OpType
	rinfo.endpoint = tgt.EndpointURL().Host
	rinfo.secure = tgt.EndpointURL().Scheme == "https"
	defer func() {
		if rinfo.ReplicationStatus == replication.Completed && tgt.ResetID != "" && dobj.OpType == replication.ExistingObjectReplicationType {
			rinfo.ResyncTimestamp = fmt.Sprintf("%s;%s", UTCNow().Format(http.TimeFormat), tgt.ResetID)
		}
	}()

	if dobj.VersionID == "" && rinfo.PrevReplicationStatus == replication.Completed && dobj.OpType != replication.ExistingObjectReplicationType {
		rinfo.ReplicationStatus = rinfo.PrevReplicationStatus
		return
	}
	if dobj.VersionID != "" && rinfo.VersionPurgeStatus == Complete {
		return
	}
	if globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
		replLogOnceIf(ctx, fmt.Errorf("remote target is offline for bucket:%s arn:%s", dobj.Bucket, tgt.ARN), "replication-target-offline-delete-"+tgt.ARN)
		sendEvent(eventArgs{
			BucketName: dobj.Bucket,
			Object: ObjectInfo{
				Bucket:       dobj.Bucket,
				Name:         dobj.ObjectName,
				VersionID:    dobj.VersionID,
				DeleteMarker: dobj.DeleteMarker,
			},
			UserAgent: "Internal: [Replication]",
			Host:      globalLocalNodeName,
			EventName: event.ObjectReplicationNotTracked,
		})
		if dobj.VersionID == "" {
			rinfo.ReplicationStatus = replication.Failed
		} else {
			rinfo.VersionPurgeStatus = Failed
		}
		return
	}
	// early return if delete marker is already replicated, for existing object replication / healing delete markers
	if dobj.DeleteMarkerVersionID != "" {
		toi, err := tgt.StatObject(ctx, tgt.Bucket, dobj.ObjectName, minio.StatObjectOptions{
			VersionID: versionID,
			Internal: minio.AdvancedGetOptions{
				ReplicationProxyRequest:           "false",
				IsReplicationReadyForDeleteMarker: true,
			},
		})
		serr := ErrorRespToObjectError(err, dobj.Bucket, dobj.ObjectName, dobj.VersionID)
		switch {
		case isErrMethodNotAllowed(serr):
			// delete marker already replicated
			if dobj.VersionID == "" && rinfo.VersionPurgeStatus.Empty() {
				rinfo.ReplicationStatus = replication.Completed
				return
			}
		case isErrObjectNotFound(serr), isErrVersionNotFound(serr):
			// version being purged is already not found on target.
			if !rinfo.VersionPurgeStatus.Empty() {
				rinfo.VersionPurgeStatus = Complete
				return
			}
		case isErrReadQuorum(serr), isErrWriteQuorum(serr):
			// destination has some quorum issues, perform removeObject() anyways
			// to complete the operation.
		default:
			if err != nil && minio.IsNetworkOrHostDown(err, true) && !globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
				globalBucketTargetSys.markOffline(tgt.EndpointURL())
			}
			// mark delete marker replication as failed if target cluster not ready to receive
			// this request yet (object version not replicated yet)
			if err != nil && !toi.ReplicationReady {
				rinfo.ReplicationStatus = replication.Failed
				rinfo.Err = err
				return
			}
		}
	}
	rmErr := tgt.RemoveObject(ctx, tgt.Bucket, dobj.ObjectName, minio.RemoveObjectOptions{
		VersionID: versionID,
		Internal: minio.AdvancedRemoveOptions{
			ReplicationDeleteMarker: dobj.DeleteMarkerVersionID != "",
			ReplicationMTime:        dobj.DeleteMarkerMTime.Time,
			ReplicationStatus:       minio.ReplicationStatusReplica,
			ReplicationRequest:      true, // always set this to distinguish between `mc mirror` replication and serverside
		},
	})
	if rmErr != nil {
		rinfo.Err = rmErr
		if dobj.VersionID == "" {
			rinfo.ReplicationStatus = replication.Failed
		} else {
			rinfo.VersionPurgeStatus = Failed
		}
		replLogIf(ctx, fmt.Errorf("unable to replicate delete marker to %s: %s/%s(%s): %w", tgt.EndpointURL(), tgt.Bucket, dobj.ObjectName, versionID, rmErr))
		if minio.IsNetworkOrHostDown(rmErr, true) && !globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
			globalBucketTargetSys.markOffline(tgt.EndpointURL())
		}
	} else {
		if dobj.VersionID == "" {
			rinfo.ReplicationStatus = replication.Completed
		} else {
			rinfo.VersionPurgeStatus = Complete
		}
	}
	return
}

func getCopyObjMetadata(oi ObjectInfo, sc string) map[string]string {
	meta := make(map[string]string, len(oi.UserDefined))
	for k, v := range oi.UserDefined {
		if stringsHasPrefixFold(k, ReservedMetadataPrefixLower) {
			continue
		}

		if equals(k, xhttp.AmzBucketReplicationStatus) {
			continue
		}

		// https://github.com/google/security-research/security/advisories/GHSA-76wf-9vgp-pj7w
		if equals(k, xhttp.AmzMetaUnencryptedContentLength, xhttp.AmzMetaUnencryptedContentMD5) {
			continue
		}

		meta[k] = v
	}

	if oi.ContentEncoding != "" {
		meta[xhttp.ContentEncoding] = oi.ContentEncoding
	}

	if oi.ContentType != "" {
		meta[xhttp.ContentType] = oi.ContentType
	}

	meta[xhttp.AmzObjectTagging] = oi.UserTags
	meta[xhttp.AmzTagDirective] = "REPLACE"

	if sc == "" {
		sc = oi.StorageClass
	}
	// drop non standard storage classes for tiering from replication
	if sc != "" && (sc == storageclass.RRS || sc == storageclass.STANDARD) {
		meta[xhttp.AmzStorageClass] = sc
	}

	meta[xhttp.MinIOSourceETag] = oi.ETag
	meta[xhttp.MinIOSourceMTime] = oi.ModTime.UTC().Format(time.RFC3339Nano)
	meta[xhttp.AmzBucketReplicationStatus] = replication.Replica.String()
	return meta
}

type caseInsensitiveMap map[string]string

// Lookup map entry case insensitively.
func (m caseInsensitiveMap) Lookup(key string) (string, bool) {
	if len(m) == 0 {
		return "", false
	}
	for _, k := range []string{
		key,
		strings.ToLower(key),
		http.CanonicalHeaderKey(key),
	} {
		v, ok := m[k]
		if ok {
			return v, ok
		}
	}
	return "", false
}
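
// For illustration: Lookup tries the key verbatim, lowercased, and in
// canonical header form, so for a hypothetical map
//
//	m := caseInsensitiveMap{"Content-Language": "en"}
//
// m.Lookup("content-language") returns ("en", true).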

func getCRCMeta(oi ObjectInfo, partNum int, h http.Header) map[string]string {
	meta := make(map[string]string)
	cs := oi.decryptChecksums(partNum, h)
	for k, v := range cs {
		cksum := hash.NewChecksumString(k, v)
		if cksum == nil {
			continue
		}
		if cksum.Valid() {
			meta[cksum.Type.Key()] = v
		}
	}
	return meta
}
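
// For illustration: getCRCMeta re-keys each valid decrypted checksum under its
// canonical metadata key via cksum.Type.Key(), so a hypothetical SHA256 entry
// would typically surface under its x-amz-checksum-sha256 form.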

func putReplicationOpts(ctx context.Context, sc string, objInfo ObjectInfo, partNum int) (putOpts minio.PutObjectOptions, err error) {
	meta := make(map[string]string)
	isSSEC := crypto.SSEC.IsEncrypted(objInfo.UserDefined)
	for k, v := range objInfo.UserDefined {
		// In case of SSE-C objects copy the allowed internal headers as well
		if !isSSEC || !slices.Contains(maps.Keys(validSSEReplicationHeaders), k) {
			if stringsHasPrefixFold(k, ReservedMetadataPrefixLower) {
				if strings.EqualFold(k, ReservedMetadataPrefixLower+"crc") {
					for k, v := range getCRCMeta(objInfo, partNum, nil) {
						meta[k] = v
					}
				}
				continue
			}
			if isStandardHeader(k) {
				continue
			}
		}
		if slices.Contains(maps.Keys(validSSEReplicationHeaders), k) {
			meta[validSSEReplicationHeaders[k]] = v
		} else {
			meta[k] = v
		}
	}

	if len(objInfo.Checksum) > 0 {
		// Add encrypted CRC to metadata for SSE-C objects.
		if isSSEC {
			meta[ReplicationSsecChecksumHeader] = base64.StdEncoding.EncodeToString(objInfo.Checksum)
		} else {
			for k, v := range getCRCMeta(objInfo, 0, nil) {
				meta[k] = v
			}
		}
	}

	if sc == "" && (objInfo.StorageClass == storageclass.STANDARD || objInfo.StorageClass == storageclass.RRS) {
		sc = objInfo.StorageClass
	}
	putOpts = minio.PutObjectOptions{
		UserMetadata:    meta,
		ContentType:     objInfo.ContentType,
		ContentEncoding: objInfo.ContentEncoding,
		Expires:         objInfo.Expires,
		StorageClass:    sc,
		Internal: minio.AdvancedPutOptions{
			SourceVersionID:    objInfo.VersionID,
			ReplicationStatus:  minio.ReplicationStatusReplica,
			SourceMTime:        objInfo.ModTime,
			SourceETag:         objInfo.ETag,
			ReplicationRequest: true, // always set this to distinguish between `mc mirror` replication and serverside
		},
	}

	if objInfo.UserTags != "" {
		tag, _ := tags.ParseObjectTags(objInfo.UserTags)
		if tag != nil {
			putOpts.UserTags = tag.ToMap()
			// set tag timestamp in opts
			tagTimestamp := objInfo.ModTime
			if tagTmstampStr, ok := objInfo.UserDefined[ReservedMetadataPrefixLower+TaggingTimestamp]; ok {
				tagTimestamp, err = time.Parse(time.RFC3339Nano, tagTmstampStr)
				if err != nil {
					return putOpts, err
				}
			}
			putOpts.Internal.TaggingTimestamp = tagTimestamp
		}
	}

	lkMap := caseInsensitiveMap(objInfo.UserDefined)
	if lang, ok := lkMap.Lookup(xhttp.ContentLanguage); ok {
		putOpts.ContentLanguage = lang
	}
	if disp, ok := lkMap.Lookup(xhttp.ContentDisposition); ok {
		putOpts.ContentDisposition = disp
	}
	if cc, ok := lkMap.Lookup(xhttp.CacheControl); ok {
		putOpts.CacheControl = cc
	}
	if mode, ok := lkMap.Lookup(xhttp.AmzObjectLockMode); ok {
		rmode := minio.RetentionMode(mode)
		putOpts.Mode = rmode
	}
	if retainDateStr, ok := lkMap.Lookup(xhttp.AmzObjectLockRetainUntilDate); ok {
		rdate, err := amztime.ISO8601Parse(retainDateStr)
		if err != nil {
			return putOpts, err
		}
		putOpts.RetainUntilDate = rdate
		// set retention timestamp in opts
		retTimestamp := objInfo.ModTime
		if retainTmstampStr, ok := objInfo.UserDefined[ReservedMetadataPrefixLower+ObjectLockRetentionTimestamp]; ok {
			retTimestamp, err = time.Parse(time.RFC3339Nano, retainTmstampStr)
			if err != nil {
				return putOpts, err
			}
		}
		putOpts.Internal.RetentionTimestamp = retTimestamp
	}
	if lhold, ok := lkMap.Lookup(xhttp.AmzObjectLockLegalHold); ok {
		putOpts.LegalHold = minio.LegalHoldStatus(lhold)
		// set legalhold timestamp in opts
		lholdTimestamp := objInfo.ModTime
		if lholdTmstampStr, ok := objInfo.UserDefined[ReservedMetadataPrefixLower+ObjectLockLegalHoldTimestamp]; ok {
			lholdTimestamp, err = time.Parse(time.RFC3339Nano, lholdTmstampStr)
			if err != nil {
				return putOpts, err
			}
		}
		putOpts.Internal.LegalholdTimestamp = lholdTimestamp
	}
	if crypto.S3.IsEncrypted(objInfo.UserDefined) {
		putOpts.ServerSideEncryption = encrypt.NewSSE()
	}
	return
}

type replicationAction string

const (
	replicateMetadata replicationAction = "metadata"
	replicateNone     replicationAction = "none"
	replicateAll      replicationAction = "all"
)

// matches k1 with all keys, returns 'true' if one of them matches
func equals(k1 string, keys ...string) bool {
	for _, k2 := range keys {
		if strings.EqualFold(k1, k2) {
			return true
		}
	}
	return false
}

// returns replicationAction by comparing metadata between source and target
func getReplicationAction(oi1 ObjectInfo, oi2 minio.ObjectInfo, opType replication.Type) replicationAction {
	// Avoid resyncing null versions created prior to enabling replication if target has a newer copy
	if opType == replication.ExistingObjectReplicationType &&
		oi1.ModTime.Unix() > oi2.LastModified.Unix() && oi1.VersionID == nullVersionID {
		return replicateNone
	}
	sz, _ := oi1.GetActualSize()

	// needs full replication
	if oi1.ETag != oi2.ETag ||
		oi1.VersionID != oi2.VersionID ||
		sz != oi2.Size ||
		oi1.DeleteMarker != oi2.IsDeleteMarker ||
		oi1.ModTime.Unix() != oi2.LastModified.Unix() {
		return replicateAll
	}

	if oi1.ContentType != oi2.ContentType {
		return replicateMetadata
	}

	if oi1.ContentEncoding != "" {
		enc, ok := oi2.Metadata[xhttp.ContentEncoding]
		if !ok {
			enc, ok = oi2.Metadata[strings.ToLower(xhttp.ContentEncoding)]
			if !ok {
				return replicateMetadata
			}
		}
		if strings.Join(enc, ",") != oi1.ContentEncoding {
			return replicateMetadata
		}
	}

	t, _ := tags.ParseObjectTags(oi1.UserTags)
	if (oi2.UserTagCount > 0 && !reflect.DeepEqual(oi2.UserTags, t.ToMap())) || (oi2.UserTagCount != len(t.ToMap())) {
		return replicateMetadata
	}

	// Compare only necessary headers
	compareKeys := []string{
		"Expires",
		"Cache-Control",
		"Content-Language",
		"Content-Disposition",
		"X-Amz-Object-Lock-Mode",
		"X-Amz-Object-Lock-Retain-Until-Date",
		"X-Amz-Object-Lock-Legal-Hold",
		"X-Amz-Website-Redirect-Location",
		"X-Amz-Meta-",
	}

	// compare metadata on both maps to see if meta is identical
	compareMeta1 := make(map[string]string)
	for k, v := range oi1.UserDefined {
		var found bool
		for _, prefix := range compareKeys {
			if !stringsHasPrefixFold(k, prefix) {
				continue
			}
			found = true
			break
		}
		if found {
			compareMeta1[strings.ToLower(k)] = v
		}
	}

	compareMeta2 := make(map[string]string)
	for k, v := range oi2.Metadata {
		var found bool
		for _, prefix := range compareKeys {
			if !stringsHasPrefixFold(k, prefix) {
				continue
			}
			found = true
			break
		}
		if found {
			compareMeta2[strings.ToLower(k)] = strings.Join(v, ",")
		}
	}

	if !reflect.DeepEqual(compareMeta1, compareMeta2) {
		return replicateMetadata
	}

	return replicateNone
}
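
// For illustration: a source/target pair differing only in Content-Type yields
// replicateMetadata; a mismatch in ETag, version ID, actual size, delete-marker
// state or mod-time forces replicateAll; fully matching objects yield
// replicateNone.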

// replicateObject replicates the specified version of the object to destination bucket
// The source object is then updated to reflect the replication status.
func replicateObject(ctx context.Context, ri ReplicateObjectInfo, objectAPI ObjectLayer) {
	var replicationStatus replication.StatusType
	defer func() {
		if replicationStatus.Empty() {
			// replication status is empty means
			// replication was not attempted for some
			// reason, notify the state of the object
			// on disk.
			replicationStatus = ri.ReplicationStatus
		}
		auditLogInternal(ctx, AuditLogOptions{
			Event:     ri.EventType,
			APIName:   ReplicateObjectAPI,
			Bucket:    ri.Bucket,
			Object:    ri.Name,
			VersionID: ri.VersionID,
			Status:    replicationStatus.String(),
		})
	}()

	bucket := ri.Bucket
	object := ri.Name

	cfg, err := getReplicationConfig(ctx, bucket)
	if err != nil || cfg == nil {
		replLogOnceIf(ctx, err, "get-replication-config-"+bucket)
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     ri.ToObjectInfo(),
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		return
	}
	tgtArns := cfg.FilterTargetArns(replication.ObjectOpts{
		Name:     object,
		SSEC:     ri.SSEC,
		UserTags: ri.UserTags,
	})
	// Lock the object name before starting replication.
	// Use separate lock that doesn't collide with regular objects.
	lk := objectAPI.NewNSLock(bucket, "/[replicate]/"+object)
	lkctx, err := lk.GetLock(ctx, globalOperationTimeout)
	if err != nil {
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     ri.ToObjectInfo(),
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		globalReplicationPool.queueMRFSave(ri.ToMRFEntry())
		return
	}
	ctx = lkctx.Context()
	defer lk.Unlock(lkctx)

	rinfos := replicatedInfos{Targets: make([]replicatedTargetInfo, 0, len(tgtArns))}
	var wg sync.WaitGroup
	var mu sync.Mutex
	for _, tgtArn := range tgtArns {
		tgt := globalBucketTargetSys.GetRemoteTargetClient(bucket, tgtArn)
		if tgt == nil {
			replLogOnceIf(ctx, fmt.Errorf("failed to get target for bucket:%s arn:%s", bucket, tgtArn), tgtArn)
			sendEvent(eventArgs{
				EventName:  event.ObjectReplicationNotTracked,
				BucketName: bucket,
				Object:     ri.ToObjectInfo(),
				UserAgent:  "Internal: [Replication]",
				Host:       globalLocalNodeName,
			})
			continue
		}
		wg.Add(1)
		go func(tgt *TargetClient) {
			defer wg.Done()
			var tgtInfo replicatedTargetInfo
			if ri.OpType == replication.ObjectReplicationType {
				// all incoming calls go through optimized path.
				tgtInfo = ri.replicateObject(ctx, objectAPI, tgt)
			} else {
				tgtInfo = ri.replicateAll(ctx, objectAPI, tgt)
			}
			mu.Lock()
			rinfos.Targets = append(rinfos.Targets, tgtInfo)
			mu.Unlock()
		}(tgt)
	}
	wg.Wait()

	replicationStatus = rinfos.ReplicationStatus() // used in defer function
	// FIXME: add support for missing replication events
	// - event.ObjectReplicationMissedThreshold
	// - event.ObjectReplicationReplicatedAfterThreshold
	eventName := event.ObjectReplicationComplete
	if replicationStatus == replication.Failed {
		eventName = event.ObjectReplicationFailed
	}
	newReplStatusInternal := rinfos.ReplicationStatusInternal()
	// Note that internal replication status(es) may match for previously replicated objects - in such cases
	// metadata should be updated with last resync timestamp.
	objInfo := ri.ToObjectInfo()
	if ri.ReplicationStatusInternal != newReplStatusInternal || rinfos.ReplicationResynced() {
		popts := ObjectOptions{
			MTime:     ri.ModTime,
			VersionID: ri.VersionID,
			EvalMetadataFn: func(oi *ObjectInfo, gerr error) (dsc ReplicateDecision, err error) {
				oi.UserDefined[ReservedMetadataPrefixLower+ReplicationStatus] = newReplStatusInternal
				oi.UserDefined[ReservedMetadataPrefixLower+ReplicationTimestamp] = UTCNow().Format(time.RFC3339Nano)
				oi.UserDefined[xhttp.AmzBucketReplicationStatus] = string(rinfos.ReplicationStatus())
				for _, rinfo := range rinfos.Targets {
					if rinfo.ResyncTimestamp != "" {
						oi.UserDefined[targetResetHeader(rinfo.Arn)] = rinfo.ResyncTimestamp
					}
				}
				if ri.UserTags != "" {
					oi.UserDefined[xhttp.AmzObjectTagging] = ri.UserTags
				}
				return dsc, nil
			},
		}

		uobjInfo, _ := objectAPI.PutObjectMetadata(ctx, bucket, object, popts)
		if uobjInfo.Name != "" {
			objInfo = uobjInfo
		}

		opType := replication.MetadataReplicationType
		if rinfos.Action() == replicateAll {
			opType = replication.ObjectReplicationType
		}
		for _, rinfo := range rinfos.Targets {
			if rinfo.ReplicationStatus != rinfo.PrevReplicationStatus {
				rinfo.OpType = opType // update optype to reflect correct operation.
				globalReplicationStats.Update(bucket, rinfo, rinfo.ReplicationStatus, rinfo.PrevReplicationStatus)
			}
		}
	}

	sendEvent(eventArgs{
		EventName:  eventName,
		BucketName: bucket,
		Object:     objInfo,
		UserAgent:  "Internal: [Replication]",
		Host:       globalLocalNodeName,
	})

	// re-queue failures once more - keep a retry count to avoid flooding the queue if
	// the target site is down. Leave it to scanner to catch up instead.
	if rinfos.ReplicationStatus() != replication.Completed {
		ri.OpType = replication.HealReplicationType
		ri.EventType = ReplicateMRF
		ri.ReplicationStatusInternal = rinfos.ReplicationStatusInternal()
		ri.RetryCount++
		globalReplicationPool.queueMRFSave(ri.ToMRFEntry())
	}
}
2022-09-14 21:44:04 -04:00
// replicateObject replicates object data for specified version of the object to destination bucket
2021-09-18 16:31:35 -04:00
// The source object is then updated to reflect the replication status.
2022-09-14 21:44:04 -04:00
func ( ri ReplicateObjectInfo ) replicateObject ( ctx context . Context , objectAPI ObjectLayer , tgt * TargetClient ) ( rinfo replicatedTargetInfo ) {
2021-11-17 15:10:57 -05:00
startTime := time . Now ( )
2023-09-16 05:28:06 -04:00
bucket := ri . Bucket
object := ri . Name
2022-09-14 21:44:04 -04:00
rAction := replicateAll
2021-09-18 16:31:35 -04:00
rinfo = replicatedTargetInfo {
2023-09-16 05:28:06 -04:00
Size : ri . ActualSize ,
2021-09-18 16:31:35 -04:00
Arn : tgt . ARN ,
2023-09-16 05:28:06 -04:00
PrevReplicationStatus : ri . TargetReplicationStatus ( tgt . ARN ) ,
2021-09-18 16:31:35 -04:00
ReplicationStatus : replication . Failed ,
OpType : ri . OpType ,
ReplicationAction : rAction ,
2023-08-30 04:00:59 -04:00
endpoint : tgt . EndpointURL ( ) . Host ,
secure : tgt . EndpointURL ( ) . Scheme == "https" ,
2021-09-18 16:31:35 -04:00
}
2023-09-16 05:28:06 -04:00
if ri . TargetReplicationStatus ( tgt . ARN ) == replication . Completed && ! ri . ExistingObjResync . Empty ( ) && ! ri . ExistingObjResync . mustResyncTarget ( tgt . ARN ) {
2021-09-18 16:31:35 -04:00
rinfo . ReplicationStatus = replication . Completed
rinfo . ReplicationResynced = true
2021-09-08 18:34:50 -04:00
return
}
2022-09-14 21:44:04 -04:00
2022-08-16 20:46:22 -04:00
if globalBucketTargetSys . isOffline ( tgt . EndpointURL ( ) ) {
2024-04-04 08:04:40 -04:00
replLogOnceIf ( ctx , fmt . Errorf ( "remote target is offline for bucket:%s arn:%s retry:%d" , bucket , tgt . ARN , ri . RetryCount ) , "replication-target-offline" + tgt . ARN )
2021-08-23 11:16:18 -04:00
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
2023-09-16 05:28:06 -04:00
Object : ri . ToObjectInfo ( ) ,
2023-04-06 13:20:53 -04:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2021-08-23 11:16:18 -04:00
} )
return
}
2022-06-06 18:14:56 -04:00
versioned := globalBucketVersioningSys . PrefixEnabled ( bucket , object )
versionSuspended := globalBucketVersioningSys . PrefixSuspended ( bucket , object )
2023-04-17 15:16:37 -04:00
gr , err := objectAPI . GetObjectNInfo ( ctx , bucket , object , nil , http . Header { } , ObjectOptions {
2024-03-28 13:44:56 -04:00
VersionID : ri . VersionID ,
Versioned : versioned ,
VersionSuspended : versionSuspended ,
ReplicationRequest : true ,
2020-09-15 23:44:48 -04:00
} )
2020-07-21 20:49:56 -04:00
if err != nil {
2022-10-07 19:11:41 -04:00
if ! isErrVersionNotFound ( err ) && ! isErrObjectNotFound ( err ) {
2023-09-16 05:28:06 -04:00
objInfo := ri . ToObjectInfo ( )
2022-08-19 19:21:05 -04:00
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
2023-04-06 13:20:53 -04:00
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
2022-08-19 19:21:05 -04:00
} )
2024-04-04 08:04:40 -04:00
replLogOnceIf ( ctx , fmt . Errorf ( "unable to read source object %s/%s(%s): %w" , bucket , object , objInfo . VersionID , err ) , object + ":" + objInfo . VersionID )
2022-08-19 19:21:05 -04:00
}
2020-07-21 20:49:56 -04:00
return
}
2022-09-14 21:44:04 -04:00
defer gr . Close ( )
2023-09-16 05:28:06 -04:00
objInfo := gr . ObjInfo
2023-06-01 21:52:55 -04:00
// make sure we have the latest metadata for metrics calculation
rinfo . PrevReplicationStatus = objInfo . TargetReplicationStatus ( tgt . ARN )
2022-09-14 21:44:04 -04:00
2024-06-06 15:31:01 -04:00
// Set the encrypted size for SSE-C objects
var size int64
if crypto . SSEC . IsEncrypted ( objInfo . UserDefined ) {
size = objInfo . Size
} else {
size , err = objInfo . GetActualSize ( )
if err != nil {
replLogIf ( ctx , err )
sendEvent ( eventArgs {
EventName : event . ObjectReplicationNotTracked ,
BucketName : bucket ,
Object : objInfo ,
UserAgent : "Internal: [Replication]" ,
Host : globalLocalNodeName ,
} )
return
}
2022-09-14 21:44:04 -04:00
}
	if tgt.Bucket == "" {
		replLogIf(ctx, fmt.Errorf("unable to replicate object %s(%s), bucket is empty for target %s", objInfo.Name, objInfo.VersionID, tgt.EndpointURL()))
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     objInfo,
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		return rinfo
	}
	defer func() {
		if rinfo.ReplicationStatus == replication.Completed && ri.OpType == replication.ExistingObjectReplicationType && tgt.ResetID != "" {
			rinfo.ResyncTimestamp = fmt.Sprintf("%s;%s", UTCNow().Format(http.TimeFormat), tgt.ResetID)
			rinfo.ReplicationResynced = true
		}
		rinfo.Duration = time.Since(startTime)
	}()
	rinfo.ReplicationStatus = replication.Completed
	rinfo.Size = size
	rinfo.ReplicationAction = rAction
	// use core client to avoid doing multipart on PUT
	c := &minio.Core{Client: tgt.Client}

	putOpts, err := putReplicationOpts(ctx, tgt.StorageClass, objInfo, 0)
	if err != nil {
		replLogIf(ctx, fmt.Errorf("failure setting options for replication bucket:%s err:%w", bucket, err))
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     objInfo,
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		return
	}
	var headerSize int
	for k, v := range putOpts.Header() {
		headerSize += len(k) + len(v)
	}
	opts := &bandwidth.MonitorReaderOptions{
		BucketOptions: bandwidth.BucketOptions{
			Name:           ri.Bucket,
			ReplicationARN: tgt.ARN,
		},
		HeaderSize: headerSize,
	}
	newCtx := ctx
	if globalBucketMonitor.IsThrottled(bucket, tgt.ARN) {
		var cancel context.CancelFunc
		newCtx, cancel = context.WithTimeout(ctx, throttleDeadline)
		defer cancel()
	}
	r := bandwidth.NewMonitoredReader(newCtx, globalBucketMonitor, gr, opts)
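	// Multipart objects are replicated part by part so the target keeps the
	// source's part boundaries; everything else is sent as a single PUT.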
	if objInfo.isMultipart() {
		rinfo.Err = replicateObjectWithMultipart(ctx, c, tgt.Bucket, object, r, objInfo, putOpts)
	} else {
		_, rinfo.Err = c.PutObject(ctx, tgt.Bucket, object, r, size, "", "", putOpts)
	}
	if rinfo.Err != nil {
		if minio.ToErrorResponse(rinfo.Err).Code != "PreconditionFailed" {
			rinfo.ReplicationStatus = replication.Failed
			replLogIf(ctx, fmt.Errorf("unable to replicate object %s/%s(%s) to target %s: %w",
				bucket, objInfo.Name, objInfo.VersionID, tgt.EndpointURL(), rinfo.Err))
		}
		if minio.IsNetworkOrHostDown(rinfo.Err, true) && !globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
			globalBucketTargetSys.markOffline(tgt.EndpointURL())
		}
	}
	return
}

// replicateAll replicates metadata for the specified version of the object to the destination bucket;
// if the destination version is missing it automatically does a full copy as well.
// The source object is then updated to reflect the replication status.
func (ri ReplicateObjectInfo) replicateAll(ctx context.Context, objectAPI ObjectLayer, tgt *TargetClient) (rinfo replicatedTargetInfo) {
	startTime := time.Now()
	bucket := ri.Bucket
	object := ri.Name

	// set defaults for replication action based on operation being performed - actual
	// replication action can only be determined after stat on remote. This default is
	// needed for updating replication metrics correctly when target is offline.
	rAction := replicateMetadata
	rinfo = replicatedTargetInfo{
		Size:                  ri.ActualSize,
		Arn:                   tgt.ARN,
		PrevReplicationStatus: ri.TargetReplicationStatus(tgt.ARN),
		ReplicationStatus:     replication.Failed,
		OpType:                ri.OpType,
		ReplicationAction:     rAction,
		endpoint:              tgt.EndpointURL().Host,
		secure:                tgt.EndpointURL().Scheme == "https",
	}
	if globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
		replLogOnceIf(ctx, fmt.Errorf("remote target is offline for bucket:%s arn:%s retry:%d", bucket, tgt.ARN, ri.RetryCount), "replication-target-offline-heal"+tgt.ARN)
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     ri.ToObjectInfo(),
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		return
	}
	versioned := globalBucketVersioningSys.PrefixEnabled(bucket, object)
	versionSuspended := globalBucketVersioningSys.PrefixSuspended(bucket, object)

	gr, err := objectAPI.GetObjectNInfo(ctx, bucket, object, nil, http.Header{},
		ObjectOptions{
			VersionID:          ri.VersionID,
			Versioned:          versioned,
			VersionSuspended:   versionSuspended,
			ReplicationRequest: true,
		})
	if err != nil {
		if !isErrVersionNotFound(err) && !isErrObjectNotFound(err) {
			objInfo := ri.ToObjectInfo()
			sendEvent(eventArgs{
				EventName:  event.ObjectReplicationNotTracked,
				BucketName: bucket,
				Object:     objInfo,
				UserAgent:  "Internal: [Replication]",
				Host:       globalLocalNodeName,
			})
			replLogIf(ctx, fmt.Errorf("unable to replicate to target %s for %s/%s(%s): %w", tgt.EndpointURL(), bucket, object, objInfo.VersionID, err))
		}
		return
	}
	defer gr.Close()
	objInfo := gr.ObjInfo

	// make sure we have the latest metadata for metrics calculation
	rinfo.PrevReplicationStatus = objInfo.TargetReplicationStatus(tgt.ARN)

	// use latest ObjectInfo to check if previous replication attempt succeeded
	if objInfo.TargetReplicationStatus(tgt.ARN) == replication.Completed && !ri.ExistingObjResync.Empty() && !ri.ExistingObjResync.mustResyncTarget(tgt.ARN) {
		rinfo.ReplicationStatus = replication.Completed
		rinfo.ReplicationResynced = true
		return
	}

	size, err := objInfo.GetActualSize()
	if err != nil {
		replLogIf(ctx, err)
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     objInfo,
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		return
	}

	// Set the encrypted size for SSE-C objects
	isSSEC := crypto.SSEC.IsEncrypted(objInfo.UserDefined)
	if isSSEC {
		size = objInfo.Size
	}
	if tgt.Bucket == "" {
		replLogIf(ctx, fmt.Errorf("unable to replicate object %s(%s) to %s, target bucket is missing", objInfo.Name, objInfo.VersionID, tgt.EndpointURL()))
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     objInfo,
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		return rinfo
	}
	defer func() {
		if rinfo.ReplicationStatus == replication.Completed && ri.OpType == replication.ExistingObjectReplicationType && tgt.ResetID != "" {
			rinfo.ResyncTimestamp = fmt.Sprintf("%s;%s", UTCNow().Format(http.TimeFormat), tgt.ResetID)
			rinfo.ReplicationResynced = true
		}
		rinfo.Duration = time.Since(startTime)
	}()
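
	// HEAD the object on the remote: the response determines whether a
	// metadata-only copy suffices or the object data must be re-uploaded.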
	oi, cerr := tgt.StatObject(ctx, tgt.Bucket, object, minio.StatObjectOptions{
		VersionID: objInfo.VersionID,
		Internal: minio.AdvancedGetOptions{
			ReplicationProxyRequest: "false",
		},
	})
	if cerr == nil {
		rAction = getReplicationAction(objInfo, oi, ri.OpType)
		rinfo.ReplicationStatus = replication.Completed
		if rAction == replicateNone {
			if ri.OpType == replication.ExistingObjectReplicationType &&
				objInfo.ModTime.Unix() > oi.LastModified.Unix() && objInfo.VersionID == nullVersionID {
				replLogIf(ctx, fmt.Errorf("unable to replicate %s/%s (null). Newer version exists on target %s", bucket, object, tgt.EndpointURL()))
				sendEvent(eventArgs{
					EventName:  event.ObjectReplicationNotTracked,
					BucketName: bucket,
					Object:     objInfo,
					UserAgent:  "Internal: [Replication]",
					Host:       globalLocalNodeName,
				})
			}
			// object with same VersionID already exists, replication kicked off by
			// PutObject might have completed
			if objInfo.TargetReplicationStatus(tgt.ARN) == replication.Pending ||
				objInfo.TargetReplicationStatus(tgt.ARN) == replication.Failed ||
				ri.OpType == replication.ExistingObjectReplicationType {
				// if metadata is not updated for some reason after replication, such as
				// 503 encountered while updating metadata - make sure to set ReplicationStatus
				// as Completed.
				//
				// Note: Replication Stats would have been updated despite metadata update failure.
				rinfo.ReplicationAction = rAction
				rinfo.ReplicationStatus = replication.Completed
			}
			return
		}
	} else {
		// SSE-C objects will refuse HeadObject without the decryption key.
		// Ignore the error, since we know the object exists and versioning prevents overwriting existing versions.
		if isSSEC && strings.Contains(cerr.Error(), errorCodes[ErrSSEEncryptedObject].Description) {
			rinfo.ReplicationStatus = replication.Completed
			rinfo.ReplicationAction = replicateNone
			goto applyAction
		}
		// if target returns error other than NoSuchKey, defer replication attempt
		if minio.IsNetworkOrHostDown(cerr, true) && !globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
			globalBucketTargetSys.markOffline(tgt.EndpointURL())
		}

		serr := ErrorRespToObjectError(cerr, bucket, object, objInfo.VersionID)
		switch {
		case isErrMethodNotAllowed(serr):
			rAction = replicateAll
		case isErrObjectNotFound(serr), isErrVersionNotFound(serr):
			rAction = replicateAll
		case isErrReadQuorum(serr), isErrWriteQuorum(serr):
			rAction = replicateAll
		default:
			rinfo.Err = cerr
			replLogIf(ctx, fmt.Errorf("unable to replicate %s/%s (%s). Target (%s) returned %s error on HEAD",
				bucket, object, objInfo.VersionID, tgt.EndpointURL(), cerr))
			sendEvent(eventArgs{
				EventName:  event.ObjectReplicationNotTracked,
				BucketName: bucket,
				Object:     objInfo,
				UserAgent:  "Internal: [Replication]",
				Host:       globalLocalNodeName,
			})
			return
		}
	}
applyAction:
	rinfo.ReplicationStatus = replication.Completed
	rinfo.Size = size
	rinfo.ReplicationAction = rAction
	// use core client to avoid doing multipart on PUT
	c := &minio.Core{Client: tgt.Client}
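	// replicateMetadata only needs a server-side CopyObject of the target object
	// onto itself with replaced metadata/tags; replicateAll streams the object
	// data as well.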
	if rAction != replicateAll {
		// replicate metadata for object tagging/copy with metadata replacement
		srcOpts := minio.CopySrcOptions{
			Bucket:    tgt.Bucket,
			Object:    object,
			VersionID: objInfo.VersionID,
		}
		dstOpts := minio.PutObjectOptions{
			Internal: minio.AdvancedPutOptions{
				SourceVersionID:    objInfo.VersionID,
				ReplicationRequest: true, // always set this to distinguish between `mc mirror` replication and serverside
			},
		}
		if tagTmStr, ok := objInfo.UserDefined[ReservedMetadataPrefixLower+TaggingTimestamp]; ok {
			ondiskTimestamp, err := time.Parse(time.RFC3339, tagTmStr)
			if err == nil {
				dstOpts.Internal.TaggingTimestamp = ondiskTimestamp
			}
		}
		if retTmStr, ok := objInfo.UserDefined[ReservedMetadataPrefixLower+ObjectLockRetentionTimestamp]; ok {
			ondiskTimestamp, err := time.Parse(time.RFC3339, retTmStr)
			if err == nil {
				dstOpts.Internal.RetentionTimestamp = ondiskTimestamp
			}
		}
		if lholdTmStr, ok := objInfo.UserDefined[ReservedMetadataPrefixLower+ObjectLockLegalHoldTimestamp]; ok {
			ondiskTimestamp, err := time.Parse(time.RFC3339, lholdTmStr)
			if err == nil {
				dstOpts.Internal.LegalholdTimestamp = ondiskTimestamp
			}
		}
		if _, rinfo.Err = c.CopyObject(ctx, tgt.Bucket, object, tgt.Bucket, object, getCopyObjMetadata(objInfo, tgt.StorageClass), srcOpts, dstOpts); rinfo.Err != nil {
			rinfo.ReplicationStatus = replication.Failed
			replLogIf(ctx, fmt.Errorf("unable to replicate metadata for object %s/%s(%s) to target %s: %w", bucket, objInfo.Name, objInfo.VersionID, tgt.EndpointURL(), rinfo.Err))
		}
	} else {
		var putOpts minio.PutObjectOptions
		putOpts, err = putReplicationOpts(ctx, tgt.StorageClass, objInfo, 0)
		if err != nil {
			replLogIf(ctx, fmt.Errorf("failed to set replicate options for object %s/%s(%s) (target %s) err:%w", bucket, objInfo.Name, objInfo.VersionID, tgt.EndpointURL(), err))
			sendEvent(eventArgs{
				EventName:  event.ObjectReplicationNotTracked,
				BucketName: bucket,
				Object:     objInfo,
				UserAgent:  "Internal: [Replication]",
				Host:       globalLocalNodeName,
			})
			return
		}
		var headerSize int
		for k, v := range putOpts.Header() {
			headerSize += len(k) + len(v)
		}

		opts := &bandwidth.MonitorReaderOptions{
			BucketOptions: bandwidth.BucketOptions{
				Name:           objInfo.Bucket,
				ReplicationARN: tgt.ARN,
			},
			HeaderSize: headerSize,
		}
		newCtx := ctx
		if globalBucketMonitor.IsThrottled(bucket, tgt.ARN) {
			var cancel context.CancelFunc
			newCtx, cancel = context.WithTimeout(ctx, throttleDeadline)
			defer cancel()
		}
		r := bandwidth.NewMonitoredReader(newCtx, globalBucketMonitor, gr, opts)
		if objInfo.isMultipart() {
			rinfo.Err = replicateObjectWithMultipart(ctx, c, tgt.Bucket, object, r, objInfo, putOpts)
		} else {
			_, rinfo.Err = c.PutObject(ctx, tgt.Bucket, object, r, size, "", "", putOpts)
		}
		if rinfo.Err != nil {
			if minio.ToErrorResponse(rinfo.Err).Code != "PreconditionFailed" {
				rinfo.ReplicationStatus = replication.Failed
				replLogIf(ctx, fmt.Errorf("unable to replicate object %s/%s(%s) to target %s: %w",
					bucket, objInfo.Name, objInfo.VersionID, tgt.EndpointURL(), rinfo.Err))
			}
			if minio.IsNetworkOrHostDown(rinfo.Err, true) && !globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
				globalBucketTargetSys.markOffline(tgt.EndpointURL())
			}
		}
	}
	return
}

func replicateObjectWithMultipart(ctx context.Context, c *minio.Core, bucket, object string, r io.Reader, objInfo ObjectInfo, opts minio.PutObjectOptions) (err error) {
	var uploadedParts []minio.CompletePart
	// new multipart must not set mtime as it may lead to erroneous cleanups at various intervals.
	opts.Internal.SourceMTime = time.Time{} // this value is saved properly in CompleteMultipartUpload()
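
	// Create the remote multipart upload, retrying up to 3 times with a
	// one-minute timeout and jittered backoff per attempt.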
	var uploadID string
	attempts := 1
	for attempts <= 3 {
		nctx, cancel := context.WithTimeout(ctx, time.Minute)
		uploadID, err = c.NewMultipartUpload(nctx, bucket, object, opts)
		cancel()
		if err == nil {
			break
		}
		if minio.ToErrorResponse(err).Code == "PreconditionFailed" {
			return nil
		}
		attempts++
		time.Sleep(time.Duration(rand.Int63n(int64(time.Second))))
	}
	if err != nil {
		return err
	}
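
	// On any later failure, make a best-effort attempt (3 tries) to abort the
	// remote upload so incomplete parts don't linger on the target.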
	defer func() {
		if err != nil {
			// block and abort remote upload upon failure.
			attempts := 1
			for attempts <= 3 {
				actx, acancel := context.WithTimeout(ctx, time.Minute)
				aerr := c.AbortMultipartUpload(actx, bucket, object, uploadID)
				acancel()
				if aerr == nil {
					return
				}
				attempts++
				time.Sleep(time.Duration(rand.Int63n(int64(time.Second))))
			}
		}
	}()
	var (
		hr     *hash.Reader
		pInfo  minio.ObjectPart
		isSSEC = crypto.SSEC.IsEncrypted(objInfo.UserDefined)
	)

	var objectSize int64
	for _, partInfo := range objInfo.Parts {
		if isSSEC {
			hr, err = hash.NewReader(ctx, io.LimitReader(r, partInfo.Size), partInfo.Size, "", "", partInfo.ActualSize)
		} else {
			hr, err = hash.NewReader(ctx, io.LimitReader(r, partInfo.ActualSize), partInfo.ActualSize, "", "", partInfo.ActualSize)
		}
		if err != nil {
			return err
		}

		cHeader := http.Header{}
		cHeader.Add(xhttp.MinIOSourceReplicationRequest, "true")
		if !isSSEC {
			crc := getCRCMeta(objInfo, partInfo.Number, nil) // No SSE-C keys here.
			for k, v := range crc {
				cHeader.Add(k, v)
			}
		}
		popts := minio.PutObjectPartOptions{
			SSE:          opts.ServerSideEncryption,
			CustomHeader: cHeader,
		}
		if isSSEC {
			objectSize += partInfo.Size
			pInfo, err = c.PutObjectPart(ctx, bucket, object, uploadID, partInfo.Number, hr, partInfo.Size, popts)
		} else {
			objectSize += partInfo.ActualSize
			pInfo, err = c.PutObjectPart(ctx, bucket, object, uploadID, partInfo.Number, hr, partInfo.ActualSize, popts)
		}
		if err != nil {
			return err
		}
		if !isSSEC && pInfo.Size != partInfo.ActualSize {
			return fmt.Errorf("Part size mismatch: got %d, want %d", pInfo.Size, partInfo.ActualSize)
		}
		uploadedParts = append(uploadedParts, minio.CompletePart{
			PartNumber:     pInfo.PartNumber,
			ETag:           pInfo.ETag,
			ChecksumCRC32:  pInfo.ChecksumCRC32,
			ChecksumCRC32C: pInfo.ChecksumCRC32C,
			ChecksumSHA1:   pInfo.ChecksumSHA1,
			ChecksumSHA256: pInfo.ChecksumSHA256,
		})
	}
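
	// Propagate the source's pre-encryption object size (and, for SSE-C, the
	// stored checksum header) so the target records them in its metadata.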
	userMeta := map[string]string{
		validSSEReplicationHeaders[ReservedMetadataPrefix+"Actual-Object-Size"]: objInfo.UserDefined[ReservedMetadataPrefix+"actual-size"],
	}
	if isSSEC && objInfo.UserDefined[ReplicationSsecChecksumHeader] != "" {
		userMeta[ReplicationSsecChecksumHeader] = objInfo.UserDefined[ReplicationSsecChecksumHeader]
	}

	// really big value but it's okay on heavily loaded systems. This is just a tail-end timeout.
	cctx, ccancel := context.WithTimeout(ctx, 10*time.Minute)
	defer ccancel()
	_, err = c.CompleteMultipartUpload(cctx, bucket, object, uploadID, uploadedParts, minio.PutObjectOptions{
		UserMetadata: userMeta,
		Internal: minio.AdvancedPutOptions{
			SourceMTime: objInfo.ModTime,
			SourceETag:  objInfo.ETag,
			// always set this to distinguish between `mc mirror` replication and serverside
			ReplicationRequest: true,
		},
	})
	return err
}

// filterReplicationStatusMetadata filters replication status metadata for COPY
func filterReplicationStatusMetadata(metadata map[string]string) map[string]string {
	// Copy on write
	dst := metadata
	var copied bool
	delKey := func(key string) {
		if _, ok := metadata[key]; !ok {
			return
		}
		if !copied {
			dst = make(map[string]string, len(metadata))
			for k, v := range metadata {
				dst[k] = v
			}
			copied = true
		}
		delete(dst, key)
	}
	delKey(xhttp.AmzBucketReplicationStatus)
	return dst
}

// DeletedObjectReplicationInfo has info on deleted object
type DeletedObjectReplicationInfo struct {
	DeletedObject
	Bucket    string
	EventType string
	OpType    replication.Type
	ResetID   string
	TargetArn string
}

// ToMRFEntry returns the relevant info needed by MRF
func (di DeletedObjectReplicationInfo) ToMRFEntry() MRFReplicateEntry {
	versionID := di.DeleteMarkerVersionID
	if versionID == "" {
		versionID = di.VersionID
	}
	return MRFReplicateEntry{
		Bucket:    di.Bucket,
		Object:    di.ObjectName,
		versionID: versionID,
	}
}

// Replication specific APIName
const (
	ReplicateObjectAPI = "ReplicateObject"
	ReplicateDeleteAPI = "ReplicateDelete"
)

const (
	// ReplicateQueued - replication being queued trail
	ReplicateQueued = "replicate:queue"
	// ReplicateExisting - audit trail for existing objects replication
	ReplicateExisting = "replicate:existing"
	// ReplicateExistingDelete - audit trail for delete replication triggered for existing delete markers
	ReplicateExistingDelete = "replicate:existing:delete"
	// ReplicateMRF - audit trail for replication from Most Recent Failures (MRF) queue
	ReplicateMRF = "replicate:mrf"
	// ReplicateIncoming - audit trail of inline replication
	ReplicateIncoming = "replicate:incoming"
	// ReplicateIncomingDelete - audit trail of inline replication of deletes.
	ReplicateIncomingDelete = "replicate:incoming:delete"
	// ReplicateHeal - audit trail for healing of failed/pending replications
	ReplicateHeal = "replicate:heal"
	// ReplicateHealDelete - audit trail of healing of failed/pending delete replications.
	ReplicateHealDelete = "replicate:heal:delete"
)

var (
	globalReplicationPool  *ReplicationPool
	globalReplicationStats *ReplicationStats
)

// ReplicationPool describes replication pool
type ReplicationPool struct {
	// atomic ops:
	activeWorkers    int32
	activeMRFWorkers int32

	objLayer   ObjectLayer
	ctx        context.Context
	priority   string
	maxWorkers int
	mu         sync.RWMutex
	mrfMU      sync.Mutex
	resyncer   *replicationResyncer

	// workers:
	workers    []chan ReplicationWorkerOperation
	lrgworkers []chan ReplicationWorkerOperation

	// mrf:
	mrfWorkerKillCh chan struct{}
	mrfReplicaCh    chan ReplicationWorkerOperation
	mrfSaveCh       chan MRFReplicateEntry
	mrfStopCh       chan struct{}
	mrfWorkerSize   int
}

// ReplicationWorkerOperation is a shared interface of replication operations.
type ReplicationWorkerOperation interface {
	ToMRFEntry() MRFReplicateEntry
}

const (
	// WorkerMaxLimit max number of workers per node for "fast" mode
	WorkerMaxLimit = 500
	// WorkerMinLimit min number of workers per node for "slow" mode
	WorkerMinLimit = 50
	// WorkerAutoDefault is default number of workers for "auto" mode
	WorkerAutoDefault = 100
	// MRFWorkerMaxLimit max number of mrf workers per node for "fast" mode
	MRFWorkerMaxLimit = 8
	// MRFWorkerMinLimit min number of mrf workers per node for "slow" mode
	MRFWorkerMinLimit = 2
	// MRFWorkerAutoDefault is default number of mrf workers for "auto" mode
	MRFWorkerAutoDefault = 4
	// LargeWorkerCount is default number of workers assigned to large uploads ( >= 128MiB)
	LargeWorkerCount = 10
)
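
// Priority maps to initial pool sizes (capped by maxWorkers when configured):
//   "fast": WorkerMaxLimit workers, MRFWorkerMaxLimit MRF workers
//   "slow": WorkerMinLimit workers, MRFWorkerMinLimit MRF workers
//   "auto": the defaults above, grown on demand when queues back up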

// NewReplicationPool creates a pool of replication workers of specified size
func NewReplicationPool(ctx context.Context, o ObjectLayer, opts replicationPoolOpts) *ReplicationPool {
	var workers, failedWorkers int
	priority := "auto"
	maxWorkers := WorkerMaxLimit
	if opts.Priority != "" {
		priority = opts.Priority
	}
	if opts.MaxWorkers > 0 {
		maxWorkers = opts.MaxWorkers
	}
	switch priority {
	case "fast":
		workers = WorkerMaxLimit
		failedWorkers = MRFWorkerMaxLimit
	case "slow":
		workers = WorkerMinLimit
		failedWorkers = MRFWorkerMinLimit
	default:
		workers = WorkerAutoDefault
		failedWorkers = MRFWorkerAutoDefault
	}
	if maxWorkers > 0 && workers > maxWorkers {
		workers = maxWorkers
	}
	if maxWorkers > 0 && failedWorkers > maxWorkers {
		failedWorkers = maxWorkers
	}
	pool := &ReplicationPool{
		workers:         make([]chan ReplicationWorkerOperation, 0, workers),
		lrgworkers:      make([]chan ReplicationWorkerOperation, 0, LargeWorkerCount),
		mrfReplicaCh:    make(chan ReplicationWorkerOperation, 100000),
		mrfWorkerKillCh: make(chan struct{}, failedWorkers),
		resyncer:        newresyncer(),
		mrfSaveCh:       make(chan MRFReplicateEntry, 100000),
		mrfStopCh:       make(chan struct{}, 1),
		ctx:             ctx,
		objLayer:        o,
		priority:        priority,
		maxWorkers:      maxWorkers,
	}

	pool.AddLargeWorkers()
	pool.ResizeWorkers(workers, 0)
	pool.ResizeFailedWorkers(failedWorkers)
	go pool.resyncer.PersistToDisk(ctx, o)
	go pool.processMRF()
	go pool.persistMRF()
	return pool
}

// AddMRFWorker adds a pending/failed replication worker to handle requests that could not be queued
// to the other workers
func (p *ReplicationPool) AddMRFWorker() {
	for {
		select {
		case <-p.ctx.Done():
			return
		case oi, ok := <-p.mrfReplicaCh:
			if !ok {
				return
			}
			switch v := oi.(type) {
			case ReplicateObjectInfo:
				globalReplicationStats.incQ(v.Bucket, v.Size, v.DeleteMarker, v.OpType)
				atomic.AddInt32(&p.activeMRFWorkers, 1)
				replicateObject(p.ctx, v, p.objLayer)
				atomic.AddInt32(&p.activeMRFWorkers, -1)
				globalReplicationStats.decQ(v.Bucket, v.Size, v.DeleteMarker, v.OpType)
			default:
				bugLogIf(p.ctx, fmt.Errorf("unknown mrf replication type: %T", oi), "unknown-mrf-replicate-type")
			}
		case <-p.mrfWorkerKillCh:
			return
		}
	}
}

// AddWorker adds a replication worker to the pool.
// An optional pointer to a tracker that will be atomically
// incremented when operations are running can be provided.
func (p *ReplicationPool) AddWorker(input <-chan ReplicationWorkerOperation, opTracker *int32) {
	for {
		select {
		case <-p.ctx.Done():
			return
		case oi, ok := <-input:
			if !ok {
				return
			}
			switch v := oi.(type) {
			case ReplicateObjectInfo:
				if opTracker != nil {
					atomic.AddInt32(opTracker, 1)
				}
				globalReplicationStats.incQ(v.Bucket, v.Size, v.DeleteMarker, v.OpType)
				replicateObject(p.ctx, v, p.objLayer)
				globalReplicationStats.decQ(v.Bucket, v.Size, v.DeleteMarker, v.OpType)
				if opTracker != nil {
					atomic.AddInt32(opTracker, -1)
				}
			case DeletedObjectReplicationInfo:
				if opTracker != nil {
					atomic.AddInt32(opTracker, 1)
				}
				globalReplicationStats.incQ(v.Bucket, 0, true, v.OpType)
				replicateDelete(p.ctx, v, p.objLayer)
				globalReplicationStats.decQ(v.Bucket, 0, true, v.OpType)
				if opTracker != nil {
					atomic.AddInt32(opTracker, -1)
				}
			default:
				bugLogIf(p.ctx, fmt.Errorf("unknown replication type: %T", oi), "unknown-replicate-type")
			}
		}
	}
}

// AddLargeWorkers adds a static number of workers to handle large uploads
func (p *ReplicationPool) AddLargeWorkers() {
	for i := 0; i < LargeWorkerCount; i++ {
		p.lrgworkers = append(p.lrgworkers, make(chan ReplicationWorkerOperation, 100000))
		i := i
		go p.AddLargeWorker(p.lrgworkers[i])
	}
	go func() {
		<-p.ctx.Done()
		for i := 0; i < LargeWorkerCount; i++ {
			xioutil.SafeClose(p.lrgworkers[i])
		}
	}()
}

// AddLargeWorker adds a replication worker to the static pool for large uploads.
func (p *ReplicationPool) AddLargeWorker(input <-chan ReplicationWorkerOperation) {
	for {
		select {
		case <-p.ctx.Done():
			return
		case oi, ok := <-input:
			if !ok {
				return
			}
			switch v := oi.(type) {
			case ReplicateObjectInfo:
				globalReplicationStats.incQ(v.Bucket, v.Size, v.DeleteMarker, v.OpType)
				replicateObject(p.ctx, v, p.objLayer)
				globalReplicationStats.decQ(v.Bucket, v.Size, v.DeleteMarker, v.OpType)
			case DeletedObjectReplicationInfo:
				replicateDelete(p.ctx, v, p.objLayer)
			default:
				bugLogIf(p.ctx, fmt.Errorf("unknown replication type: %T", oi), "unknown-replicate-type")
			}
		}
	}
}

// ActiveWorkers returns the number of active workers handling replication traffic.
func (p *ReplicationPool) ActiveWorkers() int {
	return int(atomic.LoadInt32(&p.activeWorkers))
}

// ActiveMRFWorkers returns the number of active workers handling replication failures.
func (p *ReplicationPool) ActiveMRFWorkers() int {
	return int(atomic.LoadInt32(&p.activeMRFWorkers))
}

// ResizeWorkers sets replication workers pool to new size.
// checkOld can be set to an expected value; if the worker count
// changed while waiting for the lock, the resize is skipped.
func (p *ReplicationPool) ResizeWorkers(n, checkOld int) {
	p.mu.Lock()
	defer p.mu.Unlock()
	if (checkOld > 0 && len(p.workers) != checkOld) || n == len(p.workers) || n < 1 {
		// Either already satisfied or worker count changed while we waited for the lock.
		return
	}
	for len(p.workers) < n {
		input := make(chan ReplicationWorkerOperation, 10000)
		p.workers = append(p.workers, input)
		go p.AddWorker(input, &p.activeWorkers)
	}
	for len(p.workers) > n {
		worker := p.workers[len(p.workers)-1]
		p.workers = p.workers[:len(p.workers)-1]
		xioutil.SafeClose(worker)
	}
}

// ResizeWorkerPriority resizes the replication worker and MRF worker pools
// according to the requested priority setting.
func (p *ReplicationPool) ResizeWorkerPriority(pri string, maxWorkers int) {
	var workers, mrfWorkers int
	p.mu.Lock()
	switch pri {
	case "fast":
		workers = WorkerMaxLimit
		mrfWorkers = MRFWorkerMaxLimit
	case "slow":
		workers = WorkerMinLimit
		mrfWorkers = MRFWorkerMinLimit
	default:
		workers = WorkerAutoDefault
		mrfWorkers = MRFWorkerAutoDefault
		if len(p.workers) < WorkerAutoDefault {
			workers = min(len(p.workers)+1, WorkerAutoDefault)
		}
		if p.mrfWorkerSize < MRFWorkerAutoDefault {
			mrfWorkers = min(p.mrfWorkerSize+1, MRFWorkerAutoDefault)
		}
	}
	if maxWorkers > 0 && workers > maxWorkers {
		workers = maxWorkers
	}
	if maxWorkers > 0 && mrfWorkers > maxWorkers {
		mrfWorkers = maxWorkers
	}
	p.priority = pri
	p.maxWorkers = maxWorkers
	p.mu.Unlock()
	p.ResizeWorkers(workers, 0)
	p.ResizeFailedWorkers(mrfWorkers)
}

// ResizeFailedWorkers sets replication failed workers pool size
func (p *ReplicationPool) ResizeFailedWorkers(n int) {
	p.mu.Lock()
	defer p.mu.Unlock()
	for p.mrfWorkerSize < n {
		p.mrfWorkerSize++
		go p.AddMRFWorker()
	}
	for p.mrfWorkerSize > n {
		p.mrfWorkerSize--
		go func() { p.mrfWorkerKillCh <- struct{}{} }()
	}
}

const (
	minLargeObjSize = 128 * humanize.MiByte // 128MiB
)

// getWorkerCh gets a worker channel deterministically based on bucket and object names.
// Must be able to grab read lock from p.
func (p *ReplicationPool) getWorkerCh(bucket, object string, sz int64) chan<- ReplicationWorkerOperation {
	h := xxh3.HashString(bucket + object)
	p.mu.RLock()
	defer p.mu.RUnlock()
	if len(p.workers) == 0 {
		return nil
	}
	return p.workers[h%uint64(len(p.workers))]
}
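
// queueReplicaTask enqueues a replication task: objects at or above
// minLargeObjSize go to the dedicated large-upload workers, heal and
// existing-object traffic may land on either the MRF channel or a hashed
// worker, and everything else on a worker picked by bucket+object hash.
// When the queue is full the entry falls through to the MRF store.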
func (p *ReplicationPool) queueReplicaTask(ri ReplicateObjectInfo) {
	if p == nil {
		return
	}
	// if object is large, queue it to a static set of large workers
	if ri.Size >= int64(minLargeObjSize) {
		h := xxh3.HashString(ri.Bucket + ri.Name)
		select {
		case <-p.ctx.Done():
		case p.lrgworkers[h%LargeWorkerCount] <- ri:
		default:
			globalReplicationPool.queueMRFSave(ri.ToMRFEntry())
		}
		return
	}
	var ch, healCh chan<- ReplicationWorkerOperation
	switch ri.OpType {
	case replication.HealReplicationType, replication.ExistingObjectReplicationType:
		ch = p.mrfReplicaCh
		healCh = p.getWorkerCh(ri.Name, ri.Bucket, ri.Size)
	default:
		ch = p.getWorkerCh(ri.Name, ri.Bucket, ri.Size)
	}
	if ch == nil && healCh == nil {
		return
	}
	select {
	case <-p.ctx.Done():
	case healCh <- ri:
	case ch <- ri:
	default:
		globalReplicationPool.queueMRFSave(ri.ToMRFEntry())
		p.mu.RLock()
		prio := p.priority
		maxWorkers := p.maxWorkers
		p.mu.RUnlock()
		switch prio {
		case "fast":
			replLogOnceIf(GlobalContext, fmt.Errorf("Unable to keep up with incoming traffic"), string(replicationSubsystem), logger.WarningKind)
		case "slow":
			replLogOnceIf(GlobalContext, fmt.Errorf("Unable to keep up with incoming traffic - we recommend increasing replication priority with `mc admin config set api replication_priority=auto`"), string(replicationSubsystem), logger.WarningKind)
		default:
			maxWorkers = min(maxWorkers, WorkerMaxLimit)
			if p.ActiveWorkers() < maxWorkers {
				p.mu.RLock()
				workers := min(len(p.workers)+1, maxWorkers)
				existing := len(p.workers)
				p.mu.RUnlock()
				p.ResizeWorkers(workers, existing)
			}
			maxMRFWorkers := min(maxWorkers, MRFWorkerMaxLimit)
			if p.ActiveMRFWorkers() < maxMRFWorkers {
				p.mu.RLock()
				workers := min(p.mrfWorkerSize+1, maxMRFWorkers)
				p.mu.RUnlock()
				p.ResizeFailedWorkers(workers)
			}
		}
	}
}

func queueReplicateDeletesWrapper(doi DeletedObjectReplicationInfo, existingObjectResync ResyncDecision) {
	for k, v := range existingObjectResync.targets {
		if v.Replicate {
			doi.ResetID = v.ResetID
			doi.TargetArn = k
			globalReplicationPool.queueReplicaDeleteTask(doi)
		}
	}
}
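
// queueReplicaDeleteTask enqueues a delete replication task on a hashed worker;
// when the queue is full the entry falls through to the MRF store and, under
// "auto" priority, the worker pool may be grown on demand.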
func (p *ReplicationPool) queueReplicaDeleteTask(doi DeletedObjectReplicationInfo) {
	if p == nil {
		return
	}
	var ch chan<- ReplicationWorkerOperation
	switch doi.OpType {
	case replication.HealReplicationType, replication.ExistingObjectReplicationType:
		fallthrough
	default:
		ch = p.getWorkerCh(doi.Bucket, doi.ObjectName, 0)
	}
	select {
	case <-p.ctx.Done():
	case ch <- doi:
	default:
		globalReplicationPool.queueMRFSave(doi.ToMRFEntry())
		p.mu.RLock()
		prio := p.priority
		maxWorkers := p.maxWorkers
		p.mu.RUnlock()
		switch prio {
		case "fast":
			replLogOnceIf(GlobalContext, fmt.Errorf("Unable to keep up with incoming deletes"), string(replicationSubsystem), logger.WarningKind)
		case "slow":
			replLogOnceIf(GlobalContext, fmt.Errorf("Unable to keep up with incoming deletes - we recommend increasing replication priority with `mc admin config set api replication_priority=auto`"), string(replicationSubsystem), logger.WarningKind)
		default:
			maxWorkers = min(maxWorkers, WorkerMaxLimit)
			if p.ActiveWorkers() < maxWorkers {
				p.mu.RLock()
				workers := min(len(p.workers)+1, maxWorkers)
				existing := len(p.workers)
				p.mu.RUnlock()
				p.ResizeWorkers(workers, existing)
			}
		}
	}
}
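
// replicationPoolOpts carries the pool sizing knobs read from the server's `api`
// config subsystem - the replication priority and the worker cap (the priority
// knob is the `replication_priority` setting referenced in the queue-full
// warnings above).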
type replicationPoolOpts struct {
	Priority   string
	MaxWorkers int
}

func initBackgroundReplication(ctx context.Context, objectAPI ObjectLayer) {
	globalReplicationPool = NewReplicationPool(ctx, objectAPI, globalAPIConfig.getReplicationOpts())
	globalReplicationStats = NewReplicationStats(ctx, objectAPI)
	go globalReplicationStats.trackEWMA()
}

type proxyResult struct {
	Proxy bool
	Err   error
}

// get Reader from replication target if active-active replication is in place and
// this node returns a 404
func proxyGetToReplicationTarget(ctx context.Context, bucket, object string, rs *HTTPRangeSpec, h http.Header, opts ObjectOptions, proxyTargets *madmin.BucketTargets) (gr *GetObjectReader, proxy proxyResult, err error) {
	tgt, oi, proxy := proxyHeadToRepTarget(ctx, bucket, object, rs, opts, proxyTargets)
	if !proxy.Proxy {
		return nil, proxy, nil
	}
	fn, _, _, err := NewGetObjectReader(nil, oi, opts, h)
	if err != nil {
		return nil, proxy, err
	}
	gopts := minio.GetObjectOptions{
		VersionID:            opts.VersionID,
		ServerSideEncryption: opts.ServerSideEncryption,
		Internal: minio.AdvancedGetOptions{
			ReplicationProxyRequest: "true",
		},
		PartNumber: opts.PartNumber,
	}
	// get correct offsets for encrypted object
	if rs != nil {
		h, err := rs.ToHeader()
		if err != nil {
			return nil, proxy, err
		}
		gopts.Set(xhttp.Range, h)
	}
	// Make sure to match ETag when proxying.
	if err = gopts.SetMatchETag(oi.ETag); err != nil {
		return nil, proxy, err
	}
	c := minio.Core{Client: tgt.Client}
	obj, _, h, err := c.GetObject(ctx, tgt.Bucket, object, gopts)
	if err != nil {
		return nil, proxy, err
	}
	closeReader := func() { obj.Close() }
	reader, err := fn(obj, h, closeReader)
	if err != nil {
		return nil, proxy, err
	}
	reader.ObjInfo = oi.Clone()
	if rs != nil {
		contentSize, err := parseSizeFromContentRange(h)
		if err != nil {
			return nil, proxy, err
		}
		reader.ObjInfo.Size = contentSize
	}
	return reader, proxyResult{Proxy: true}, nil
}
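
// getProxyTargets returns the remote targets eligible for proxying this request;
// it returns an empty set when versioning is suspended or the request is itself
// an already-proxied one.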
func getProxyTargets(ctx context.Context, bucket, object string, opts ObjectOptions) (tgts *madmin.BucketTargets) {
	if opts.VersionSuspended {
		return &madmin.BucketTargets{}
	}
	if opts.ProxyRequest || (opts.ProxyHeaderSet && !opts.ProxyRequest) {
		return &madmin.BucketTargets{}
	}
	cfg, err := getReplicationConfig(ctx, bucket)
	if err != nil || cfg == nil {
		replLogOnceIf(ctx, err, bucket)
		return &madmin.BucketTargets{}
	}
	topts := replication.ObjectOpts{Name: object}
	tgtArns := cfg.FilterTargetArns(topts)
	tgts = &madmin.BucketTargets{Targets: make([]madmin.BucketTarget, len(tgtArns))}
	for i, tgtArn := range tgtArns {
		tgt := globalBucketTargetSys.GetRemoteBucketTargetByArn(ctx, bucket, tgtArn)
		tgts.Targets[i] = tgt
	}
	return tgts
}

func proxyHeadToRepTarget(ctx context.Context, bucket, object string, rs *HTTPRangeSpec, opts ObjectOptions, proxyTargets *madmin.BucketTargets) (tgt *TargetClient, oi ObjectInfo, proxy proxyResult) {
	// this option is set when active-active replication is in place between site A -> B,
	// and site B does not have the object yet.
	if opts.ProxyRequest || (opts.ProxyHeaderSet && !opts.ProxyRequest) { // true only when site B sets MinIOSourceProxyRequest header
		return nil, oi, proxy
	}
	var perr error
	for _, t := range proxyTargets.Targets {
		tgt = globalBucketTargetSys.GetRemoteTargetClient(bucket, t.Arn)
		if tgt == nil || globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
			continue
		}
		// if proxying explicitly disabled on remote target
		if tgt.disableProxy {
			continue
		}

		gopts := minio.GetObjectOptions{
			VersionID:            opts.VersionID,
			ServerSideEncryption: opts.ServerSideEncryption,
			Internal: minio.AdvancedGetOptions{
				ReplicationProxyRequest: "true",
			},
			PartNumber: opts.PartNumber,
		}
		if rs != nil {
			h, err := rs.ToHeader()
			if err != nil {
				replLogIf(ctx, fmt.Errorf("invalid range header for %s/%s(%s) - %w", bucket, object, opts.VersionID, err))
				continue
			}
			gopts.Set(xhttp.Range, h)
		}
		objInfo, err := tgt.StatObject(ctx, t.TargetBucket, object, gopts)
		if err != nil {
			perr = err
			if isErrInvalidRange(ErrorRespToObjectError(err, bucket, object)) {
				return nil, oi, proxyResult{Err: err}
			}
			continue
		}
		tags, _ := tags.MapToObjectTags(objInfo.UserTags)
		oi = ObjectInfo{
			Bucket:                    bucket,
			Name:                      object,
			ModTime:                   objInfo.LastModified,
			Size:                      objInfo.Size,
			ETag:                      objInfo.ETag,
			VersionID:                 objInfo.VersionID,
			IsLatest:                  objInfo.IsLatest,
			DeleteMarker:              objInfo.IsDeleteMarker,
			ContentType:               objInfo.ContentType,
			Expires:                   objInfo.Expires,
			StorageClass:              objInfo.StorageClass,
			ReplicationStatusInternal: objInfo.ReplicationStatus,
			UserTags:                  tags.String(),
			ReplicationStatus:         replication.StatusType(objInfo.ReplicationStatus),
		}
		oi.UserDefined = make(map[string]string, len(objInfo.Metadata))
		for k, v := range objInfo.Metadata {
			oi.UserDefined[k] = v[0]
		}
		ce, ok := oi.UserDefined[xhttp.ContentEncoding]
		if !ok {
			ce, ok = oi.UserDefined[strings.ToLower(xhttp.ContentEncoding)]
		}
		if ok {
			oi.ContentEncoding = ce
		}
		return tgt, oi, proxyResult{Proxy: true}
	}
	proxy.Err = perr
	return nil, oi, proxy
}

// get object info from replication target if active-active replication is in place and
// this node returns a 404
func proxyHeadToReplicationTarget(ctx context.Context, bucket, object string, rs *HTTPRangeSpec, opts ObjectOptions, proxyTargets *madmin.BucketTargets) (oi ObjectInfo, proxy proxyResult) {
	_, oi, proxy = proxyHeadToRepTarget(ctx, bucket, object, rs, opts, proxyTargets)
	return oi, proxy
}
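
// scheduleReplication snapshots the object's replication state into a
// ReplicateObjectInfo and either replicates it inline when the rule is
// synchronous or queues it on the replication pool.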
func scheduleReplication(ctx context.Context, oi ObjectInfo, o ObjectLayer, dsc ReplicateDecision, opType replication.Type) {
	tgtStatuses := replicationStatusesMap(oi.ReplicationStatusInternal)
	purgeStatuses := versionPurgeStatusesMap(oi.VersionPurgeStatusInternal)
	tm, _ := time.Parse(time.RFC3339Nano, oi.UserDefined[ReservedMetadataPrefixLower+ReplicationTimestamp])
	rstate := oi.ReplicationState()
	rstate.ReplicateDecisionStr = dsc.String()
	asz, _ := oi.GetActualSize()
	ri := ReplicateObjectInfo{
		Name:                       oi.Name,
		Size:                       oi.Size,
		ActualSize:                 asz,
		Bucket:                     oi.Bucket,
		VersionID:                  oi.VersionID,
		ETag:                       oi.ETag,
		ModTime:                    oi.ModTime,
		ReplicationStatus:          oi.ReplicationStatus,
		ReplicationStatusInternal:  oi.ReplicationStatusInternal,
		DeleteMarker:               oi.DeleteMarker,
		VersionPurgeStatusInternal: oi.VersionPurgeStatusInternal,
		VersionPurgeStatus:         oi.VersionPurgeStatus,
		ReplicationState:           rstate,
		OpType:                     opType,
		Dsc:                        dsc,
		TargetStatuses:             tgtStatuses,
		TargetPurgeStatuses:        purgeStatuses,
		ReplicationTimestamp:       tm,
		SSEC:                       crypto.SSEC.IsEncrypted(oi.UserDefined),
		UserTags:                   oi.UserTags,
	}
	if ri.SSEC {
		ri.Checksum = oi.Checksum
	}
	if dsc.Synchronous() {
		replicateObject(ctx, ri, o)
	} else {
		globalReplicationPool.queueReplicaTask(ri)
	}
}

// proxyTaggingToRepTarget proxies tagging requests to remote targets for
// active-active replicated setups
func proxyTaggingToRepTarget(ctx context.Context, bucket, object string, tags *tags.Tags, opts ObjectOptions, proxyTargets *madmin.BucketTargets) (proxy proxyResult) {
	// this option is set when active-active replication is in place between site A -> B,
	// and request hits site B that does not have the object yet.
	if opts.ProxyRequest || (opts.ProxyHeaderSet && !opts.ProxyRequest) { // true only when site B sets MinIOSourceProxyRequest header
		return proxy
	}
	var wg sync.WaitGroup
	errs := make([]error, len(proxyTargets.Targets))
	for idx, t := range proxyTargets.Targets {
		tgt := globalBucketTargetSys.GetRemoteTargetClient(bucket, t.Arn)
		if tgt == nil || globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
			continue
		}
		// if proxying explicitly disabled on remote target
		if tgt.disableProxy {
			continue
		}
		idx := idx
		wg.Add(1)
		go func(idx int, tgt *TargetClient) {
			defer wg.Done()
			var err error
			if tags != nil {
				popts := minio.PutObjectTaggingOptions{
					VersionID: opts.VersionID,
					Internal: minio.AdvancedObjectTaggingOptions{
						ReplicationProxyRequest: "true",
					},
				}
				err = tgt.PutObjectTagging(ctx, tgt.Bucket, object, tags, popts)
			} else {
				dopts := minio.RemoveObjectTaggingOptions{
					VersionID: opts.VersionID,
					Internal: minio.AdvancedObjectTaggingOptions{
						ReplicationProxyRequest: "true",
					},
				}
				err = tgt.RemoveObjectTagging(ctx, tgt.Bucket, object, dopts)
			}
			if err != nil {
				errs[idx] = err
			}
		}(idx, tgt)
	}
	wg.Wait()
	var (
		terr        error
		taggedCount int
	)
	for _, err := range errs {
		if err == nil {
			taggedCount++
			continue
		}
		terr = err
	}
	// don't return error if at least one target was tagged successfully
	if taggedCount == 0 && terr != nil {
		proxy.Err = terr
	}
	return proxy
}

// proxyGetTaggingToRepTarget proxies get tagging requests to remote targets for
// active-active replicated setups
func proxyGetTaggingToRepTarget(ctx context.Context, bucket, object string, opts ObjectOptions, proxyTargets *madmin.BucketTargets) (tgs *tags.Tags, proxy proxyResult) {
	// this option is set when active-active replication is in place between site A -> B,
	// and request hits site B that does not have the object yet.
	if opts.ProxyRequest || (opts.ProxyHeaderSet && !opts.ProxyRequest) { // true only when site B sets MinIOSourceProxyRequest header
		return nil, proxy
	}
	var wg sync.WaitGroup
	errs := make([]error, len(proxyTargets.Targets))
	tagSlc := make([]map[string]string, len(proxyTargets.Targets))
	for idx, t := range proxyTargets.Targets {
		tgt := globalBucketTargetSys.GetRemoteTargetClient(bucket, t.Arn)
		if tgt == nil || globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
			continue
		}
		// if proxying explicitly disabled on remote target
		if tgt.disableProxy {
			continue
		}
		idx := idx
		wg.Add(1)
		go func(idx int, tgt *TargetClient) {
			defer wg.Done()
			var err error
			gopts := minio.GetObjectTaggingOptions{
				VersionID: opts.VersionID,
				Internal: minio.AdvancedObjectTaggingOptions{
					ReplicationProxyRequest: "true",
				},
			}
			tgs, err = tgt.GetObjectTagging(ctx, tgt.Bucket, object, gopts)
			if err != nil {
				errs[idx] = err
			} else {
				tagSlc[idx] = tgs.ToMap()
			}
		}(idx, tgt)
	}
	wg.Wait()
	for idx, err := range errs {
		errCode := minio.ToErrorResponse(err).Code
		if err != nil && errCode != "NoSuchKey" && errCode != "NoSuchVersion" {
			return nil, proxyResult{Err: err}
		}
		if err == nil {
			tgs, _ = tags.MapToObjectTags(tagSlc[idx])
		}
	}
	if len(errs) == 1 {
		proxy.Err = errs[0]
	}
	return tgs, proxy
}

func scheduleReplicationDelete(ctx context.Context, dv DeletedObjectReplicationInfo, o ObjectLayer) {
	globalReplicationPool.queueReplicaDeleteTask(dv)
	for arn := range dv.ReplicationState.Targets {
		globalReplicationStats.Update(dv.Bucket, replicatedTargetInfo{Arn: arn, Size: 0, Duration: 0, OpType: replication.DeleteReplicationType}, replication.Pending, replication.StatusType(""))
	}
}

type replicationConfig struct {
	Config  *replication.Config
	remotes *madmin.BucketTargets
}

func (c replicationConfig) Empty() bool {
	return c.Config == nil
}

func (c replicationConfig) Replicate(opts replication.ObjectOpts) bool {
	return c.Config.Replicate(opts)
}

// Resync returns true if replication reset is requested
func (c replicationConfig) Resync(ctx context.Context, oi ObjectInfo, dsc ReplicateDecision, tgtStatuses map[string]replication.StatusType) (r ResyncDecision) {
	if c.Empty() {
		return
	}

	// Now overlay existing object replication choices for target
	if oi.DeleteMarker {
		opts := replication.ObjectOpts{
			Name:           oi.Name,
			DeleteMarker:   oi.DeleteMarker,
			VersionID:      oi.VersionID,
			OpType:         replication.DeleteReplicationType,
			ExistingObject: true,
		}
		tgtArns := c.Config.FilterTargetArns(opts)
		// indicates no matching target with Existing object replication enabled.
		if len(tgtArns) == 0 {
			return
		}
		for _, t := range tgtArns {
			opts.TargetArn = t
			// Update replication decision for target based on existing object replication rule.
			dsc.Set(newReplicateTargetDecision(t, c.Replicate(opts), false))
		}
		return c.resync(oi, dsc, tgtStatuses)
	}

	// Ignore previous replication status when deciding if object can be re-replicated
	userDefined := cloneMSS(oi.UserDefined)
	delete(userDefined, xhttp.AmzBucketReplicationStatus)

	rdsc := mustReplicate(ctx, oi.Bucket, oi.Name, getMustReplicateOptions(userDefined, oi.UserTags, "", replication.ExistingObjectReplicationType, ObjectOptions{}))
	return c.resync(oi, rdsc, tgtStatuses)
}

// wrapper function for testability. Returns true if a new reset is requested on
// already replicated objects OR object qualifies for existing object replication
// and no reset requested.
func (c replicationConfig) resync(oi ObjectInfo, dsc ReplicateDecision, tgtStatuses map[string]replication.StatusType) (r ResyncDecision) {
	r = ResyncDecision{
		targets: make(map[string]ResyncTargetDecision, len(dsc.targetsMap)),
	}
	if c.remotes == nil {
		return
	}
	for _, tgt := range c.remotes.Targets {
		d, ok := dsc.targetsMap[tgt.Arn]
		if !ok {
			continue
		}
		if !d.Replicate {
			continue
		}
		r.targets[d.Arn] = resyncTarget(oi, tgt.Arn, tgt.ResetID, tgt.ResetBeforeDate, tgtStatuses[tgt.Arn])
	}
	return
}

func targetResetHeader(arn string) string {
	return fmt.Sprintf("%s-%s", ReservedMetadataPrefixLower+ReplicationReset, arn)
}
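
// Illustrative sketch (not part of the build): targetResetHeader derives the
// per-target metadata key under which the last replication reset is recorded,
// i.e. "<ReservedMetadataPrefixLower>replication-reset-<arn>". resyncTarget
// below splits the stored value into two ";"-separated fields, the second of
// which carries the reset ID (the first field's layout is assumed here):
//
//	key := targetResetHeader("arn:minio:replication::id:bucket") // hypothetical ARN
//	rs := oi.UserDefined[key]                                    // e.g. "<timestamp>;<reset-id>"
//	resetID := strings.SplitN(rs, ";", 2)[1]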

func resyncTarget(oi ObjectInfo, arn string, resetID string, resetBeforeDate time.Time, tgtStatus replication.StatusType) (rd ResyncTargetDecision) {
	rd = ResyncTargetDecision{
		ResetID:         resetID,
		ResetBeforeDate: resetBeforeDate,
	}
	rs, ok := oi.UserDefined[targetResetHeader(arn)]
	if !ok {
		rs, ok = oi.UserDefined[xhttp.MinIOReplicationResetStatus] // for backward compatibility
	}
	if !ok { // existing object replication is enabled and object version is unreplicated so far.
		if resetID != "" && oi.ModTime.Before(resetBeforeDate) { // trigger replication if `mc replicate reset` requested
			rd.Replicate = true
			return
		}
		// For existing object reset - this condition is needed
		rd.Replicate = tgtStatus == ""
		return
	}
	if resetID == "" || resetBeforeDate.Equal(timeSentinel) { // no reset in progress
		return
	}

	// if already replicated, return true if a new reset was requested.
	splits := strings.SplitN(rs, ";", 2)
	if len(splits) != 2 {
		return
	}
	newReset := splits[1] != resetID
	if !newReset && tgtStatus == replication.Completed {
		// already replicated and no reset requested
		return
	}
	rd.Replicate = newReset && oi.ModTime.Before(resetBeforeDate)
	return
}

const resyncTimeInterval = time.Minute * 1

// PersistToDisk persists in-memory resync metadata stats to disk at periodic intervals
func (s *replicationResyncer) PersistToDisk(ctx context.Context, objectAPI ObjectLayer) {
	resyncTimer := time.NewTimer(resyncTimeInterval)
	defer resyncTimer.Stop()

	// For each bucket name, store the last timestamp of the
	// successful save of replication status in the backend disks.
	lastResyncStatusSave := make(map[string]time.Time)

	for {
		select {
		case <-resyncTimer.C:
			s.RLock()
			for bucket, brs := range s.statusMap {
				var updt bool
				// Save the replication status if one resync to any bucket target is still not finished
				for _, st := range brs.TargetsMap {
					if st.LastUpdate.Equal(timeSentinel) {
						updt = true
						break
					}
				}
				// Save the replication status if a new stats update is found and not saved in the backend yet
				if brs.LastUpdate.After(lastResyncStatusSave[bucket]) {
					updt = true
				}
				if updt {
					if err := saveResyncStatus(ctx, bucket, brs, objectAPI); err != nil {
						replLogIf(ctx, fmt.Errorf("could not save resync metadata to drive for %s - %w", bucket, err))
					} else {
						lastResyncStatusSave[bucket] = brs.LastUpdate
					}
				}
			}
			s.RUnlock()

			resyncTimer.Reset(resyncTimeInterval)
		case <-ctx.Done():
			// server could be restarting - need
			// to exit immediately
			return
		}
	}
}

const (
	resyncWorkerCnt        = 10 // limit on the number of bucket resyncs in progress at any given time
	resyncParallelRoutines = 10 // number of parallel resync ops per bucket
)

func newresyncer() *replicationResyncer {
	rs := replicationResyncer{
		statusMap:      make(map[string]BucketReplicationResyncStatus),
		workerSize:     resyncWorkerCnt,
		resyncCancelCh: make(chan struct{}, resyncWorkerCnt),
		workerCh:       make(chan struct{}, resyncWorkerCnt),
	}
	for i := 0; i < rs.workerSize; i++ {
		rs.workerCh <- struct{}{}
	}
	return &rs
}
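
// Illustrative sketch (not part of the build): workerCh acts as a counting
// semaphore pre-filled with resyncWorkerCnt tokens. resyncBucket takes a token
// before starting and returns it when done, capping concurrent bucket resyncs:
//
//	sem := make(chan struct{}, 10)
//	for i := 0; i < cap(sem); i++ {
//		sem <- struct{}{}
//	}
//	<-sem                                // acquire; blocks when all tokens are taken
//	defer func() { sem <- struct{}{} }() // release when the resync finishes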

// mark status of replication resync on remote target for the bucket
func (s *replicationResyncer) markStatus(status ResyncStatusType, opts resyncOpts, objAPI ObjectLayer) {
	s.Lock()
	defer s.Unlock()

	m := s.statusMap[opts.bucket]
	st := m.TargetsMap[opts.arn]
	st.LastUpdate = UTCNow()
	st.ResyncStatus = status
	m.TargetsMap[opts.arn] = st
	m.LastUpdate = UTCNow()
	s.statusMap[opts.bucket] = m

	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	saveResyncStatus(ctx, opts.bucket, m, objAPI)
}

// update replication resync stats for bucket's remote target
func (s *replicationResyncer) incStats(ts TargetReplicationResyncStatus, opts resyncOpts) {
	s.Lock()
	defer s.Unlock()
	m := s.statusMap[opts.bucket]
	st := m.TargetsMap[opts.arn]
	st.Object = ts.Object
	st.ReplicatedCount += ts.ReplicatedCount
	st.FailedCount += ts.FailedCount
	st.ReplicatedSize += ts.ReplicatedSize
	st.FailedSize += ts.FailedSize
	m.TargetsMap[opts.arn] = st
	m.LastUpdate = UTCNow()
	s.statusMap[opts.bucket] = m
}

// resyncBucket resyncs all qualifying objects as per replication rules for the target
// ARN
func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI ObjectLayer, heal bool, opts resyncOpts) {
	select {
	case <-s.workerCh: // block till a worker is available
	case <-ctx.Done():
		return
	}

	resyncStatus := ResyncFailed
	defer func() {
		s.markStatus(resyncStatus, opts, objectAPI)
		globalSiteResyncMetrics.incBucket(opts, resyncStatus)
		s.workerCh <- struct{}{}
	}()
	// Allocate new results channel to receive ObjectInfo.
	objInfoCh := make(chan itemOrErr[ObjectInfo])
	cfg, err := getReplicationConfig(ctx, opts.bucket)
	if err != nil {
		replLogIf(ctx, fmt.Errorf("replication resync of %s for arn %s failed with %w", opts.bucket, opts.arn, err))
		return
	}
	tgts, err := globalBucketTargetSys.ListBucketTargets(ctx, opts.bucket)
	if err != nil {
		replLogIf(ctx, fmt.Errorf("replication resync of %s for arn %s failed with %w", opts.bucket, opts.arn, err))
		return
	}
	rcfg := replicationConfig{
		Config:  cfg,
		remotes: tgts,
	}
	tgtArns := cfg.FilterTargetArns(
		replication.ObjectOpts{
			OpType:    replication.ResyncReplicationType,
			TargetArn: opts.arn,
		})
	if len(tgtArns) != 1 {
		replLogIf(ctx, fmt.Errorf("replication resync failed for %s - arn specified %s is missing in the replication config", opts.bucket, opts.arn))
		return
	}
	tgt := globalBucketTargetSys.GetRemoteTargetClient(opts.bucket, opts.arn)
	if tgt == nil {
		replLogIf(ctx, fmt.Errorf("replication resync failed for %s - target could not be created for arn %s", opts.bucket, opts.arn))
		return
	}
	// mark resync status as resync started
	if !heal {
		s.markStatus(ResyncStarted, opts, objectAPI)
	}

	// Walk through all object versions - Walk() always walks in ascending order, which is
	// needed to ensure a delete marker is replicated to the target only after the object
	// version is first created.
	if err := objectAPI.Walk(ctx, opts.bucket, "", objInfoCh, WalkOptions{}); err != nil {
		replLogIf(ctx, err)
		return
	}

	s.RLock()
	m := s.statusMap[opts.bucket]
	st := m.TargetsMap[opts.arn]
	s.RUnlock()
	var lastCheckpoint string
	if st.ResyncStatus == ResyncStarted || st.ResyncStatus == ResyncFailed {
		lastCheckpoint = st.Object
	}

	workers := make([]chan ReplicateObjectInfo, resyncParallelRoutines)
	resultCh := make(chan TargetReplicationResyncStatus, 1)
	defer xioutil.SafeClose(resultCh)
	go func() {
		for r := range resultCh {
			s.incStats(r, opts)
			globalSiteResyncMetrics.updateMetric(r, opts.resyncID)
		}
	}()

	var wg sync.WaitGroup
	for i := 0; i < resyncParallelRoutines; i++ {
		wg.Add(1)
		workers[i] = make(chan ReplicateObjectInfo, 100)
		i := i
		go func(ctx context.Context, idx int) {
			defer wg.Done()
			for roi := range workers[idx] {
				select {
				case <-ctx.Done():
					return
				case <-s.resyncCancelCh:
				default:
				}
				traceFn := s.trace(tgt.ResetID, fmt.Sprintf("%s/%s (%s)", opts.bucket, roi.Name, roi.VersionID))
				if roi.DeleteMarker || !roi.VersionPurgeStatus.Empty() {
					versionID := ""
					dmVersionID := ""
					if roi.VersionPurgeStatus.Empty() {
						dmVersionID = roi.VersionID
					} else {
						versionID = roi.VersionID
					}
					doi := DeletedObjectReplicationInfo{
						DeletedObject: DeletedObject{
							ObjectName:            roi.Name,
							DeleteMarkerVersionID: dmVersionID,
							VersionID:             versionID,
							ReplicationState:      roi.ReplicationState,
							DeleteMarkerMTime:     DeleteMarkerMTime{roi.ModTime},
							DeleteMarker:          roi.DeleteMarker,
						},
						Bucket:    roi.Bucket,
						OpType:    replication.ExistingObjectReplicationType,
						EventType: ReplicateExistingDelete,
					}
					replicateDelete(ctx, doi, objectAPI)
				} else {
					roi.OpType = replication.ExistingObjectReplicationType
					roi.EventType = ReplicateExisting
					replicateObject(ctx, roi, objectAPI)
				}

				st := TargetReplicationResyncStatus{
					Object: roi.Name,
					Bucket: roi.Bucket,
				}
				_, err := tgt.StatObject(ctx, tgt.Bucket, roi.Name, minio.StatObjectOptions{
					VersionID: roi.VersionID,
					Internal: minio.AdvancedGetOptions{
						ReplicationProxyRequest: "false",
					},
				})
				sz := roi.Size
				if err != nil {
					if roi.DeleteMarker && isErrMethodNotAllowed(ErrorRespToObjectError(err, opts.bucket, roi.Name)) {
						st.ReplicatedCount++
					} else {
						st.FailedCount++
					}
					sz = 0
				} else {
					st.ReplicatedCount++
					st.ReplicatedSize += roi.Size
				}
				traceFn(sz, err)
				select {
				case <-ctx.Done():
					return
				case <-s.resyncCancelCh:
					return
				case resultCh <- st:
				}
			}
		}(ctx, i)
	}

	for res := range objInfoCh {
		if res.Err != nil {
			resyncStatus = ResyncFailed
			replLogIf(ctx, res.Err)
			return
		}
		select {
		case <-s.resyncCancelCh:
			resyncStatus = ResyncCanceled
			return
		case <-ctx.Done():
			return
		default:
		}
		if heal && lastCheckpoint != "" && lastCheckpoint != res.Item.Name {
			continue
		}
		lastCheckpoint = ""

		roi := getHealReplicateObjectInfo(res.Item, rcfg)
		if !roi.ExistingObjResync.mustResync() {
			continue
		}
		select {
		case <-s.resyncCancelCh:
			return
		case <-ctx.Done():
			return
		default:
			h := xxh3.HashString(roi.Bucket + roi.Name)
			workers[h%uint64(resyncParallelRoutines)] <- roi
		}
	}
	for i := 0; i < resyncParallelRoutines; i++ {
		xioutil.SafeClose(workers[i])
	}
	wg.Wait()

	resyncStatus = ResyncCompleted
}
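
// Illustrative sketch (not part of the build): resyncBucket above dispatches
// each object to a worker chosen by hashing bucket+name with xxh3, so every
// version of a given object lands on the same goroutine and replicates in
// walk order, while the resync still fans out across resyncParallelRoutines
// goroutines:
//
//	h := xxh3.HashString(roi.Bucket + roi.Name)
//	workers[h%uint64(len(workers))] <- roi // same object name -> same worker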

// start replication resync for the remote target ARN specified
func (s *replicationResyncer) start(ctx context.Context, objAPI ObjectLayer, opts resyncOpts) error {
	if opts.bucket == "" {
		return fmt.Errorf("bucket name is empty")
	}
	if opts.arn == "" {
		return fmt.Errorf("target ARN specified for resync is empty")
	}
	// Ensure the bucket has a replication config with a matching target ARN before starting.
	cfg, err := getReplicationConfig(ctx, opts.bucket)
	if err != nil {
		return err
	}
	tgtArns := cfg.FilterTargetArns(
		replication.ObjectOpts{
			OpType:    replication.ResyncReplicationType,
			TargetArn: opts.arn,
		})
	if len(tgtArns) == 0 {
		return fmt.Errorf("arn %s specified for resync not found in replication config", opts.arn)
	}
	globalReplicationPool.resyncer.RLock()
	data, ok := globalReplicationPool.resyncer.statusMap[opts.bucket]
	globalReplicationPool.resyncer.RUnlock()
	if !ok {
		data, err = loadBucketResyncMetadata(ctx, opts.bucket, objAPI)
		if err != nil {
			return err
		}
	}
	// validate if resync is in progress for this arn
	for tArn, st := range data.TargetsMap {
		if opts.arn == tArn && (st.ResyncStatus == ResyncStarted || st.ResyncStatus == ResyncPending) {
			return fmt.Errorf("Resync of bucket %s is already in progress for remote bucket %s", opts.bucket, opts.arn)
		}
	}
	status := TargetReplicationResyncStatus{
		ResyncID:         opts.resyncID,
		ResyncBeforeDate: opts.resyncBefore,
		StartTime:        UTCNow(),
		ResyncStatus:     ResyncPending,
		Bucket:           opts.bucket,
	}
	data.TargetsMap[opts.arn] = status
	if err = saveResyncStatus(ctx, opts.bucket, data, objAPI); err != nil {
		return err
	}

	globalReplicationPool.resyncer.Lock()
	defer globalReplicationPool.resyncer.Unlock()
	brs, ok := globalReplicationPool.resyncer.statusMap[opts.bucket]
	if !ok {
		brs = BucketReplicationResyncStatus{
			Version:    resyncMetaVersion,
			TargetsMap: make(map[string]TargetReplicationResyncStatus),
		}
	}
	brs.TargetsMap[opts.arn] = status
	globalReplicationPool.resyncer.statusMap[opts.bucket] = brs
	go globalReplicationPool.resyncer.resyncBucket(GlobalContext, objAPI, false, opts)
	return nil
}

func (s *replicationResyncer) trace(resyncID string, path string) func(sz int64, err error) {
	startTime := time.Now()
	return func(sz int64, err error) {
		duration := time.Since(startTime)
		if globalTrace.NumSubscribers(madmin.TraceReplicationResync) > 0 {
			globalTrace.Publish(replicationResyncTrace(resyncID, startTime, duration, path, err, sz))
		}
	}
}

func replicationResyncTrace(resyncID string, startTime time.Time, duration time.Duration, path string, err error, sz int64) madmin.TraceInfo {
	var errStr string
	if err != nil {
		errStr = err.Error()
	}
	funcName := fmt.Sprintf("replication.(resyncID=%s)", resyncID)
	return madmin.TraceInfo{
		TraceType: madmin.TraceReplicationResync,
		Time:      startTime,
		NodeName:  globalLocalNodeName,
		FuncName:  funcName,
		Duration:  duration,
		Path:      path,
		Error:     errStr,
		Bytes:     sz,
	}
}

// delete resync metadata from replication resync state in memory
func (p *ReplicationPool) deleteResyncMetadata(ctx context.Context, bucket string) {
	if p == nil {
		return
	}
	p.resyncer.Lock()
	defer p.resyncer.Unlock()
	delete(p.resyncer.statusMap, bucket)
	globalSiteResyncMetrics.deleteBucket(bucket)
}

// initResync - initializes bucket replication resync for all buckets.
func (p *ReplicationPool) initResync(ctx context.Context, buckets []string, objAPI ObjectLayer) error {
	if objAPI == nil {
		return errServerNotInitialized
	}
	// Load bucket metadata sys in background
	go p.startResyncRoutine(ctx, buckets, objAPI)
	return nil
}

func (p *ReplicationPool) startResyncRoutine(ctx context.Context, buckets []string, objAPI ObjectLayer) {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	// Run the replication resync in a loop
	for {
		if err := p.loadResync(ctx, buckets, objAPI); err == nil {
			<-ctx.Done()
			return
		}
		duration := time.Duration(r.Float64() * float64(time.Minute))
		if duration < time.Second {
			// Make sure to sleep at least a second to avoid high CPU ticks.
			duration = time.Second
		}
		time.Sleep(duration)
	}
}

// Loads bucket replication resync statuses into memory.
func (p *ReplicationPool) loadResync(ctx context.Context, buckets []string, objAPI ObjectLayer) error {
	// Make sure only one node is running resync on the cluster.
	ctx, cancel := globalLeaderLock.GetLock(ctx)
	defer cancel()

	for index := range buckets {
		bucket := buckets[index]
		meta, err := loadBucketResyncMetadata(ctx, bucket, objAPI)
		if err != nil {
			if !errors.Is(err, errVolumeNotFound) {
				replLogIf(ctx, err)
			}
			continue
		}
		p.resyncer.Lock()
		p.resyncer.statusMap[bucket] = meta
		p.resyncer.Unlock()

		tgts := meta.cloneTgtStats()
		for arn, st := range tgts {
			switch st.ResyncStatus {
			case ResyncFailed, ResyncStarted, ResyncPending:
				go p.resyncer.resyncBucket(ctx, objAPI, true, resyncOpts{
					bucket:       bucket,
					arn:          arn,
					resyncID:     st.ResyncID,
					resyncBefore: st.ResyncBeforeDate,
				})
			}
		}
	}
	return nil
}

// load bucket resync metadata from disk
func loadBucketResyncMetadata(ctx context.Context, bucket string, objAPI ObjectLayer) (brs BucketReplicationResyncStatus, e error) {
	brs = newBucketResyncStatus(bucket)
	resyncDirPath := path.Join(bucketMetaPrefix, bucket, replicationDir)
	data, err := readConfig(GlobalContext, objAPI, pathJoin(resyncDirPath, resyncFileName))
	if err != nil && err != errConfigNotFound {
		return brs, err
	}
	if len(data) == 0 {
		// Seems to be empty.
		return brs, nil
	}
	if len(data) <= 4 {
		return brs, fmt.Errorf("replication resync: no data")
	}
	// Read resync meta header
	switch binary.LittleEndian.Uint16(data[0:2]) {
	case resyncMetaFormat:
	default:
		return brs, fmt.Errorf("resyncMeta: unknown format: %d", binary.LittleEndian.Uint16(data[0:2]))
	}
	switch binary.LittleEndian.Uint16(data[2:4]) {
	case resyncMetaVersion:
	default:
		return brs, fmt.Errorf("resyncMeta: unknown version: %d", binary.LittleEndian.Uint16(data[2:4]))
	}
	// OK, parse data.
	if _, err = brs.UnmarshalMsg(data[4:]); err != nil {
		return brs, err
	}
	switch brs.Version {
	case resyncMetaVersionV1:
	default:
		return brs, fmt.Errorf("unexpected resync meta version: %d", brs.Version)
	}
	return brs, nil
}

// save resync status to resync.bin
func saveResyncStatus(ctx context.Context, bucket string, brs BucketReplicationResyncStatus, objectAPI ObjectLayer) error {
	data := make([]byte, 4, brs.Msgsize()+4)
	// Initialize the resync meta header.
	binary.LittleEndian.PutUint16(data[0:2], resyncMetaFormat)
	binary.LittleEndian.PutUint16(data[2:4], resyncMetaVersion)

	buf, err := brs.MarshalMsg(data)
	if err != nil {
		return err
	}
	configFile := path.Join(bucketMetaPrefix, bucket, replicationDir, resyncFileName)
	return saveConfig(ctx, objectAPI, configFile, buf)
}
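
// Illustrative sketch (not part of the build): resync.bin starts with a
// 4-byte little-endian header - uint16 format followed by uint16 version -
// and then the msgp-encoded BucketReplicationResyncStatus, mirroring the
// checks performed in loadBucketResyncMetadata above:
//
//	format := binary.LittleEndian.Uint16(data[0:2])  // resyncMetaFormat
//	version := binary.LittleEndian.Uint16(data[2:4]) // resyncMetaVersion
//	payload := data[4:]                              // msgp-encoded status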

// getReplicationDiff returns un-replicated objects in a channel.
// If a non-nil channel is returned it must be consumed fully or
// the provided context must be canceled.
func getReplicationDiff(ctx context.Context, objAPI ObjectLayer, bucket string, opts madmin.ReplDiffOpts) (chan madmin.DiffInfo, error) {
	cfg, err := getReplicationConfig(ctx, bucket)
	if err != nil {
		replLogOnceIf(ctx, err, bucket)
		return nil, err
	}
	tgts, err := globalBucketTargetSys.ListBucketTargets(ctx, bucket)
	if err != nil {
		replLogIf(ctx, err)
		return nil, err
	}
	objInfoCh := make(chan itemOrErr[ObjectInfo], 10)
	if err := objAPI.Walk(ctx, bucket, opts.Prefix, objInfoCh, WalkOptions{}); err != nil {
		replLogIf(ctx, err)
		return nil, err
	}
	rcfg := replicationConfig{
		Config:  cfg,
		remotes: tgts,
	}
	diffCh := make(chan madmin.DiffInfo, 4000)
	go func() {
		defer xioutil.SafeClose(diffCh)
		for res := range objInfoCh {
			if res.Err != nil {
				diffCh <- madmin.DiffInfo{Err: res.Err}
				return
			}
			if contextCanceled(ctx) {
				// Just consume input...
				continue
			}
			obj := res.Item

			// Ignore object prefixes which are excluded
			// from versioning via the MinIO bucket versioning extension.
			if globalBucketVersioningSys.PrefixSuspended(bucket, obj.Name) {
				continue
			}
			roi := getHealReplicateObjectInfo(obj, rcfg)
			switch roi.ReplicationStatus {
			case replication.Completed, replication.Replica:
				if !opts.Verbose {
					continue
				}
				fallthrough
			default:
				// ignore pre-existing objects that don't satisfy replication rule(s)
				if roi.ReplicationStatus.Empty() && !roi.ExistingObjResync.mustResync() {
					continue
				}
				tgtsMap := make(map[string]madmin.TgtDiffInfo)
				for arn, st := range roi.TargetStatuses {
					if opts.ARN == "" || opts.ARN == arn {
						if !opts.Verbose && (st == replication.Completed || st == replication.Replica) {
							continue
						}
						tgtsMap[arn] = madmin.TgtDiffInfo{
							ReplicationStatus: st.String(),
						}
					}
				}
				for arn, st := range roi.TargetPurgeStatuses {
					if opts.ARN == "" || opts.ARN == arn {
						if !opts.Verbose && st == Complete {
							continue
						}
						t, ok := tgtsMap[arn]
						if !ok {
							t = madmin.TgtDiffInfo{}
						}
						t.DeleteReplicationStatus = string(st)
						tgtsMap[arn] = t
					}
				}
				select {
				case diffCh <- madmin.DiffInfo{
					Object:                  obj.Name,
					VersionID:               obj.VersionID,
					LastModified:            obj.ModTime,
					IsDeleteMarker:          obj.DeleteMarker,
					ReplicationStatus:       string(roi.ReplicationStatus),
					DeleteReplicationStatus: string(roi.VersionPurgeStatus),
					ReplicationTimestamp:    roi.ReplicationTimestamp,
					Targets:                 tgtsMap,
				}:
				case <-ctx.Done():
					continue
				}
			}
		}
	}()
	return diffCh, nil
}
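
// Illustrative usage sketch (not part of the build): per the contract above,
// the returned channel must be drained fully or the context canceled,
// otherwise the producing goroutine leaks:
//
//	ctx, cancel := context.WithCancel(context.Background())
//	defer cancel()
//	diffCh, err := getReplicationDiff(ctx, objAPI, "mybucket", madmin.ReplDiffOpts{Verbose: true})
//	if err != nil {
//		return err
//	}
//	for d := range diffCh {
//		fmt.Println(d.Object, d.ReplicationStatus) // un-replicated object versions
//	}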

// QueueReplicationHeal is a wrapper for queueReplicationHeal
func QueueReplicationHeal(ctx context.Context, bucket string, oi ObjectInfo, retryCount int) {
	// ignore modtime zero objects
	if oi.ModTime.IsZero() {
		return
	}
	rcfg, err := getReplicationConfig(ctx, bucket)
	if err != nil {
		replLogOnceIf(ctx, err, bucket)
		return
	}
	tgts, _ := globalBucketTargetSys.ListBucketTargets(ctx, bucket)
	queueReplicationHeal(ctx, bucket, oi, replicationConfig{
		Config:  rcfg,
		remotes: tgts,
	}, retryCount)
}

// queueReplicationHeal enqueues objects that failed replication OR are eligible for resyncing through
// an ongoing resync operation or via existing objects replication configuration setting.
func queueReplicationHeal(ctx context.Context, bucket string, oi ObjectInfo, rcfg replicationConfig, retryCount int) (roi ReplicateObjectInfo) {
	// ignore modtime zero objects
	if oi.ModTime.IsZero() {
		return roi
	}
	if isVeeamSOSAPIObject(oi.Name) {
		return roi
	}
	if rcfg.Config == nil || rcfg.remotes == nil {
		return roi
	}
	roi = getHealReplicateObjectInfo(oi, rcfg)
	roi.RetryCount = uint32(retryCount)
	if !roi.Dsc.ReplicateAny() {
		return
	}
	// early return if replication already done, otherwise we need to determine if this
	// version is an existing object that needs healing.
	if oi.ReplicationStatus == replication.Completed && oi.VersionPurgeStatus.Empty() && !roi.ExistingObjResync.mustResync() {
		return
	}
	if roi.DeleteMarker || !roi.VersionPurgeStatus.Empty() {
		versionID := ""
		dmVersionID := ""
		if roi.VersionPurgeStatus.Empty() {
			dmVersionID = roi.VersionID
		} else {
			versionID = roi.VersionID
		}
		dv := DeletedObjectReplicationInfo{
			DeletedObject: DeletedObject{
				ObjectName:            roi.Name,
				DeleteMarkerVersionID: dmVersionID,
				VersionID:             versionID,
				ReplicationState:      roi.ReplicationState,
				DeleteMarkerMTime:     DeleteMarkerMTime{roi.ModTime},
				DeleteMarker:          roi.DeleteMarker,
			},
			Bucket:    roi.Bucket,
			OpType:    replication.HealReplicationType,
			EventType: ReplicateHealDelete,
		}
		// heal delete marker replication failure or versioned delete replication failure
		if roi.ReplicationStatus == replication.Pending ||
			roi.ReplicationStatus == replication.Failed ||
			roi.VersionPurgeStatus == Failed || roi.VersionPurgeStatus == Pending {
			globalReplicationPool.queueReplicaDeleteTask(dv)
			return
		}
		// if replication status is Complete on DeleteMarker and existing object resync required
		if roi.ExistingObjResync.mustResync() && (roi.ReplicationStatus == replication.Completed || roi.ReplicationStatus.Empty()) {
			queueReplicateDeletesWrapper(dv, roi.ExistingObjResync)
			return
		}
		return
	}
	if roi.ExistingObjResync.mustResync() {
		roi.OpType = replication.ExistingObjectReplicationType
	}
	switch roi.ReplicationStatus {
	case replication.Pending, replication.Failed:
		roi.EventType = ReplicateHeal
		globalReplicationPool.queueReplicaTask(roi)
		return
	}
	if roi.ExistingObjResync.mustResync() {
		roi.EventType = ReplicateExisting
		globalReplicationPool.queueReplicaTask(roi)
	}
	return
}
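
// Summary of the routing above (descriptive only): delete markers and
// versioned deletes with Pending/Failed status go to queueReplicaDeleteTask;
// already-replicated delete markers that qualify for an existing-object
// resync go through queueReplicateDeletesWrapper; regular object versions
// with Pending/Failed status are queued as ReplicateHeal, and otherwise as
// ReplicateExisting when an existing-object resync is required.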

const (
	mrfSaveInterval  = 5 * time.Minute
	mrfQueueInterval = mrfSaveInterval + time.Minute // A minute higher than save interval

	mrfRetryLimit = 3 // max number of retries before letting scanner catch up on this object version
	mrfMaxEntries = 1000000
)

func (p *ReplicationPool) persistMRF() {
	if !p.initialized() {
		return
	}

	entries := make(map[string]MRFReplicateEntry)
	mTimer := time.NewTimer(mrfSaveInterval)
	defer mTimer.Stop()

	saveMRFToDisk := func() {
		if len(entries) == 0 {
			return
		}

		// queue all entries for healing before overwriting the node mrf file
		if !contextCanceled(p.ctx) {
			p.queueMRFHeal()
		}

		p.saveMRFEntries(p.ctx, entries)

		entries = make(map[string]MRFReplicateEntry)
	}
	for {
		select {
		case <-mTimer.C:
			saveMRFToDisk()
			mTimer.Reset(mrfSaveInterval)
		case <-p.ctx.Done():
			p.mrfStopCh <- struct{}{}
			xioutil.SafeClose(p.mrfSaveCh)
			// We try to save if possible, but we don't care beyond that.
			saveMRFToDisk()
			return
		case e, ok := <-p.mrfSaveCh:
			if !ok {
				return
			}
			entries[e.versionID] = e

			if len(entries) >= mrfMaxEntries {
				saveMRFToDisk()
			}
		}
	}
}

func (p *ReplicationPool) queueMRFSave(entry MRFReplicateEntry) {
	if !p.initialized() {
		return
	}
	if entry.RetryCount > mrfRetryLimit { // let scanner catch up if retry count exceeded
		atomic.AddUint64(&globalReplicationStats.mrfStats.TotalDroppedCount, 1)
		atomic.AddUint64(&globalReplicationStats.mrfStats.TotalDroppedBytes, uint64(entry.sz))
		return
	}

	select {
	case <-GlobalContext.Done():
		return
	case <-p.mrfStopCh:
		return
	default:
		select {
		case p.mrfSaveCh <- entry:
		default:
			atomic.AddUint64(&globalReplicationStats.mrfStats.TotalDroppedCount, 1)
			atomic.AddUint64(&globalReplicationStats.mrfStats.TotalDroppedBytes, uint64(entry.sz))
		}
	}
}
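
// Illustrative sketch (not part of the build): the inner select above is the
// standard non-blocking send - when mrfSaveCh is full the entry is dropped
// and counted instead of stalling the replication hot path:
//
//	select {
//	case ch <- entry:
//	default:
//		droppedCount++ // queue full; record the drop rather than block
//	}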

func (p *ReplicationPool) persistToDrive(ctx context.Context, v MRFReplicateEntries) {
	newReader := func() io.ReadCloser {
		r, w := io.Pipe()
		go func() {
			// Initialize MRF meta header.
			var data [4]byte
			binary.LittleEndian.PutUint16(data[0:2], mrfMetaFormat)
			binary.LittleEndian.PutUint16(data[2:4], mrfMetaVersion)
			mw := msgp.NewWriter(w)
			n, err := mw.Write(data[:])
			if err != nil {
				w.CloseWithError(err)
				return
			}
			if n != len(data) {
				w.CloseWithError(io.ErrShortWrite)
				return
			}
			err = v.EncodeMsg(mw)
			mw.Flush()
			w.CloseWithError(err)
		}()
		return r
	}

	globalLocalDrivesMu.RLock()
	localDrives := cloneDrives(globalLocalDrives)
	globalLocalDrivesMu.RUnlock()

	for _, localDrive := range localDrives {
		r := newReader()
		err := localDrive.CreateFile(ctx, "", minioMetaBucket, pathJoin(replicationMRFDir, globalLocalNodeNameHex+".bin"), -1, r)
		r.Close()
		if err == nil {
			break
		}
	}
}
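
// Illustrative sketch (not part of the build): io.Pipe lets the msgp encoder
// stream straight into the drive write without materializing the whole MRF
// file in memory; the writer goroutine propagates failures to the reader via
// CloseWithError:
//
//	r, w := io.Pipe()
//	go func() {
//		_, err := w.Write(payload) // encode into the pipe
//		w.CloseWithError(err)      // nil err surfaces as io.EOF to the reader
//	}()
//	// hand r to any consumer of io.Reader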

// save mrf entries to nodenamehex.bin
func (p *ReplicationPool) saveMRFEntries(ctx context.Context, entries map[string]MRFReplicateEntry) {
	if !p.initialized() {
		return
	}
	atomic.StoreUint64(&globalReplicationStats.mrfStats.LastFailedCount, uint64(len(entries)))
	if len(entries) == 0 {
		return
	}

	v := MRFReplicateEntries{
		Entries: entries,
		Version: mrfMetaVersion,
	}

	p.persistToDrive(ctx, v)
}

// load mrf entries from disk
func (p *ReplicationPool) loadMRF() (mrfRec MRFReplicateEntries, err error) {
	loadMRF := func(rc io.ReadCloser) (re MRFReplicateEntries, err error) {
		defer rc.Close()
		if !p.initialized() {
			return re, nil
		}
		var data [4]byte
		n, err := rc.Read(data[:])
		if err != nil {
			return re, err
		}
		if n != len(data) {
			return re, errors.New("replication mrf: no data")
		}
		// Read the MRF meta header
		switch binary.LittleEndian.Uint16(data[0:2]) {
		case mrfMetaFormat:
		default:
			return re, fmt.Errorf("replication mrf: unknown format: %d", binary.LittleEndian.Uint16(data[0:2]))
		}
		switch binary.LittleEndian.Uint16(data[2:4]) {
		case mrfMetaVersion:
		default:
			return re, fmt.Errorf("replication mrf: unknown version: %d", binary.LittleEndian.Uint16(data[2:4]))
		}
		// OK, parse data.
		// Ignore any parsing errors; this file gets generated again anyway.
		re.DecodeMsg(msgp.NewReader(rc))

		return re, nil
	}

	globalLocalDrivesMu.RLock()
	localDrives := cloneDrives(globalLocalDrives)
	globalLocalDrivesMu.RUnlock()

	for _, localDrive := range localDrives {
		rc, err := localDrive.ReadFileStream(p.ctx, minioMetaBucket, pathJoin(replicationMRFDir, globalLocalNodeNameHex+".bin"), 0, -1)
		if err != nil {
			continue
		}

		mrfRec, err = loadMRF(rc)
		if err != nil {
			continue
		}
		// finally delete the file after processing mrf entries
		localDrive.Delete(p.ctx, minioMetaBucket, pathJoin(replicationMRFDir, globalLocalNodeNameHex+".bin"), DeleteOptions{})
		break
	}

	return mrfRec, nil
}

func (p *ReplicationPool) processMRF() {
	if !p.initialized() {
		return
	}
	pTimer := time.NewTimer(mrfQueueInterval)
	defer pTimer.Stop()
	for {
		select {
		case <-pTimer.C:
			// skip healing if all targets are offline
			var offlineCnt int
			tgts := globalBucketTargetSys.ListTargets(p.ctx, "", "")
			for _, tgt := range tgts {
				if globalBucketTargetSys.isOffline(tgt.URL()) {
					offlineCnt++
				}
			}
			if len(tgts) == offlineCnt {
				pTimer.Reset(mrfQueueInterval)
				continue
			}
			if err := p.queueMRFHeal(); err != nil && !osIsNotExist(err) {
				replLogIf(p.ctx, err)
			}
			pTimer.Reset(mrfQueueInterval)
		case <-p.ctx.Done():
			return
		}
	}
}

// queueMRFHeal loads previously failed replication entries and queues them for
// an attempt to heal replication.
func (p *ReplicationPool) queueMRFHeal() error {
	p.mrfMU.Lock()
	defer p.mrfMU.Unlock()

	if !p.initialized() {
		return errServerNotInitialized
	}

	mrfRec, err := p.loadMRF()
	if err != nil {
		return err
	}

	// queue replication heal in a goroutine to avoid holding up mrf save routine
	go func() {
		for vID, e := range mrfRec.Entries {
			ctx, cancel := context.WithTimeout(p.ctx, time.Second) // Do not waste more than a second on this.

			oi, err := p.objLayer.GetObjectInfo(ctx, e.Bucket, e.Object, ObjectOptions{
				VersionID: vID,
			})
			cancel()
			if err != nil {
				continue
			}

			QueueReplicationHeal(p.ctx, e.Bucket, oi, e.RetryCount)
		}
	}()

	return nil
}

func (p *ReplicationPool) initialized() bool {
	return !(p == nil || p.objLayer == nil)
}

// getMRF returns MRF entries for this node.
func (p *ReplicationPool) getMRF(ctx context.Context, bucket string) (ch <-chan madmin.ReplicationMRF, err error) {
	mrfRec, err := p.loadMRF()
	if err != nil {
		return nil, err
	}

	mrfCh := make(chan madmin.ReplicationMRF, 100)
	go func() {
		defer xioutil.SafeClose(mrfCh)
		for vID, e := range mrfRec.Entries {
			if bucket != "" && e.Bucket != bucket {
				continue
			}
			select {
			case mrfCh <- madmin.ReplicationMRF{
				NodeName:   globalLocalNodeName,
				Object:     e.Object,
				VersionID:  vID,
				Bucket:     e.Bucket,
				RetryCount: e.RetryCount,
			}:
			case <-ctx.Done():
				return
			}
		}
	}()
	return mrfCh, nil
}