feat: Add support for site level resync (#15753)

Authored by Poorna on 2022-11-14 07:16:40 -08:00; committed by GitHub
parent 7ac64ad24a
commit d6bc141bd1
18 changed files with 1442 additions and 120 deletions

View File

@ -515,3 +515,45 @@ func (a adminAPIHandlers) SRPeerRemove(w http.ResponseWriter, r *http.Request) {
return
}
}
// SiteReplicationResyncOp - PUT /minio/admin/v3/site-replication/resync/op
func (a adminAPIHandlers) SiteReplicationResyncOp(w http.ResponseWriter, r *http.Request) {
ctx := newContext(r, w, "SiteReplicationResyncOp")
defer logger.AuditLog(ctx, w, r, mustGetClaimsFromToken(r))
objectAPI, _ := validateAdminReq(ctx, w, r, iampolicy.SiteReplicationResyncAction)
if objectAPI == nil {
return
}
var peerSite madmin.PeerInfo
if err := parseJSONBody(ctx, r.Body, &peerSite, ""); err != nil {
writeErrorResponseJSON(ctx, w, toAdminAPIErr(ctx, err), r.URL)
return
}
vars := mux.Vars(r)
op := madmin.SiteResyncOp(vars["operation"])
var (
status madmin.SRResyncOpStatus
err error
)
switch op {
case madmin.SiteResyncStart:
status, err = globalSiteReplicationSys.startResync(ctx, objectAPI, peerSite)
case madmin.SiteResyncCancel:
status, err = globalSiteReplicationSys.cancelResync(ctx, objectAPI, peerSite)
default:
err = errSRInvalidRequest(errInvalidArgument)
}
if err != nil {
writeErrorResponseJSON(ctx, w, toAdminAPIErr(ctx, err), r.URL)
return
}
body, err := json.Marshal(status)
if err != nil {
writeErrorResponseJSON(ctx, w, toAdminAPIErr(ctx, err), r.URL)
return
}
writeSuccessResponseJSON(w, body)
}
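The handler above expects a PUT to /minio/admin/v3/site-replication/resync/op with an "operation" query parameter ("start" or "cancel") and a JSON-encoded madmin.PeerInfo body. Below is a minimal sketch of that request shape; it is illustrative only, the endpoint and peer values are placeholders, and it omits the admin-credential signing that a real MinIO admin client performs before sending the request.

// Sketch: shape of the resync/op admin request (signing omitted; values are placeholders).
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"

	"github.com/minio/madmin-go"
)

func main() {
	peer := madmin.PeerInfo{
		DeploymentID: "dst-deployment-id", // deployment ID of the peer site to resync
		Endpoint:     "https://peer-site:9000",
		Name:         "site-b",
	}
	body, _ := json.Marshal(peer)
	// operation is "start" or "cancel", matching madmin.SiteResyncStart / madmin.SiteResyncCancel.
	url := "https://admin-site:9000/minio/admin/v3/site-replication/resync/op?operation=start"
	req, _ := http.NewRequest(http.MethodPut, url, bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	fmt.Println("would send:", req.Method, req.URL) // a real request must be signed with admin credentials
}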

View File

@ -428,7 +428,7 @@ func (a adminAPIHandlers) MetricsHandler(w http.ResponseWriter, r *http.Request)
}
}
}
dID := r.Form.Get("by-depID")
done := ctx.Done()
ticker := time.NewTicker(interval)
defer ticker.Stop()
@ -441,15 +441,16 @@ func (a adminAPIHandlers) MetricsHandler(w http.ResponseWriter, r *http.Request)
hosts: hostMap,
disks: diskMap,
jobID: jobID,
depID: dID,
})
m.Merge(&mLocal)
// Allow half the interval for collecting remote...
cctx, cancel := context.WithTimeout(ctx, interval/2)
mRemote := collectRemoteMetrics(cctx, types, collectMetricsOpts{
hosts: hostMap,
disks: diskMap,
jobID: jobID,
depID: dID,
})
cancel()
m.Merge(&mRemote)

View File

@ -258,6 +258,7 @@ func registerAdminRouter(router *mux.Router, enableConfigOps bool) {
adminRouter.Methods(http.MethodPut).Path(adminVersion + "/site-replication/edit").HandlerFunc(gz(httpTraceHdrs(adminAPI.SiteReplicationEdit)))
adminRouter.Methods(http.MethodPut).Path(adminVersion + "/site-replication/peer/edit").HandlerFunc(gz(httpTraceHdrs(adminAPI.SRPeerEdit)))
adminRouter.Methods(http.MethodPut).Path(adminVersion + "/site-replication/peer/remove").HandlerFunc(gz(httpTraceHdrs(adminAPI.SRPeerRemove)))
adminRouter.Methods(http.MethodPut).Path(adminVersion+"/site-replication/resync/op").HandlerFunc(gz(httpTraceHdrs(adminAPI.SiteReplicationResyncOp))).Queries("operation", "{operation:.*}")
if globalIsDistErasure {
// Top locks

View File

@ -321,7 +321,12 @@ func (api objectAPIHandlers) ResetBucketReplicationStartHandler(w http.ResponseW
writeErrorResponseJSON(ctx, w, toAPIError(ctx, err), r.URL)
}
}
if err := globalReplicationPool.resyncer.start(ctx, objectAPI, resyncOpts{
bucket: bucket,
arn: arn,
resyncID: resetID,
resyncBefore: resetBeforeDate,
}); err != nil {
writeErrorResponseJSON(ctx, w, errorCodes.ToAPIErrWithErr(ErrBadRequest, InvalidArgument{
Bucket: bucket,
Err: err,
@ -370,10 +375,13 @@ func (api objectAPIHandlers) ResetBucketReplicationStatusHandler(w http.Response
writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
return
}
var tgtStats map[string]TargetReplicationResyncStatus
globalReplicationPool.resyncer.RLock()
brs, ok := globalReplicationPool.resyncer.statusMap[bucket]
if ok {
tgtStats = brs.cloneTgtStats()
}
globalReplicationPool.resyncer.RUnlock()
if !ok {
brs, err = loadBucketResyncMetadata(ctx, bucket, objectAPI)
if err != nil {
@ -383,10 +391,11 @@ func (api objectAPIHandlers) ResetBucketReplicationStatusHandler(w http.Response
}), r.URL)
return
}
tgtStats = brs.cloneTgtStats()
}
var rinfo ResyncTargetsInfo
for tarn, st := range tgtStats {
if arn != "" && tarn != arn {
continue
}
@ -394,7 +403,7 @@ func (api objectAPIHandlers) ResetBucketReplicationStatusHandler(w http.Response
Arn: tarn,
ResetID: st.ResyncID,
StartTime: st.StartTime,
EndTime: st.LastUpdate,
ResyncStatus: st.ResyncStatus.String(),
ReplicatedSize: st.ReplicatedSize,
ReplicatedCount: st.ReplicatedCount,

View File

@ -628,9 +628,12 @@ func (v VersionPurgeStatusType) Pending() bool {
return v == Pending || v == Failed
}
type replicationResyncer struct {
// map of bucket to their resync status
statusMap map[string]BucketReplicationResyncStatus
workerSize int
resyncCancelCh chan struct{}
workerCh chan struct{}
sync.RWMutex
}
@ -642,12 +645,23 @@ const (
resyncMetaVersion = resyncMetaVersionV1
)
type resyncOpts struct {
bucket string
arn string
resyncID string
resyncBefore time.Time
}
// ResyncStatusType status of resync operation
type ResyncStatusType int
const (
// NoResync - no resync in progress
NoResync ResyncStatusType = iota
// ResyncPending - resync pending
ResyncPending
// ResyncCanceled - resync canceled
ResyncCanceled
// ResyncStarted - resync in progress
ResyncStarted
// ResyncCompleted - resync finished
@ -656,6 +670,10 @@ const (
ResyncFailed
)
func (rt ResyncStatusType) isValid() bool {
return rt != NoResync
}
func (rt ResyncStatusType) String() string {
switch rt {
case ResyncStarted:
@ -664,6 +682,10 @@ func (rt ResyncStatusType) String() string {
return "Completed"
case ResyncFailed:
return "Failed"
case ResyncPending:
return "Pending"
case ResyncCanceled:
return "Canceled"
default:
return ""
}
@ -672,7 +694,7 @@ func (rt ResyncStatusType) String() string {
// TargetReplicationResyncStatus status of resync of bucket for a specific target
type TargetReplicationResyncStatus struct {
StartTime time.Time `json:"startTime" msg:"st"`
LastUpdate time.Time `json:"lastUpdated" msg:"lst"`
// Resync ID assigned to this reset
ResyncID string `json:"resyncID" msg:"id"`
// ResyncBeforeDate - resync all objects created prior to this date
@ -701,6 +723,14 @@ type BucketReplicationResyncStatus struct {
LastUpdate time.Time `json:"lastUpdate" msg:"lu"`
}
func (rs *BucketReplicationResyncStatus) cloneTgtStats() (m map[string]TargetReplicationResyncStatus) {
m = make(map[string]TargetReplicationResyncStatus)
for arn, st := range rs.TargetsMap {
m[arn] = st
}
return
}
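cloneTgtStats exists so callers can copy the per-target map while holding the read lock and then iterate the copy lock-free; ranging over the live map after RUnlock would race with resync workers that mutate it. A minimal sketch of that snapshot-under-RLock pattern follows, using hypothetical types rather than MinIO's.

// Sketch of the snapshot-under-RLock pattern that cloneTgtStats enables (hypothetical types).
package main

import (
	"fmt"
	"sync"
)

type status struct{ count int }

type registry struct {
	sync.RWMutex
	m map[string]status
}

// snapshot copies the map under the read lock so callers can iterate without holding it.
func (r *registry) snapshot() map[string]status {
	r.RLock()
	defer r.RUnlock()
	out := make(map[string]status, len(r.m))
	for k, v := range r.m {
		out[k] = v
	}
	return out
}

func main() {
	r := &registry{m: map[string]status{"arn-1": {count: 3}}}
	for k, v := range r.snapshot() { // safe: iterating a private copy
		fmt.Println(k, v.count)
	}
}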
func newBucketResyncStatus(bucket string) BucketReplicationResyncStatus {
return BucketReplicationResyncStatus{
TargetsMap: make(map[string]TargetReplicationResyncStatus),

View File

@ -2117,10 +2117,10 @@ func (z *TargetReplicationResyncStatus) DecodeMsg(dc *msgp.Reader) (err error) {
err = msgp.WrapError(err, "StartTime")
return
}
case "lst":
z.LastUpdate, err = dc.ReadTime()
if err != nil {
err = msgp.WrapError(err, "LastUpdate")
return
}
case "id":
@ -2205,14 +2205,14 @@ func (z *TargetReplicationResyncStatus) EncodeMsg(en *msgp.Writer) (err error) {
err = msgp.WrapError(err, "StartTime")
return
}
// write "lst"
err = en.Append(0xa3, 0x6c, 0x73, 0x74)
if err != nil {
return
}
err = en.WriteTime(z.LastUpdate)
if err != nil {
err = msgp.WrapError(err, "LastUpdate")
return
}
// write "id"
@ -2315,9 +2315,9 @@ func (z *TargetReplicationResyncStatus) MarshalMsg(b []byte) (o []byte, err erro
// string "st"
o = append(o, 0x8b, 0xa2, 0x73, 0x74)
o = msgp.AppendTime(o, z.StartTime)
// string "lst"
o = append(o, 0xa3, 0x6c, 0x73, 0x74)
o = msgp.AppendTime(o, z.LastUpdate)
// string "id"
o = append(o, 0xa2, 0x69, 0x64)
o = msgp.AppendString(o, z.ResyncID)
@ -2372,10 +2372,10 @@ func (z *TargetReplicationResyncStatus) UnmarshalMsg(bts []byte) (o []byte, err
err = msgp.WrapError(err, "StartTime")
return
}
case "lst":
z.LastUpdate, bts, err = msgp.ReadTimeBytes(bts)
if err != nil {
err = msgp.WrapError(err, "LastUpdate")
return
}
case "id":
@ -2450,7 +2450,7 @@ func (z *TargetReplicationResyncStatus) UnmarshalMsg(bts []byte) (o []byte, err
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *TargetReplicationResyncStatus) Msgsize() (s int) {
s = 1 + 3 + msgp.TimeSize + 4 + msgp.TimeSize + 3 + msgp.StringPrefixSize + len(z.ResyncID) + 4 + msgp.TimeSize + 4 + msgp.IntSize + 3 + msgp.Int64Size + 4 + msgp.Int64Size + 3 + msgp.Int64Size + 4 + msgp.Int64Size + 4 + msgp.StringPrefixSize + len(z.Bucket) + 4 + msgp.StringPrefixSize + len(z.Object)
return
}
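The byte constants in the regenerated code are the MessagePack encodings of the renamed map key: "et" was the two-byte fixstr 0xa2 0x65 0x74, and "lst" becomes the three-byte fixstr 0xa3 0x6c 0x73 0x74 (fixstr prefix = 0xa0 | length, followed by the raw bytes). A small check of that encoding, using the tinylib msgp helper already imported by the generated file:

// Prints the msgpack encodings of the old and new field keys ("et" vs "lst").
package main

import (
	"fmt"

	"github.com/tinylib/msgp/msgp"
)

func main() {
	fmt.Printf("% x\n", msgp.AppendString(nil, "et"))  // a2 65 74
	fmt.Printf("% x\n", msgp.AppendString(nil, "lst")) // a3 6c 73 74
}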

View File

@ -24,6 +24,7 @@ import (
"fmt" "fmt"
"io" "io"
"math" "math"
"math/rand"
"net/http" "net/http"
"path" "path"
"reflect" "reflect"
@ -1519,13 +1520,12 @@ type ReplicationPool struct {
existingReplicaDeleteCh chan DeletedObjectReplicationInfo existingReplicaDeleteCh chan DeletedObjectReplicationInfo
mrfSaveCh chan MRFReplicateEntry mrfSaveCh chan MRFReplicateEntry
saveStateCh chan struct{} saveStateCh chan struct{}
workerSize int workerSize int
mrfWorkerSize int mrfWorkerSize int
activeWorkers int32 activeWorkers int32
activeMRFWorkers int32 activeMRFWorkers int32
priority string priority string
resyncState replicationResyncState resyncer *replicationResyncer
workerWg sync.WaitGroup workerWg sync.WaitGroup
mrfWorkerWg sync.WaitGroup mrfWorkerWg sync.WaitGroup
once sync.Once once sync.Once
@ -1578,7 +1578,7 @@ func NewReplicationPool(ctx context.Context, o ObjectLayer, opts replicationPool
mrfWorkerKillCh: make(chan struct{}, failedWorkers), mrfWorkerKillCh: make(chan struct{}, failedWorkers),
existingReplicaCh: make(chan ReplicateObjectInfo, 100000), existingReplicaCh: make(chan ReplicateObjectInfo, 100000),
existingReplicaDeleteCh: make(chan DeletedObjectReplicationInfo, 100000), existingReplicaDeleteCh: make(chan DeletedObjectReplicationInfo, 100000),
resyncState: replicationResyncState{statusMap: make(map[string]BucketReplicationResyncStatus)}, resyncer: newresyncer(),
mrfSaveCh: make(chan MRFReplicateEntry, 100000), mrfSaveCh: make(chan MRFReplicateEntry, 100000),
saveStateCh: make(chan struct{}, 1), saveStateCh: make(chan struct{}, 1),
ctx: ctx, ctx: ctx,
@ -1589,7 +1589,7 @@ func NewReplicationPool(ctx context.Context, o ObjectLayer, opts replicationPool
pool.ResizeWorkers(workers) pool.ResizeWorkers(workers)
pool.ResizeFailedWorkers(failedWorkers) pool.ResizeFailedWorkers(failedWorkers)
go pool.AddExistingObjectReplicateWorker() go pool.AddExistingObjectReplicateWorker()
go pool.updateResyncStatus(ctx, o) go pool.resyncer.PersistToDisk(ctx, o)
go pool.processMRF() go pool.processMRF()
go pool.persistMRF() go pool.persistMRF()
go pool.saveStatsToDisk() go pool.saveStatsToDisk()
@ -2147,8 +2147,8 @@ func resyncTarget(oi ObjectInfo, arn string, resetID string, resetBeforeDate tim
const resyncTimeInterval = time.Minute * 1 const resyncTimeInterval = time.Minute * 1
// updateResyncStatus persists in-memory resync metadata stats to disk at periodic intervals // PersistToDisk persists in-memory resync metadata stats to disk at periodic intervals
func (p *ReplicationPool) updateResyncStatus(ctx context.Context, objectAPI ObjectLayer) { func (s *replicationResyncer) PersistToDisk(ctx context.Context, objectAPI ObjectLayer) {
resyncTimer := time.NewTimer(resyncTimeInterval) resyncTimer := time.NewTimer(resyncTimeInterval)
defer resyncTimer.Stop() defer resyncTimer.Stop()
@ -2159,12 +2159,12 @@ func (p *ReplicationPool) updateResyncStatus(ctx context.Context, objectAPI Obje
for { for {
select { select {
case <-resyncTimer.C: case <-resyncTimer.C:
p.resyncState.RLock() s.RLock()
for bucket, brs := range p.resyncState.statusMap { for bucket, brs := range s.statusMap {
var updt bool var updt bool
// Save the replication status if one resync to any bucket target is still not finished // Save the replication status if one resync to any bucket target is still not finished
for _, st := range brs.TargetsMap { for _, st := range brs.TargetsMap {
if st.EndTime.Equal(timeSentinel) { if st.LastUpdate.Equal(timeSentinel) {
updt = true updt = true
break break
} }
@ -2181,7 +2181,7 @@ func (p *ReplicationPool) updateResyncStatus(ctx context.Context, objectAPI Obje
} }
} }
} }
p.resyncState.RUnlock() s.RUnlock()
resyncTimer.Reset(resyncTimeInterval) resyncTimer.Reset(resyncTimeInterval)
case <-ctx.Done(): case <-ctx.Done():
@ -2192,31 +2192,54 @@ func (p *ReplicationPool) updateResyncStatus(ctx context.Context, objectAPI Obje
} }
} }
const resyncWorkerCnt = 50 // limit on the number of bucket resyncs in progress at any given time
func newresyncer() *replicationResyncer {
rs := replicationResyncer{
statusMap: make(map[string]BucketReplicationResyncStatus),
workerSize: resyncWorkerCnt,
resyncCancelCh: make(chan struct{}, resyncWorkerCnt),
workerCh: make(chan struct{}, resyncWorkerCnt),
}
for i := 0; i < rs.workerSize; i++ {
rs.workerCh <- struct{}{}
}
return &rs
}
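newresyncer pre-fills workerCh with resyncWorkerCnt tokens so it acts as a counting semaphore: resyncBucket takes a token before walking a bucket and returns it in its deferred cleanup, capping concurrent bucket resyncs at 50. A generic sketch of the same token pattern (not MinIO code):

// Generic buffered-channel semaphore, the same idea as workerCh in newresyncer.
package main

import (
	"fmt"
	"sync"
)

func main() {
	const workers = 3
	tokens := make(chan struct{}, workers)
	for i := 0; i < workers; i++ { // pre-fill with tokens, like newresyncer does
		tokens <- struct{}{}
	}

	var wg sync.WaitGroup
	for job := 0; job < 10; job++ {
		wg.Add(1)
		go func(job int) {
			defer wg.Done()
			<-tokens                                // block until a worker slot is free
			defer func() { tokens <- struct{}{} }() // return the slot when done
			fmt.Println("resyncing job", job)
		}(job)
	}
	wg.Wait()
}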
// resyncBucket resyncs all qualifying objects as per replication rules for the target
// ARN
func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI ObjectLayer, heal bool, opts resyncOpts) {
select {
case <-s.workerCh: // block till a worker is available
case <-ctx.Done():
return
}
resyncStatus := ResyncFailed
defer func() {
s.Lock()
m := s.statusMap[opts.bucket]
st := m.TargetsMap[opts.arn]
st.LastUpdate = UTCNow()
st.ResyncStatus = resyncStatus
m.TargetsMap[opts.arn] = st
m.LastUpdate = UTCNow()
s.statusMap[opts.bucket] = m
s.Unlock()
globalSiteResyncMetrics.incBucket(opts, resyncStatus)
s.workerCh <- struct{}{}
}()
// Allocate new results channel to receive ObjectInfo.
objInfoCh := make(chan ObjectInfo)
cfg, err := getReplicationConfig(ctx, opts.bucket)
if err != nil {
logger.LogIf(ctx, fmt.Errorf("Replication resync of %s for arn %s failed with %w", opts.bucket, opts.arn, err))
return
}
tgts, err := globalBucketTargetSys.ListBucketTargets(ctx, opts.bucket)
if err != nil {
logger.LogIf(ctx, fmt.Errorf("Replication resync of %s for arn %s failed %w", opts.bucket, opts.arn, err))
return
}
rcfg := replicationConfig{
@ -2226,34 +2249,50 @@ func resyncBucket(ctx context.Context, bucket, arn string, heal bool, objectAPI
tgtArns := cfg.FilterTargetArns(
replication.ObjectOpts{
OpType: replication.ResyncReplicationType,
TargetArn: opts.arn,
})
if len(tgtArns) != 1 {
logger.LogIf(ctx, fmt.Errorf("Replication resync failed for %s - arn specified %s is missing in the replication config", opts.bucket, opts.arn))
return
}
tgt := globalBucketTargetSys.GetRemoteTargetClient(ctx, opts.arn)
if tgt == nil {
logger.LogIf(ctx, fmt.Errorf("Replication resync failed for %s - target could not be created for arn %s", opts.bucket, opts.arn))
return
}
// mark resync status as resync started
if !heal {
s.Lock()
m := s.statusMap[opts.bucket]
st := m.TargetsMap[opts.arn]
st.ResyncStatus = ResyncStarted
m.TargetsMap[opts.arn] = st
m.LastUpdate = UTCNow()
s.statusMap[opts.bucket] = m
s.Unlock()
}
// Walk through all object versions - Walk() is always in ascending order needed to ensure
// delete marker replicated to target after object version is first created.
if err := objectAPI.Walk(ctx, opts.bucket, "", objInfoCh, ObjectOptions{}); err != nil {
logger.LogIf(ctx, err)
return
}
s.RLock()
m := s.statusMap[opts.bucket]
st := m.TargetsMap[opts.arn]
s.RUnlock()
var lastCheckpoint string
if st.ResyncStatus == ResyncStarted || st.ResyncStatus == ResyncFailed {
lastCheckpoint = st.Object
}
for obj := range objInfoCh {
select {
case <-s.resyncCancelCh:
resyncStatus = ResyncCanceled
return
default:
}
if heal && lastCheckpoint != "" && lastCheckpoint != obj.Name {
continue
}
@ -2263,7 +2302,7 @@ func resyncBucket(ctx context.Context, bucket, arn string, heal bool, objectAPI
if !roi.ExistingObjResync.mustResync() {
continue
}
traceFn := s.trace(tgt.ResetID, fmt.Sprintf("%s/%s (%s)", opts.bucket, roi.Name, roi.VersionID))
if roi.DeleteMarker || !roi.VersionPurgeStatus.Empty() {
versionID := ""
dmVersionID := ""
@ -2298,96 +2337,134 @@ func resyncBucket(ctx context.Context, bucket, arn string, heal bool, objectAPI
ReplicationProxyRequest: "false",
},
})
s.Lock()
m = s.statusMap[opts.bucket]
st = m.TargetsMap[opts.arn]
st.Object = roi.Name
success := true
if err != nil {
if roi.DeleteMarker && isErrMethodNotAllowed(ErrorRespToObjectError(err, opts.bucket, roi.Name)) {
st.ReplicatedCount++
} else {
st.FailedCount++
success = false
}
} else {
st.ReplicatedCount++
st.ReplicatedSize += roi.Size
}
m.TargetsMap[opts.arn] = st
m.LastUpdate = UTCNow()
s.statusMap[opts.bucket] = m
s.Unlock()
traceFn(err)
globalSiteResyncMetrics.updateMetric(roi, success, opts.resyncID)
}
resyncStatus = ResyncCompleted
}
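Cancellation is cooperative: the cancel path (not fully shown in this excerpt) queues tokens on resyncCancelCh, and the object loop above polls that channel with a non-blocking select, so the worker stops at the next object boundary and the deferred block records ResyncCanceled. A stripped-down sketch of the polling pattern, with hypothetical names:

// Sketch of the non-blocking cancel check used inside the resync object loop.
package main

import "fmt"

func process(objects []string, cancelCh <-chan struct{}) (done int) {
	for _, obj := range objects {
		select {
		case <-cancelCh: // a cancel token was queued; stop at this object boundary
			fmt.Println("canceled before", obj)
			return done
		default: // no cancellation pending, keep going
		}
		done++
	}
	return done
}

func main() {
	cancelCh := make(chan struct{}, 1)
	cancelCh <- struct{}{} // simulate a cancel request
	fmt.Println("processed:", process([]string{"a", "b", "c"}, cancelCh))
}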
// start replication resync for the remote target ARN specified
func (s *replicationResyncer) start(ctx context.Context, objAPI ObjectLayer, opts resyncOpts) error {
if opts.bucket == "" {
return fmt.Errorf("bucket name is empty")
}
if opts.arn == "" {
return fmt.Errorf("target ARN specified for resync is empty")
}
// Check if the current bucket has quota restrictions, if not skip it
cfg, err := getReplicationConfig(ctx, opts.bucket)
if err != nil {
return err
}
tgtArns := cfg.FilterTargetArns(
replication.ObjectOpts{
OpType: replication.ResyncReplicationType,
TargetArn: opts.arn,
})
if len(tgtArns) == 0 {
return fmt.Errorf("arn %s specified for resync not found in replication config", opts.arn)
}
globalReplicationPool.resyncer.RLock()
data, ok := globalReplicationPool.resyncer.statusMap[opts.bucket]
globalReplicationPool.resyncer.RUnlock()
if !ok {
data, err = loadBucketResyncMetadata(ctx, opts.bucket, objAPI)
if err != nil {
return err
}
}
// validate if resync is in progress for this arn
for tArn, st := range data.TargetsMap {
if opts.arn == tArn && (st.ResyncStatus == ResyncStarted || st.ResyncStatus == ResyncPending) {
return fmt.Errorf("Resync of bucket %s is already in progress for remote bucket %s", opts.bucket, opts.arn)
}
}
status := TargetReplicationResyncStatus{
ResyncID: opts.resyncID,
ResyncBeforeDate: opts.resyncBefore,
StartTime: UTCNow(),
ResyncStatus: ResyncPending,
Bucket: opts.bucket,
}
data.TargetsMap[opts.arn] = status
if err = saveResyncStatus(ctx, opts.bucket, data, objAPI); err != nil {
return err
}
globalReplicationPool.resyncer.Lock()
defer globalReplicationPool.resyncer.Unlock()
brs, ok := globalReplicationPool.resyncer.statusMap[opts.bucket]
if !ok {
brs = BucketReplicationResyncStatus{
Version: resyncMetaVersion,
TargetsMap: make(map[string]TargetReplicationResyncStatus),
}
}
brs.TargetsMap[opts.arn] = status
globalReplicationPool.resyncer.statusMap[opts.bucket] = brs
go globalReplicationPool.resyncer.resyncBucket(GlobalContext, objAPI, false, opts)
return nil
}
func (s *replicationResyncer) trace(resyncID string, path string) func(err error) {
startTime := time.Now()
return func(err error) {
duration := time.Since(startTime)
if globalTrace.NumSubscribers(madmin.TraceReplicationResync) > 0 {
globalTrace.Publish(replicationResyncTrace(resyncID, startTime, duration, path, err))
}
}
}
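trace returns a closure that captures the start time, so the caller invokes it once per object with the final error and the duration is computed automatically before publishing to subscribers of madmin.TraceReplicationResync. The same capture-then-report pattern in isolation, with a stand-in for the publish step:

// Closure-based timing helper, mirroring replicationResyncer.trace (the print is a stand-in for publishing).
package main

import (
	"fmt"
	"time"
)

func trace(path string) func(err error) {
	start := time.Now()
	return func(err error) {
		// duration is measured from the moment trace() was called to this callback.
		fmt.Printf("path=%s duration=%s err=%v\n", path, time.Since(start), err)
	}
}

func main() {
	done := trace("bucket/object (v1)")
	time.Sleep(10 * time.Millisecond) // stand-in for the replication work
	done(nil)
}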
func replicationResyncTrace(resyncID string, startTime time.Time, duration time.Duration, path string, err error) madmin.TraceInfo {
var errStr string
if err != nil {
errStr = err.Error()
}
funcName := fmt.Sprintf("replication.(resyncID=%s)", resyncID)
return madmin.TraceInfo{
TraceType: madmin.TraceReplicationResync,
Time: startTime,
NodeName: globalLocalNodeName,
FuncName: funcName,
Duration: duration,
Path: path,
Error: errStr,
}
}
// delete resync metadata from replication resync state in memory
func (p *ReplicationPool) deleteResyncMetadata(ctx context.Context, bucket string) {
if p == nil {
return
}
p.resyncer.Lock()
delete(p.resyncer.statusMap, bucket)
defer p.resyncer.Unlock()
globalSiteResyncMetrics.deleteBucket(bucket)
}
// initResync - initializes bucket replication resync for all buckets.
@ -2396,12 +2473,44 @@ func (p *ReplicationPool) initResync(ctx context.Context, buckets []BucketInfo,
return errServerNotInitialized
}
// Load bucket metadata sys in background
go p.startResyncRoutine(ctx, buckets, objAPI)
return nil
}
func (p *ReplicationPool) startResyncRoutine(ctx context.Context, buckets []BucketInfo, objAPI ObjectLayer) {
r := rand.New(rand.NewSource(time.Now().UnixNano()))
// Run the replication resync in a loop
for {
if err := p.loadResync(ctx, buckets, objAPI); err == nil {
<-ctx.Done()
return
}
duration := time.Duration(r.Float64() * float64(time.Minute))
if duration < time.Second {
// Make sure to sleep at least a second to avoid high CPU ticks.
duration = time.Second
}
time.Sleep(duration)
}
}
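The retry delay above is a random fraction of a minute, clamped to at least one second, so nodes that failed to become the resync leader do not retry the lock in lock-step. In isolation, the jitter computation looks like this:

// The jittered sleep used by startResyncRoutine: a random duration in [1s, 1m).
package main

import (
	"fmt"
	"math/rand"
	"time"
)

func main() {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	d := time.Duration(r.Float64() * float64(time.Minute))
	if d < time.Second { // never retry faster than once per second
		d = time.Second
	}
	fmt.Println("sleeping for", d)
}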
var replicationResyncLockTimeout = newDynamicTimeoutWithOpts(dynamicTimeoutOpts{
timeout: 30 * time.Second,
minimum: 10 * time.Second,
retryInterval: time.Second,
})
// Loads bucket replication resync statuses into memory.
func (p *ReplicationPool) loadResync(ctx context.Context, buckets []BucketInfo, objAPI ObjectLayer) error {
// Make sure only one node running resync on the cluster.
locker := objAPI.NewNSLock(minioMetaBucket, "replication/resync.lock")
lkctx, err := locker.GetLock(ctx, replicationResyncLockTimeout)
if err != nil {
return err
}
ctx = lkctx.Context()
defer lkctx.Cancel()
// No unlock for "leader" lock.
for index := range buckets {
meta, err := loadBucketResyncMetadata(ctx, buckets[index].Name, objAPI)
if err != nil {
@ -2410,30 +2519,38 @@ func (p *ReplicationPool) loadResync(ctx context.Context, buckets []BucketInfo,
}
continue
}
p.resyncer.Lock()
p.resyncer.statusMap[buckets[index].Name] = meta
p.resyncer.Unlock()
}
for index := range buckets {
bucket := buckets[index].Name
var tgts map[string]TargetReplicationResyncStatus
p.resyncer.RLock()
m, ok := p.resyncer.statusMap[bucket]
if ok {
tgts = m.cloneTgtStats()
}
p.resyncer.RUnlock()
for arn, st := range tgts {
switch st.ResyncStatus {
case ResyncFailed, ResyncStarted, ResyncPending:
go p.resyncer.resyncBucket(ctx, objAPI, true, resyncOpts{
bucket: bucket,
arn: arn,
resyncID: st.ResyncID,
resyncBefore: st.ResyncBeforeDate,
})
}
}
}
return nil
}
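loadResync first takes a cluster-wide namespace lock on "replication/resync.lock" and deliberately never releases it, so exactly one node becomes the resync "leader"; the other nodes keep failing the lock and retry with jitter in startResyncRoutine until their context ends. A simplified single-process sketch of that acquire-and-hold idea, using a plain sync.Mutex (Go 1.18+ for TryLock) rather than MinIO's distributed NSLock:

// Single-process sketch of "acquire a lock and never release it" leader selection.
package main

import (
	"fmt"
	"sync"
)

func main() {
	var leaderLock sync.Mutex
	for node := 0; node < 3; node++ {
		if leaderLock.TryLock() { // the winner holds the lock for its lifetime (never unlocks)
			fmt.Println("node", node, "becomes the resync leader")
			continue
		}
		fmt.Println("node", node, "did not get the lock; it would retry with jitter")
	}
}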
// load bucket resync metadata from disk
func loadBucketResyncMetadata(ctx context.Context, bucket string, objAPI ObjectLayer) (brs BucketReplicationResyncStatus, e error) {
brs = newBucketResyncStatus(bucket)
resyncDirPath := path.Join(bucketMetaPrefix, bucket, replicationDir)
data, err := readConfig(GlobalContext, objAPI, pathJoin(resyncDirPath, resyncFileName))
if err != nil && err != errConfigNotFound {

View File

@ -487,6 +487,22 @@ func (sys *BucketTargetSys) getRemoteARN(bucket string, target *madmin.BucketTar
return generateARN(target)
}
// getRemoteARNForPeer returns the remote target for a peer site in site replication
func (sys *BucketTargetSys) getRemoteARNForPeer(bucket string, peer madmin.PeerInfo) string {
tgts := sys.targetsMap[bucket]
for _, target := range tgts {
ep, _ := url.Parse(peer.Endpoint)
if target.SourceBucket == bucket &&
target.TargetBucket == bucket &&
target.Endpoint == ep.Host &&
target.Secure == (ep.Scheme == "https") &&
target.Type == madmin.ReplicationService {
return target.Arn
}
}
return ""
}
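getRemoteARNForPeer matches an existing bucket target against the peer by comparing the parsed endpoint host and scheme, so the ARN already provisioned for site replication is reused instead of creating a new target. The url.Parse fields it relies on behave like this (example endpoint is a placeholder):

// How url.Parse splits a peer endpoint into the Host and Scheme compared in getRemoteARNForPeer.
package main

import (
	"fmt"
	"net/url"
)

func main() {
	ep, err := url.Parse("https://peer-site.example.com:9000")
	if err != nil {
		panic(err)
	}
	fmt.Println(ep.Host)              // peer-site.example.com:9000
	fmt.Println(ep.Scheme == "https") // true -> compared against target.Secure
}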
// generate ARN that is unique to this target type
func generateARN(t *madmin.BucketTarget) string {
arn := madmin.ARN{

View File

@ -281,6 +281,9 @@ var (
// Cluster replication manager.
globalSiteReplicationSys SiteReplicationSys
// Cluster replication resync metrics
globalSiteResyncMetrics *siteResyncMetrics
// Is set to true when Bucket federation is requested
// and is 'true' when etcdConfig.PathPrefix is empty
globalBucketFederation bool

View File

@ -29,6 +29,7 @@ type collectMetricsOpts struct {
hosts map[string]struct{}
disks map[string]struct{}
jobID string
depID string
}
func collectLocalMetrics(types madmin.MetricType, opts collectMetricsOpts) (m madmin.RealtimeMetrics) {
@ -65,7 +66,9 @@ func collectLocalMetrics(types madmin.MetricType, opts collectMetricsOpts) (m ma
if types.Contains(madmin.MetricsBatchJobs) {
m.Aggregated.BatchJobs = globalBatchJobsMetrics.report(opts.jobID)
}
if types.Contains(madmin.MetricsSiteResync) {
m.Aggregated.SiteResync = globalSiteResyncMetrics.report(opts.depID)
}
// Add types...
// ByHost is a shallow reference, so careful about sharing.

View File

@ -202,6 +202,8 @@ func (client *peerRESTClient) GetMetrics(ctx context.Context, t madmin.MetricTyp
values.Set(peerRESTDisk, disk)
}
values.Set(peerRESTJobID, opts.jobID)
values.Set(peerRESTDepID, opts.depID)
respBody, err := client.callWithContext(ctx, peerRESTMethodMetrics, values, nil, -1)
if err != nil {
return

View File

@ -97,6 +97,7 @@ const (
peerRESTMetricsTypes = "types"
peerRESTDisk = "disk"
peerRESTJobID = "job-id"
peerRESTDepID = "depID"
peerRESTStartRebalance = "start-rebalance"
peerRESTListenBucket = "bucket"

View File

@ -437,6 +437,7 @@ func (s *peerRESTServer) GetMetricsHandler(w http.ResponseWriter, r *http.Reques
diskMap[disk] = struct{}{}
}
jobID := r.Form.Get(peerRESTJobID)
depID := r.Form.Get(peerRESTDepID)
ctx, cancel := context.WithCancel(r.Context())
defer cancel()
@ -444,8 +445,8 @@ func (s *peerRESTServer) GetMetricsHandler(w http.ResponseWriter, r *http.Reques
info := collectLocalMetrics(types, collectMetricsOpts{
disks: diskMap,
jobID: jobID,
depID: depID,
})
logger.LogIf(ctx, gob.NewEncoder(w).Encode(info))
}

View File

@ -330,6 +330,8 @@ func initAllSubsystems(ctx context.Context) {
// Create new ILM tier configuration subsystem
globalTierConfigMgr = NewTierConfigMgr()
globalSiteResyncMetrics = newSiteResyncMetrics(GlobalContext)
}
func configRetriableErrors(err error) bool {

View File

@ -0,0 +1,334 @@
// Copyright (c) 2015-2022 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package cmd
import (
"context"
"math/rand"
"sync"
"time"
"github.com/minio/madmin-go"
)
//go:generate msgp -file=$GOFILE
// SiteResyncStatus captures current replication resync status for a target site
type SiteResyncStatus struct {
Version int `json:"version" msg:"v"`
// Overall site status
Status ResyncStatusType `json:"st" msg:"ss"`
DeplID string `json:"dId" msg:"did"`
BucketStatuses map[string]ResyncStatusType `json:"buckets" msg:"bkts"`
TotBuckets int `json:"totbuckets" msg:"tb"`
TargetReplicationResyncStatus `json:"currSt" msg:"cst"`
}
func (s *SiteResyncStatus) clone() SiteResyncStatus {
if s == nil {
return SiteResyncStatus{}
}
o := *s
o.BucketStatuses = make(map[string]ResyncStatusType, len(s.BucketStatuses))
for b, st := range s.BucketStatuses {
o.BucketStatuses[b] = st
}
return o
}
const (
siteResyncPrefix = bucketMetaPrefix + "/site-replication/resync"
)
type resyncState struct {
resyncID string
LastSaved time.Time
}
//msgp:ignore siteResyncMetrics
type siteResyncMetrics struct {
sync.RWMutex
// resyncStatus maps resync ID to resync status for peer
resyncStatus map[string]SiteResyncStatus
// map peer deployment ID to resync ID
peerResyncMap map[string]resyncState
}
func newSiteResyncMetrics(ctx context.Context) *siteResyncMetrics {
s := siteResyncMetrics{
resyncStatus: make(map[string]SiteResyncStatus),
peerResyncMap: make(map[string]resyncState),
}
go s.save(ctx)
go s.init(ctx)
return &s
}
// init site resync metrics
func (sm *siteResyncMetrics) init(ctx context.Context) {
r := rand.New(rand.NewSource(time.Now().UnixNano()))
// Run the site resync metrics load in a loop
for {
if err := sm.load(ctx, newObjectLayerFn()); err == nil {
<-ctx.Done()
return
}
duration := time.Duration(r.Float64() * float64(time.Second*10))
if duration < time.Second {
// Make sure to sleep at least a second to avoid high CPU ticks.
duration = time.Second
}
time.Sleep(duration)
}
}
// load resync metrics saved on disk into memory
func (sm *siteResyncMetrics) load(ctx context.Context, objAPI ObjectLayer) error {
if objAPI == nil {
return errServerNotInitialized
}
info, err := globalSiteReplicationSys.GetClusterInfo(ctx)
if err != nil {
return err
}
if !info.Enabled {
return nil
}
for _, peer := range info.Sites {
if peer.DeploymentID == globalDeploymentID {
continue
}
rs, err := loadSiteResyncMetadata(ctx, objAPI, peer.DeploymentID)
if err != nil {
return err
}
sm.Lock()
defer sm.Unlock()
if _, ok := sm.peerResyncMap[peer.DeploymentID]; !ok {
sm.peerResyncMap[peer.DeploymentID] = resyncState{resyncID: rs.ResyncID, LastSaved: time.Time{}}
sm.resyncStatus[rs.ResyncID] = rs
}
}
return nil
}
func (sm *siteResyncMetrics) report(dID string) *madmin.SiteResyncMetrics {
sm.RLock()
defer sm.RUnlock()
rst, ok := sm.peerResyncMap[dID]
if !ok {
return nil
}
rs, ok := sm.resyncStatus[rst.resyncID]
if !ok {
return nil
}
m := madmin.SiteResyncMetrics{
CollectedAt: rs.LastUpdate,
StartTime: rs.StartTime,
LastUpdate: rs.LastUpdate,
ResyncStatus: rs.Status.String(),
ResyncID: rst.resyncID,
DeplID: rs.DeplID,
ReplicatedSize: rs.ReplicatedSize,
ReplicatedCount: rs.ReplicatedCount,
FailedSize: rs.FailedSize,
FailedCount: rs.FailedCount,
Bucket: rs.Bucket,
Object: rs.Object,
NumBuckets: int64(rs.TotBuckets),
}
for b, st := range rs.BucketStatuses {
if st == ResyncFailed {
m.FailedBuckets = append(m.FailedBuckets, b)
}
}
return &m
}
// save in-memory stats to disk
func (sm *siteResyncMetrics) save(ctx context.Context) {
sTimer := time.NewTimer(siteResyncSaveInterval)
defer sTimer.Stop()
for {
select {
case <-sTimer.C:
if globalSiteReplicationSys.isEnabled() {
sm.Lock()
for dID, rs := range sm.peerResyncMap {
st, ok := sm.resyncStatus[rs.resyncID]
if ok {
updt := st.Status.isValid() && st.LastUpdate.After(rs.LastSaved)
if !updt {
continue
}
rs.LastSaved = UTCNow()
sm.peerResyncMap[dID] = rs
go saveSiteResyncMetadata(ctx, st, newObjectLayerFn())
}
}
sm.Unlock()
}
sTimer.Reset(siteResyncSaveInterval)
case <-ctx.Done():
return
}
}
}
// update overall site resync state
func (sm *siteResyncMetrics) updateState(s SiteResyncStatus) {
if !globalSiteReplicationSys.isEnabled() {
return
}
sm.Lock()
defer sm.Unlock()
switch s.Status {
case ResyncStarted:
sm.peerResyncMap[s.DeplID] = resyncState{resyncID: s.ResyncID, LastSaved: time.Time{}}
sm.resyncStatus[s.ResyncID] = s
case ResyncCompleted, ResyncCanceled, ResyncFailed:
st, ok := sm.resyncStatus[s.ResyncID]
if ok {
st.LastUpdate = s.LastUpdate
st.Status = s.Status
}
sm.resyncStatus[s.ResyncID] = st
}
}
// incBucket updates the resync status for a bucket and recomputes the overall site resync state
func (sm *siteResyncMetrics) incBucket(o resyncOpts, bktStatus ResyncStatusType) {
if !globalSiteReplicationSys.isEnabled() {
return
}
sm.Lock()
defer sm.Unlock()
st, ok := sm.resyncStatus[o.resyncID]
if ok {
switch bktStatus {
case ResyncCompleted:
st.BucketStatuses[o.bucket] = ResyncCompleted
st.Status = siteResyncStatus(st.Status, st.BucketStatuses)
st.LastUpdate = UTCNow()
sm.resyncStatus[o.resyncID] = st
case ResyncFailed:
st.BucketStatuses[o.bucket] = ResyncFailed
st.Status = siteResyncStatus(st.Status, st.BucketStatuses)
st.LastUpdate = UTCNow()
sm.resyncStatus[o.resyncID] = st
}
}
}
// remove deleted bucket from active resync tracking
func (sm *siteResyncMetrics) deleteBucket(b string) {
if !globalSiteReplicationSys.isEnabled() {
return
}
sm.Lock()
defer sm.Unlock()
for _, rs := range sm.peerResyncMap {
st, ok := sm.resyncStatus[rs.resyncID]
if !ok {
return
}
switch st.Status {
case ResyncCompleted, ResyncFailed:
return
default:
delete(st.BucketStatuses, b)
}
}
}
// returns overall resync status from individual bucket resync status map
func siteResyncStatus(currSt ResyncStatusType, m map[string]ResyncStatusType) ResyncStatusType {
// avoid overwriting canceled resync status
if currSt != ResyncStarted {
return currSt
}
totBuckets := len(m)
var cmpCount, failCount int
for _, st := range m {
switch st {
case ResyncCompleted:
cmpCount++
case ResyncFailed:
failCount++
}
}
if cmpCount == totBuckets {
return ResyncCompleted
}
if cmpCount+failCount == totBuckets {
return ResyncFailed
}
return ResyncStarted
}
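siteResyncStatus only rolls the overall state forward while it is ResyncStarted: all buckets completed yields ResyncCompleted, all buckets finished with at least one failure yields ResyncFailed, and anything still outstanding keeps ResyncStarted; a canceled (or already terminal) overall status is never overwritten. A few worked examples of that rule, re-implemented locally for illustration rather than calling the MinIO function:

// Worked examples of the aggregation rule in siteResyncStatus (local re-implementation for illustration).
package main

import "fmt"

type state int

const (
	started state = iota // 0
	completed            // 1
	failed               // 2
	canceled             // 3
)

func overall(curr state, buckets map[string]state) state {
	if curr != started { // e.g. a canceled resync is never overwritten
		return curr
	}
	var done, bad int
	for _, st := range buckets {
		switch st {
		case completed:
			done++
		case failed:
			bad++
		}
	}
	switch {
	case done == len(buckets):
		return completed
	case done+bad == len(buckets):
		return failed
	default:
		return started
	}
}

func main() {
	fmt.Println(overall(started, map[string]state{"a": completed, "b": completed})) // 1: completed
	fmt.Println(overall(started, map[string]state{"a": completed, "b": failed}))    // 2: failed
	fmt.Println(overall(started, map[string]state{"a": completed, "b": started}))   // 0: still started
	fmt.Println(overall(canceled, map[string]state{"a": completed}))                // 3: stays canceled
}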
// update resync metrics per object
func (sm *siteResyncMetrics) updateMetric(roi ReplicateObjectInfo, success bool, resyncID string) {
if !globalSiteReplicationSys.isEnabled() {
return
}
sm.Lock()
defer sm.Unlock()
s := sm.resyncStatus[resyncID]
if success {
s.ReplicatedCount++
s.ReplicatedSize += roi.Size
} else {
s.FailedCount++
s.FailedSize += roi.Size
}
s.Bucket = roi.Bucket
s.Object = roi.Name
s.LastUpdate = UTCNow()
sm.resyncStatus[resyncID] = s
}
// status returns the current in-memory resync status for this deployment
func (sm *siteResyncMetrics) status(dID string) (rs SiteResyncStatus, err error) {
sm.RLock()
defer sm.RUnlock()
if rst, ok1 := sm.peerResyncMap[dID]; ok1 {
if st, ok2 := sm.resyncStatus[rst.resyncID]; ok2 {
return st.clone(), nil
}
}
return rs, errSRNoResync
}
// siteStatus returns the latest resync status for this deployment, preferring in-memory state and falling back to the on-disk copy
func (sm *siteResyncMetrics) siteStatus(ctx context.Context, objAPI ObjectLayer, dID string) (rs SiteResyncStatus, err error) {
if !globalSiteReplicationSys.isEnabled() {
return rs, errSRNotEnabled
}
// check in-memory status
rs, err = sm.status(dID)
if err == nil {
return rs, nil
}
// check disk resync status
rs, err = loadSiteResyncMetadata(ctx, objAPI, dID)
if err != nil && err == errConfigNotFound {
return rs, nil
}
return rs, err
}

View File

@ -0,0 +1,318 @@
package cmd
// Code generated by github.com/tinylib/msgp DO NOT EDIT.
import (
"github.com/tinylib/msgp/msgp"
)
// DecodeMsg implements msgp.Decodable
func (z *SiteResyncStatus) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
var zb0001 uint32
zb0001, err = dc.ReadMapHeader()
if err != nil {
err = msgp.WrapError(err)
return
}
for zb0001 > 0 {
zb0001--
field, err = dc.ReadMapKeyPtr()
if err != nil {
err = msgp.WrapError(err)
return
}
switch msgp.UnsafeString(field) {
case "v":
z.Version, err = dc.ReadInt()
if err != nil {
err = msgp.WrapError(err, "Version")
return
}
case "ss":
err = z.Status.DecodeMsg(dc)
if err != nil {
err = msgp.WrapError(err, "Status")
return
}
case "did":
z.DeplID, err = dc.ReadString()
if err != nil {
err = msgp.WrapError(err, "DeplID")
return
}
case "bkts":
var zb0002 uint32
zb0002, err = dc.ReadMapHeader()
if err != nil {
err = msgp.WrapError(err, "BucketStatuses")
return
}
if z.BucketStatuses == nil {
z.BucketStatuses = make(map[string]ResyncStatusType, zb0002)
} else if len(z.BucketStatuses) > 0 {
for key := range z.BucketStatuses {
delete(z.BucketStatuses, key)
}
}
for zb0002 > 0 {
zb0002--
var za0001 string
var za0002 ResyncStatusType
za0001, err = dc.ReadString()
if err != nil {
err = msgp.WrapError(err, "BucketStatuses")
return
}
err = za0002.DecodeMsg(dc)
if err != nil {
err = msgp.WrapError(err, "BucketStatuses", za0001)
return
}
z.BucketStatuses[za0001] = za0002
}
case "tb":
z.TotBuckets, err = dc.ReadInt()
if err != nil {
err = msgp.WrapError(err, "TotBuckets")
return
}
case "cst":
err = z.TargetReplicationResyncStatus.DecodeMsg(dc)
if err != nil {
err = msgp.WrapError(err, "TargetReplicationResyncStatus")
return
}
default:
err = dc.Skip()
if err != nil {
err = msgp.WrapError(err)
return
}
}
}
return
}
// EncodeMsg implements msgp.Encodable
func (z *SiteResyncStatus) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 6
// write "v"
err = en.Append(0x86, 0xa1, 0x76)
if err != nil {
return
}
err = en.WriteInt(z.Version)
if err != nil {
err = msgp.WrapError(err, "Version")
return
}
// write "ss"
err = en.Append(0xa2, 0x73, 0x73)
if err != nil {
return
}
err = z.Status.EncodeMsg(en)
if err != nil {
err = msgp.WrapError(err, "Status")
return
}
// write "did"
err = en.Append(0xa3, 0x64, 0x69, 0x64)
if err != nil {
return
}
err = en.WriteString(z.DeplID)
if err != nil {
err = msgp.WrapError(err, "DeplID")
return
}
// write "bkts"
err = en.Append(0xa4, 0x62, 0x6b, 0x74, 0x73)
if err != nil {
return
}
err = en.WriteMapHeader(uint32(len(z.BucketStatuses)))
if err != nil {
err = msgp.WrapError(err, "BucketStatuses")
return
}
for za0001, za0002 := range z.BucketStatuses {
err = en.WriteString(za0001)
if err != nil {
err = msgp.WrapError(err, "BucketStatuses")
return
}
err = za0002.EncodeMsg(en)
if err != nil {
err = msgp.WrapError(err, "BucketStatuses", za0001)
return
}
}
// write "tb"
err = en.Append(0xa2, 0x74, 0x62)
if err != nil {
return
}
err = en.WriteInt(z.TotBuckets)
if err != nil {
err = msgp.WrapError(err, "TotBuckets")
return
}
// write "cst"
err = en.Append(0xa3, 0x63, 0x73, 0x74)
if err != nil {
return
}
err = z.TargetReplicationResyncStatus.EncodeMsg(en)
if err != nil {
err = msgp.WrapError(err, "TargetReplicationResyncStatus")
return
}
return
}
// MarshalMsg implements msgp.Marshaler
func (z *SiteResyncStatus) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 6
// string "v"
o = append(o, 0x86, 0xa1, 0x76)
o = msgp.AppendInt(o, z.Version)
// string "ss"
o = append(o, 0xa2, 0x73, 0x73)
o, err = z.Status.MarshalMsg(o)
if err != nil {
err = msgp.WrapError(err, "Status")
return
}
// string "did"
o = append(o, 0xa3, 0x64, 0x69, 0x64)
o = msgp.AppendString(o, z.DeplID)
// string "bkts"
o = append(o, 0xa4, 0x62, 0x6b, 0x74, 0x73)
o = msgp.AppendMapHeader(o, uint32(len(z.BucketStatuses)))
for za0001, za0002 := range z.BucketStatuses {
o = msgp.AppendString(o, za0001)
o, err = za0002.MarshalMsg(o)
if err != nil {
err = msgp.WrapError(err, "BucketStatuses", za0001)
return
}
}
// string "tb"
o = append(o, 0xa2, 0x74, 0x62)
o = msgp.AppendInt(o, z.TotBuckets)
// string "cst"
o = append(o, 0xa3, 0x63, 0x73, 0x74)
o, err = z.TargetReplicationResyncStatus.MarshalMsg(o)
if err != nil {
err = msgp.WrapError(err, "TargetReplicationResyncStatus")
return
}
return
}
// UnmarshalMsg implements msgp.Unmarshaler
func (z *SiteResyncStatus) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
var zb0001 uint32
zb0001, bts, err = msgp.ReadMapHeaderBytes(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
for zb0001 > 0 {
zb0001--
field, bts, err = msgp.ReadMapKeyZC(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
switch msgp.UnsafeString(field) {
case "v":
z.Version, bts, err = msgp.ReadIntBytes(bts)
if err != nil {
err = msgp.WrapError(err, "Version")
return
}
case "ss":
bts, err = z.Status.UnmarshalMsg(bts)
if err != nil {
err = msgp.WrapError(err, "Status")
return
}
case "did":
z.DeplID, bts, err = msgp.ReadStringBytes(bts)
if err != nil {
err = msgp.WrapError(err, "DeplID")
return
}
case "bkts":
var zb0002 uint32
zb0002, bts, err = msgp.ReadMapHeaderBytes(bts)
if err != nil {
err = msgp.WrapError(err, "BucketStatuses")
return
}
if z.BucketStatuses == nil {
z.BucketStatuses = make(map[string]ResyncStatusType, zb0002)
} else if len(z.BucketStatuses) > 0 {
for key := range z.BucketStatuses {
delete(z.BucketStatuses, key)
}
}
for zb0002 > 0 {
var za0001 string
var za0002 ResyncStatusType
zb0002--
za0001, bts, err = msgp.ReadStringBytes(bts)
if err != nil {
err = msgp.WrapError(err, "BucketStatuses")
return
}
bts, err = za0002.UnmarshalMsg(bts)
if err != nil {
err = msgp.WrapError(err, "BucketStatuses", za0001)
return
}
z.BucketStatuses[za0001] = za0002
}
case "tb":
z.TotBuckets, bts, err = msgp.ReadIntBytes(bts)
if err != nil {
err = msgp.WrapError(err, "TotBuckets")
return
}
case "cst":
bts, err = z.TargetReplicationResyncStatus.UnmarshalMsg(bts)
if err != nil {
err = msgp.WrapError(err, "TargetReplicationResyncStatus")
return
}
default:
bts, err = msgp.Skip(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
}
}
o = bts
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *SiteResyncStatus) Msgsize() (s int) {
s = 1 + 2 + msgp.IntSize + 3 + z.Status.Msgsize() + 4 + msgp.StringPrefixSize + len(z.DeplID) + 5 + msgp.MapHeaderSize
if z.BucketStatuses != nil {
for za0001, za0002 := range z.BucketStatuses {
_ = za0002
s += msgp.StringPrefixSize + len(za0001) + za0002.Msgsize()
}
}
s += 3 + msgp.IntSize + 4 + z.TargetReplicationResyncStatus.Msgsize()
return
}

View File

@ -0,0 +1,123 @@
package cmd
// Code generated by github.com/tinylib/msgp DO NOT EDIT.
import (
"bytes"
"testing"
"github.com/tinylib/msgp/msgp"
)
func TestMarshalUnmarshalSiteResyncStatus(t *testing.T) {
v := SiteResyncStatus{}
bts, err := v.MarshalMsg(nil)
if err != nil {
t.Fatal(err)
}
left, err := v.UnmarshalMsg(bts)
if err != nil {
t.Fatal(err)
}
if len(left) > 0 {
t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left)
}
left, err = msgp.Skip(bts)
if err != nil {
t.Fatal(err)
}
if len(left) > 0 {
t.Errorf("%d bytes left over after Skip(): %q", len(left), left)
}
}
func BenchmarkMarshalMsgSiteResyncStatus(b *testing.B) {
v := SiteResyncStatus{}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
v.MarshalMsg(nil)
}
}
func BenchmarkAppendMsgSiteResyncStatus(b *testing.B) {
v := SiteResyncStatus{}
bts := make([]byte, 0, v.Msgsize())
bts, _ = v.MarshalMsg(bts[0:0])
b.SetBytes(int64(len(bts)))
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
bts, _ = v.MarshalMsg(bts[0:0])
}
}
func BenchmarkUnmarshalSiteResyncStatus(b *testing.B) {
v := SiteResyncStatus{}
bts, _ := v.MarshalMsg(nil)
b.ReportAllocs()
b.SetBytes(int64(len(bts)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := v.UnmarshalMsg(bts)
if err != nil {
b.Fatal(err)
}
}
}
func TestEncodeDecodeSiteResyncStatus(t *testing.T) {
v := SiteResyncStatus{}
var buf bytes.Buffer
msgp.Encode(&buf, &v)
m := v.Msgsize()
if buf.Len() > m {
t.Log("WARNING: TestEncodeDecodeSiteResyncStatus Msgsize() is inaccurate")
}
vn := SiteResyncStatus{}
err := msgp.Decode(&buf, &vn)
if err != nil {
t.Error(err)
}
buf.Reset()
msgp.Encode(&buf, &v)
err = msgp.NewReader(&buf).Skip()
if err != nil {
t.Error(err)
}
}
func BenchmarkEncodeSiteResyncStatus(b *testing.B) {
v := SiteResyncStatus{}
var buf bytes.Buffer
msgp.Encode(&buf, &v)
b.SetBytes(int64(buf.Len()))
en := msgp.NewWriter(msgp.Nowhere)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
v.EncodeMsg(en)
}
en.Flush()
}
func BenchmarkDecodeSiteResyncStatus(b *testing.B) {
v := SiteResyncStatus{}
var buf bytes.Buffer
msgp.Encode(&buf, &v)
b.SetBytes(int64(buf.Len()))
rd := msgp.NewEndlessReader(buf.Bytes(), b)
dc := msgp.NewReader(rd)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
err := v.DecodeMsg(dc)
if err != nil {
b.Fatal(err)
}
}
}

View File

@ -1,4 +1,4 @@
// Copyright (c) 2015-2022 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
@ -21,6 +21,7 @@ import (
"bytes"
"context"
"encoding/base64"
"encoding/binary"
"encoding/json" "encoding/json"
"encoding/xml" "encoding/xml"
"errors" "errors"
@ -76,6 +77,22 @@ var (
Cause: errors.New("site replication is not enabled"), Cause: errors.New("site replication is not enabled"),
Code: ErrSiteReplicationInvalidRequest, Code: ErrSiteReplicationInvalidRequest,
} }
errSRResyncStarted = SRError{
Cause: errors.New("site replication resync is already in progress"),
Code: ErrSiteReplicationInvalidRequest,
}
errSRResyncCanceled = SRError{
Cause: errors.New("site replication resync is already canceled"),
Code: ErrSiteReplicationInvalidRequest,
}
errSRNoResync = SRError{
Cause: errors.New("no resync in progress"),
Code: ErrSiteReplicationInvalidRequest,
}
errSRResyncToSelf = SRError{
Cause: errors.New("invalid peer specified - cannot resync to self"),
Code: ErrSiteReplicationInvalidRequest,
}
)
func errSRInvalidRequest(err error) SRError {
@ -4858,3 +4875,305 @@ func (c *SiteReplicationSys) getPeerForUpload(deplID string) (pi srPeerInfo, loc
}
return pi, true
}
// startResync initiates resync of data to peerSite specified. The overall site resync status
// is maintained in .minio.sys/buckets/site-replication/resync/<deployment-id.meta>, while collecting
// individual bucket resync status in .minio.sys/buckets/<bucket-name>/replication/resync.bin
func (c *SiteReplicationSys) startResync(ctx context.Context, objAPI ObjectLayer, peer madmin.PeerInfo) (res madmin.SRResyncOpStatus, err error) {
if !c.isEnabled() {
return res, errSRNotEnabled
}
if objAPI == nil {
return res, errSRObjectLayerNotReady
}
if peer.DeploymentID == globalDeploymentID {
return res, errSRResyncToSelf
}
if _, ok := c.state.Peers[peer.DeploymentID]; !ok {
return res, errSRPeerNotFound
}
rs, err := globalSiteResyncMetrics.siteStatus(ctx, objAPI, peer.DeploymentID)
if err != nil {
return res, err
}
if rs.Status == ResyncStarted {
return res, errSRResyncStarted
}
var buckets []BucketInfo
buckets, err = objAPI.ListBuckets(ctx, BucketOptions{})
if err != nil {
return res, err
}
rs = newSiteResyncStatus(peer.DeploymentID, buckets)
defer func() {
if err != nil {
rs.Status = ResyncFailed
saveSiteResyncMetadata(ctx, rs, objAPI)
globalSiteResyncMetrics.updateState(rs)
}
}()
globalSiteResyncMetrics.updateState(rs)
if err := saveSiteResyncMetadata(ctx, rs, objAPI); err != nil {
return res, err
}
for _, bi := range buckets {
bucket := bi.Name
if _, err := getReplicationConfig(ctx, bucket); err != nil {
res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{
ErrDetail: err.Error(),
Bucket: bucket,
Status: ResyncFailed.String(),
})
continue
}
// mark remote target for this deployment with the new reset id
tgtArn := globalBucketTargetSys.getRemoteARNForPeer(bucket, peer)
if tgtArn == "" {
res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{
ErrDetail: fmt.Sprintf("no valid remote target found for this peer %s (%s)", peer.Name, peer.DeploymentID),
Bucket: bucket,
})
continue
}
target := globalBucketTargetSys.GetRemoteBucketTargetByArn(ctx, bucket, tgtArn)
target.ResetBeforeDate = UTCNow()
target.ResetID = rs.ResyncID
if err = globalBucketTargetSys.SetTarget(ctx, bucket, &target, true); err != nil {
res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{
ErrDetail: err.Error(),
Bucket: bucket,
})
continue
}
targets, err := globalBucketTargetSys.ListBucketTargets(ctx, bucket)
if err != nil {
res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{
ErrDetail: err.Error(),
Bucket: bucket,
})
continue
}
tgtBytes, err := json.Marshal(&targets)
if err != nil {
res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{
ErrDetail: err.Error(),
Bucket: bucket,
})
continue
}
if _, err = globalBucketMetadataSys.Update(ctx, bucket, bucketTargetsFile, tgtBytes); err != nil {
res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{
ErrDetail: err.Error(),
Bucket: bucket,
})
continue
}
if err := globalReplicationPool.resyncer.start(ctx, objAPI, resyncOpts{
bucket: bucket,
arn: tgtArn,
resyncID: rs.ResyncID,
}); err != nil {
res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{
ErrDetail: err.Error(),
Bucket: bucket,
})
continue
}
}
res = madmin.SRResyncOpStatus{
Status: ResyncStarted.String(),
OpType: "start",
ResyncID: rs.ResyncID,
}
if len(res.Buckets) > 0 {
res.ErrDetail = "partial failure in starting site resync"
}
return res, nil
}
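For orientation, a minimal caller sketch follows. It assumes the companion madmin-go client exposes a helper named SiteReplicationResyncOp wrapping the resync admin operation added in this commit; that helper name, the endpoint, and the credentials are placeholders, and only the madmin.PeerInfo, madmin.SiteResyncStart and madmin.SRResyncOpStatus types are taken from this change.

// Sketch only: SiteReplicationResyncOp is an assumed client helper; the endpoint
// and credentials below are placeholders, not part of this commit.
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/minio/madmin-go"
)

func main() {
	// Admin client pointed at the site whose data should be pushed out.
	madmClnt, err := madmin.New("source-site:9000", "ACCESS_KEY", "SECRET_KEY", true)
	if err != nil {
		log.Fatal(err)
	}
	// Peer to resync to; DeploymentID must match one of the configured peers.
	peer := madmin.PeerInfo{DeploymentID: "peer-deployment-id"}
	st, err := madmClnt.SiteReplicationResyncOp(context.Background(), peer, madmin.SiteResyncStart)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("resync id:", st.ResyncID, "status:", st.Status)
}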
// cancelResync stops an ongoing site-level resync for the specified peer.
func (c *SiteReplicationSys) cancelResync(ctx context.Context, objAPI ObjectLayer, peer madmin.PeerInfo) (res madmin.SRResyncOpStatus, err error) {
if !c.isEnabled() {
return res, errSRNotEnabled
}
if objAPI == nil {
return res, errSRObjectLayerNotReady
}
if peer.DeploymentID == globalDeploymentID {
return res, errSRResyncToSelf
}
if _, ok := c.state.Peers[peer.DeploymentID]; !ok {
return res, errSRPeerNotFound
}
rs, err := globalSiteResyncMetrics.siteStatus(ctx, objAPI, peer.DeploymentID)
if err != nil {
return res, err
}
switch rs.Status {
case ResyncCanceled:
return res, errSRResyncCanceled
case ResyncCompleted, NoResync:
return res, errSRNoResync
}
res = madmin.SRResyncOpStatus{
Status: rs.Status.String(),
OpType: "cancel",
ResyncID: rs.ResyncID,
}
targets := globalBucketTargetSys.ListTargets(ctx, "", string(madmin.ReplicationService))
// clear the remote target resetID set while initiating resync to stop replication
for _, t := range targets {
if t.ResetID == rs.ResyncID {
// get tgt with credentials
tgt := globalBucketTargetSys.GetRemoteBucketTargetByArn(ctx, t.SourceBucket, t.Arn)
tgt.ResetID = ""
bucket := t.SourceBucket
if err = globalBucketTargetSys.SetTarget(ctx, bucket, &tgt, true); err != nil {
res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{
ErrDetail: err.Error(),
Bucket: bucket,
})
continue
}
targets, err := globalBucketTargetSys.ListBucketTargets(ctx, bucket)
if err != nil {
res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{
ErrDetail: err.Error(),
Bucket: bucket,
})
continue
}
tgtBytes, err := json.Marshal(&targets)
if err != nil {
res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{
ErrDetail: err.Error(),
Bucket: bucket,
})
continue
}
if _, err = globalBucketMetadataSys.Update(ctx, bucket, bucketTargetsFile, tgtBytes); err != nil {
res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{
ErrDetail: err.Error(),
Bucket: bucket,
})
continue
}
// update resync state for the bucket
globalReplicationPool.resyncer.Lock()
m, ok := globalReplicationPool.resyncer.statusMap[bucket]
if !ok {
m = newBucketResyncStatus(bucket)
}
if st, ok := m.TargetsMap[t.Arn]; ok {
st.LastUpdate = UTCNow()
st.ResyncStatus = ResyncCanceled
m.TargetsMap[t.Arn] = st
m.LastUpdate = UTCNow()
}
globalReplicationPool.resyncer.statusMap[bucket] = m
globalReplicationPool.resyncer.Unlock()
}
}
rs.Status = ResyncCanceled
rs.LastUpdate = UTCNow()
if err := saveSiteResyncMetadata(ctx, rs, objAPI); err != nil {
return res, err
}
globalSiteResyncMetrics.updateState(rs)
res.Status = rs.Status.String()
return res, nil
}
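Note that both startResync and cancelResync accumulate per-bucket problems in res.Buckets instead of aborting, and flag partial failure through ErrDetail. A caller-side check might look like the sketch below; it is illustrative only and reuses the madmin.SRResyncOpStatus fields populated above.

package main

import (
	"fmt"

	"github.com/minio/madmin-go"
)

// reportPartialFailures prints any per-bucket errors carried in a resync status.
func reportPartialFailures(st madmin.SRResyncOpStatus) {
	if st.ErrDetail != "" {
		fmt.Println("overall:", st.ErrDetail)
	}
	for _, b := range st.Buckets {
		if b.ErrDetail != "" {
			// Buckets listed here were skipped; the operation continued for the rest.
			fmt.Printf("bucket %s: %s\n", b.Bucket, b.ErrDetail)
		}
	}
}

func main() {
	// Example with an empty status; in practice st comes from the resync admin call.
	reportPartialFailures(madmin.SRResyncOpStatus{})
}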
const (
siteResyncMetaFormat = 1
siteResyncMetaVersionV1 = 1
siteResyncMetaVersion = siteResyncMetaVersionV1
siteResyncSaveInterval = 10 * time.Second
)
func newSiteResyncStatus(dID string, buckets []BucketInfo) SiteResyncStatus {
now := UTCNow()
s := SiteResyncStatus{
Version: siteResyncMetaVersion,
Status: ResyncStarted,
DeplID: dID,
TotBuckets: len(buckets),
BucketStatuses: make(map[string]ResyncStatusType),
}
for _, bi := range buckets {
s.BucketStatuses[bi.Name] = ResyncPending
}
s.ResyncID = mustGetUUID()
s.StartTime = now
s.LastUpdate = now
return s
}
// load site resync metadata from disk
func loadSiteResyncMetadata(ctx context.Context, objAPI ObjectLayer, dID string) (rs SiteResyncStatus, e error) {
data, err := readConfig(GlobalContext, objAPI, getSRResyncFilePath(dID))
if err != nil {
return rs, err
}
if len(data) == 0 {
// Seems to be empty.
return rs, nil
}
if len(data) <= 4 {
return rs, fmt.Errorf("site resync: no data")
}
// Read resync meta header
switch binary.LittleEndian.Uint16(data[0:2]) {
case siteResyncMetaFormat:
default:
return rs, fmt.Errorf("resyncMeta: unknown format: %d", binary.LittleEndian.Uint16(data[0:2]))
}
switch binary.LittleEndian.Uint16(data[2:4]) {
case siteResyncMetaVersion:
default:
return rs, fmt.Errorf("resyncMeta: unknown version: %d", binary.LittleEndian.Uint16(data[2:4]))
}
// OK, parse data.
if _, err = rs.UnmarshalMsg(data[4:]); err != nil {
return rs, err
}
switch rs.Version {
case siteResyncMetaVersionV1:
default:
return rs, fmt.Errorf("unexpected resync meta version: %d", rs.Version)
}
return rs, nil
}
// save resync status of peer to resync/depl-id.meta
func saveSiteResyncMetadata(ctx context.Context, ss SiteResyncStatus, objectAPI ObjectLayer) error {
data := make([]byte, 4, ss.Msgsize()+4)
// Initialize the resync meta header.
binary.LittleEndian.PutUint16(data[0:2], siteResyncMetaFormat)
binary.LittleEndian.PutUint16(data[2:4], siteResyncMetaVersion)
buf, err := ss.MarshalMsg(data)
if err != nil {
return err
}
return saveConfig(ctx, objectAPI, getSRResyncFilePath(ss.DeplID), buf)
}
func getSRResyncFilePath(dID string) string {
return pathJoin(siteResyncPrefix, dID+".meta")
}
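The on-disk layout written by saveSiteResyncMetadata is therefore a 4-byte little-endian header (uint16 format, uint16 version) followed by the msgp-encoded SiteResyncStatus. The standalone sketch below mirrors that framing with a placeholder payload, since the generated MarshalMsg/UnmarshalMsg methods are not reproduced here.

// Sketch only: constants mirror siteResyncMetaFormat/siteResyncMetaVersionV1;
// the payload is a placeholder for a msgp-encoded SiteResyncStatus.
package main

import (
	"encoding/binary"
	"fmt"
)

const (
	metaFormat  = 1
	metaVersion = 1
)

// frame prepends the 4-byte header to an already msgp-encoded payload.
func frame(payload []byte) []byte {
	data := make([]byte, 4, 4+len(payload))
	binary.LittleEndian.PutUint16(data[0:2], metaFormat)
	binary.LittleEndian.PutUint16(data[2:4], metaVersion)
	return append(data, payload...)
}

// unframe validates the header and returns the payload, mirroring loadSiteResyncMetadata.
func unframe(data []byte) ([]byte, error) {
	if len(data) <= 4 {
		return nil, fmt.Errorf("site resync: no data")
	}
	if f := binary.LittleEndian.Uint16(data[0:2]); f != metaFormat {
		return nil, fmt.Errorf("unknown format: %d", f)
	}
	if v := binary.LittleEndian.Uint16(data[2:4]); v != metaVersion {
		return nil, fmt.Errorf("unknown version: %d", v)
	}
	return data[4:], nil
}

func main() {
	payload := []byte{0x80} // placeholder payload (msgp empty map)
	framed := frame(payload)
	body, err := unframe(framed)
	fmt.Println(len(framed), len(body), err)
}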