Ensure that only one node performs site replication healing (#15584)

When a node finds a change in the other replication cluster and applies
it to itself, it will already notify the other peers. There is no need
for all nodes in a given cluster to do site replication healing; one
node is sufficient.
This commit is contained in:
Anis Elleuch 2022-08-24 21:46:09 +01:00 committed by GitHub
parent 97a6322de1
commit b737c83a66
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 32 additions and 1 deletions

View File

@ -25,6 +25,7 @@ import (
"encoding/xml"
"errors"
"fmt"
"math/rand"
"net/url"
"reflect"
"sort"
@ -3501,7 +3502,37 @@ func (c *SiteReplicationSys) PeerEditReq(ctx context.Context, arg madmin.PeerInf
// siteHealTimeInterval is the duration used for the heal timer that
// healRoutine creates (10 seconds).
const siteHealTimeInterval = 10 * time.Second
// siteReplicationHealLockTimeout is the timeout passed to GetLock when
// healRoutine acquires the "site-replication/heal.lock" namespace lock.
// NOTE(review): presumably `timeout` is the initial wait, `minimum` the floor
// the dynamic timeout may shrink to, and `retryInterval` the pause between
// lock attempts — confirm against dynamicTimeoutOpts.
var siteReplicationHealLockTimeout = newDynamicTimeoutWithOpts(dynamicTimeoutOpts{
timeout: 30 * time.Second,
minimum: 10 * time.Second,
retryInterval: time.Second,
})
// startHealRoutine repeatedly invokes healRoutine until ctx is cancelled.
//
// healRoutine returns early when it cannot obtain the heal lock, so every
// call is followed by a randomized pause of up to one minute (never less
// than one second) before the next attempt. The pause is interruptible:
// once ctx is done the function returns instead of sleeping and retrying
// forever, which lets the goroutine running it terminate.
func (c *SiteReplicationSys) startHealRoutine(ctx context.Context, objAPI ObjectLayer) {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))

	// Run the site replication healing in a loop.
	for {
		c.healRoutine(ctx, objAPI)

		duration := time.Duration(r.Float64() * float64(time.Minute))
		if duration < time.Second {
			// Make sure to sleep at least a second to avoid high CPU ticks.
			duration = time.Second
		}

		// Wait on a timer rather than time.Sleep so that cancellation
		// is observed immediately instead of after the full pause.
		timer := time.NewTimer(duration)
		select {
		case <-ctx.Done():
			timer.Stop()
			return
		case <-timer.C:
		}
	}
}
func (c *SiteReplicationSys) healRoutine(ctx context.Context, objAPI ObjectLayer) {
// Make sure only one node running site replication on the cluster.
locker := objAPI.NewNSLock(minioMetaBucket, "site-replication/heal.lock")
lkctx, err := locker.GetLock(ctx, siteReplicationHealLockTimeout)
if err != nil {
return
}
ctx = lkctx.Context()
defer lkctx.Cancel()
// No unlock for "leader" lock.
healTimer := time.NewTimer(siteHealTimeInterval)
defer healTimer.Stop()

View File

@ -335,7 +335,7 @@ kill -9 ${site1_pid}
# Remove bucket2 on the minio2 alias.
# NOTE(review): the hunk header suggests this runs after `kill -9 ${site1_pid}`,
# i.e. while minio1 is down — confirm against the full script.
./mc rb minio2/bucket2
# Restart minio1 instance
minio server --config-dir /tmp/minio-internal --address ":9001" /tmp/minio-internal-idp1/{1...4} >/tmp/minio1_1.log 2>&1 &
# NOTE(review): both sleeps are visible because this is a rendered diff with
# its +/- markers stripped; presumably `sleep 30` is the removed line and
# `sleep 40` its replacement — confirm against the actual script.
sleep 30
sleep 40
# Test whether most recent tag update on minio2 is replicated to minio1
# Reads the JSON tag listing of minio1/newbucket and extracts .tagset.key.
val=$(./mc tag list minio1/newbucket --json | jq -r .tagset | jq -r .key )