mirror of
https://github.com/minio/minio.git
synced 2025-04-04 03:40:30 -04:00
Ensure that only one node performs site replication healing (#15584)
When a node finds a change in the other replication cluster and applies it to itself, it already notifies its other peers. There is no need for every node in a given cluster to perform site replication healing; a single node is sufficient.
This commit is contained in:
parent
97a6322de1
commit
b737c83a66
@ -25,6 +25,7 @@ import (
|
|||||||
"encoding/xml"
|
"encoding/xml"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math/rand"
|
||||||
"net/url"
|
"net/url"
|
||||||
"reflect"
|
"reflect"
|
||||||
"sort"
|
"sort"
|
||||||
@ -3501,7 +3502,37 @@ func (c *SiteReplicationSys) PeerEditReq(ctx context.Context, arg madmin.PeerInf
|
|||||||
|
|
||||||
const siteHealTimeInterval = 10 * time.Second
|
const siteHealTimeInterval = 10 * time.Second
|
||||||
|
|
||||||
|
var siteReplicationHealLockTimeout = newDynamicTimeoutWithOpts(dynamicTimeoutOpts{
|
||||||
|
timeout: 30 * time.Second,
|
||||||
|
minimum: 10 * time.Second,
|
||||||
|
retryInterval: time.Second,
|
||||||
|
})
|
||||||
|
|
||||||
func (c *SiteReplicationSys) startHealRoutine(ctx context.Context, objAPI ObjectLayer) {
|
func (c *SiteReplicationSys) startHealRoutine(ctx context.Context, objAPI ObjectLayer) {
|
||||||
|
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||||
|
// Run the site replication healing in a loop
|
||||||
|
for {
|
||||||
|
c.healRoutine(ctx, objAPI)
|
||||||
|
duration := time.Duration(r.Float64() * float64(time.Minute))
|
||||||
|
if duration < time.Second {
|
||||||
|
// Make sure to sleep at least a second to avoid high CPU ticks.
|
||||||
|
duration = time.Second
|
||||||
|
}
|
||||||
|
time.Sleep(duration)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *SiteReplicationSys) healRoutine(ctx context.Context, objAPI ObjectLayer) {
|
||||||
|
// Make sure only one node runs site replication healing on the cluster.
|
||||||
|
locker := objAPI.NewNSLock(minioMetaBucket, "site-replication/heal.lock")
|
||||||
|
lkctx, err := locker.GetLock(ctx, siteReplicationHealLockTimeout)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ctx = lkctx.Context()
|
||||||
|
defer lkctx.Cancel()
|
||||||
|
// No unlock for "leader" lock.
|
||||||
|
|
||||||
healTimer := time.NewTimer(siteHealTimeInterval)
|
healTimer := time.NewTimer(siteHealTimeInterval)
|
||||||
defer healTimer.Stop()
|
defer healTimer.Stop()
|
||||||
|
|
||||||
|
@ -335,7 +335,7 @@ kill -9 ${site1_pid}
|
|||||||
./mc rb minio2/bucket2
|
./mc rb minio2/bucket2
|
||||||
# Restart minio1 instance
|
# Restart minio1 instance
|
||||||
minio server --config-dir /tmp/minio-internal --address ":9001" /tmp/minio-internal-idp1/{1...4} >/tmp/minio1_1.log 2>&1 &
|
minio server --config-dir /tmp/minio-internal --address ":9001" /tmp/minio-internal-idp1/{1...4} >/tmp/minio1_1.log 2>&1 &
|
||||||
sleep 30
|
sleep 40
|
||||||
|
|
||||||
# Test whether most recent tag update on minio2 is replicated to minio1
|
# Test whether most recent tag update on minio2 is replicated to minio1
|
||||||
val=$(./mc tag list minio1/newbucket --json | jq -r .tagset | jq -r .key )
|
val=$(./mc tag list minio1/newbucket --json | jq -r .tagset | jq -r .key )
|
||||||
|
Loading…
x
Reference in New Issue
Block a user