diff --git a/cmd/site-replication.go b/cmd/site-replication.go
index ab82d65c2..1066e3960 100644
--- a/cmd/site-replication.go
+++ b/cmd/site-replication.go
@@ -25,6 +25,7 @@ import (
 	"encoding/xml"
 	"errors"
 	"fmt"
+	"math/rand"
 	"net/url"
 	"reflect"
 	"sort"
@@ -3501,7 +3502,50 @@ func (c *SiteReplicationSys) PeerEditReq(ctx context.Context, arg madmin.PeerInf
 
 const siteHealTimeInterval = 10 * time.Second
 
+// siteReplicationHealLockTimeout is the dynamic timeout handed to GetLock when
+// healRoutine tries to acquire the site replication heal lock.
+var siteReplicationHealLockTimeout = newDynamicTimeoutWithOpts(dynamicTimeoutOpts{
+	timeout:       30 * time.Second,
+	minimum:       10 * time.Second,
+	retryInterval: time.Second,
+})
+
+// startHealRoutine runs healRoutine in a loop, pausing for a random duration
+// between one second and one minute after each return. It returns once ctx
+// is done.
 func (c *SiteReplicationSys) startHealRoutine(ctx context.Context, objAPI ObjectLayer) {
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	// Run the site replication healing in a loop until ctx is done.
+	for {
+		c.healRoutine(ctx, objAPI)
+		duration := time.Duration(r.Float64() * float64(time.Minute))
+		if duration < time.Second {
+			// Make sure to sleep at least a second to avoid high CPU ticks.
+			duration = time.Second
+		}
+		// Wait on a timer rather than time.Sleep so that cancellation of ctx
+		// ends the routine instead of leaving it looping forever.
+		retryTimer := time.NewTimer(duration)
+		select {
+		case <-ctx.Done():
+			retryTimer.Stop()
+			return
+		case <-retryTimer.C:
+		}
+	}
+}
+
+func (c *SiteReplicationSys) healRoutine(ctx context.Context, objAPI ObjectLayer) {
+	// Make sure only one node running site replication on the cluster.
+	locker := objAPI.NewNSLock(minioMetaBucket, "site-replication/heal.lock")
+	lkctx, err := locker.GetLock(ctx, siteReplicationHealLockTimeout)
+	if err != nil {
+		return
+	}
+	ctx = lkctx.Context()
+	defer lkctx.Cancel()
+	// No unlock for "leader" lock.
+
 	healTimer := time.NewTimer(siteHealTimeInterval)
 	defer healTimer.Stop()
 
diff --git a/docs/site-replication/run-multi-site-minio-idp.sh b/docs/site-replication/run-multi-site-minio-idp.sh
index 9581a7b18..aca0dcc29 100755
--- a/docs/site-replication/run-multi-site-minio-idp.sh
+++ b/docs/site-replication/run-multi-site-minio-idp.sh
@@ -335,7 +335,7 @@ kill -9 ${site1_pid}
 ./mc rb minio2/bucket2
 # Restart minio1 instance
 minio server --config-dir /tmp/minio-internal --address ":9001" /tmp/minio-internal-idp1/{1...4} >/tmp/minio1_1.log 2>&1 &
-sleep 30
+sleep 40
 # Test whether most recent tag update on minio2 is replicated to minio1
 val=$(./mc tag list minio1/newbucket --json | jq -r .tagset | jq -r .key )