feat: support perf site replication (#17477)

This commit is contained in:
jiuker
2023-07-06 13:28:26 +08:00
committed by GitHub
parent 6efcf9c982
commit 2dbb1cff4a
8 changed files with 266 additions and 9 deletions

View File

@@ -24,6 +24,7 @@ import (
"io"
"math/rand"
"net/http"
"net/url"
"sync"
"sync/atomic"
"time"
@@ -31,6 +32,7 @@ import (
"github.com/dustin/go-humanize"
"github.com/minio/madmin-go/v3"
"github.com/minio/minio-go/v7"
xhttp "github.com/minio/minio/internal/http"
"github.com/minio/pkg/randreader"
)
@@ -240,7 +242,7 @@ func (n *netPerfRX) Connect() {
n.Lock()
defer n.Unlock()
n.activeConnections++
atomic.StoreUint64(&globalNetPerfRX.RX, 0)
atomic.StoreUint64(&n.RX, 0)
n.lastToConnect = time.Now()
}
@@ -338,3 +340,85 @@ func netperf(ctx context.Context, duration time.Duration) madmin.NetperfNodeResu
globalNetPerfRX.Reset()
return madmin.NetperfNodeResult{Endpoint: "", TX: r.n / uint64(duration.Seconds()), RX: uint64(rx / delta.Seconds()), Error: errStr}
}
func siteNetperf(ctx context.Context, duration time.Duration) madmin.SiteNetPerfNodeResult {
r := &netperfReader{eof: make(chan struct{})}
r.buf = make([]byte, 128*humanize.KiByte)
rand.Read(r.buf)
clusterInfos, err := globalSiteReplicationSys.GetClusterInfo(ctx)
if err != nil {
return madmin.SiteNetPerfNodeResult{Error: err.Error()}
}
// Scale the number of connections from 32 -> 4 from small to large clusters.
connectionsPerPeer := 3 + (29+len(clusterInfos.Sites)-1)/len(clusterInfos.Sites)
errStr := ""
var wg sync.WaitGroup
for _, info := range clusterInfos.Sites {
// skip self
if globalDeploymentID == info.DeploymentID {
continue
}
info := info
wg.Add(connectionsPerPeer)
for i := 0; i < connectionsPerPeer; i++ {
go func() {
defer wg.Done()
cli, err := globalSiteReplicationSys.getAdminClient(ctx, info.DeploymentID)
if err != nil {
return
}
rp := cli.GetEndpointURL()
reqURL := &url.URL{
Scheme: rp.Scheme,
Host: rp.Host,
Path: adminPathPrefix + adminAPIVersionPrefix + adminAPISiteReplicationDevNull,
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, reqURL.String(), r)
if err != nil {
return
}
client := &http.Client{
Timeout: duration + 10*time.Second,
Transport: globalRemoteTargetTransport,
}
resp, err := client.Do(req)
if err != nil {
return
}
defer xhttp.DrainBody(resp.Body)
}()
}
}
time.Sleep(duration)
close(r.eof)
wg.Wait()
for {
if globalSiteNetPerfRX.ActiveConnections() == 0 || contextCanceled(ctx) {
break
}
time.Sleep(time.Second)
}
rx := float64(globalSiteNetPerfRX.RXSample)
delta := globalSiteNetPerfRX.firstToDisconnect.Sub(globalSiteNetPerfRX.lastToConnect)
// If the first disconnected before the last connected, we likely had a network issue.
if delta <= 0 {
rx = 0
errStr = "detected network disconnections, possibly an unstable network"
}
globalSiteNetPerfRX.Reset()
return madmin.SiteNetPerfNodeResult{
Endpoint: "",
TX: r.n,
TXTotalDuration: duration,
RX: uint64(rx),
RXTotalDuration: delta,
Error: errStr,
TotalConn: uint64(connectionsPerPeer),
}
}