commit 3b16b75fe6 (parent 9d236571f4)
committed by Kristoffer Dalby

integration: rework retry for waiting for node sync

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
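The diff below replaces the pool.Retry-based polling in the integration test helpers with an explicit time.Ticker poll bounded by a context.WithTimeout deadline, so every wait (NeedsLogin, Running, peer sync) runs with a caller-supplied timeout and reports which state it was waiting for when it gives up. The following is a minimal standalone sketch of that pattern, not the committed code; waitFor and its checkState callback are hypothetical stand-ins for the TailscaleInContainer methods shown in the diff:

package main

import (
    "context"
    "fmt"
    "time"
)

// waitFor polls checkState every interval until it returns the wanted
// state or the timeout elapses, mirroring the ticker + context pattern
// introduced by this commit.
func waitFor(wanted string, timeout, interval time.Duration, checkState func() (string, error)) error {
    ticker := time.NewTicker(interval)
    defer ticker.Stop()

    ctx, cancel := context.WithTimeout(context.Background(), timeout)
    defer cancel()

    for {
        select {
        case <-ctx.Done():
            return fmt.Errorf("timeout waiting for state %q after %v", wanted, timeout)
        case <-ticker.C:
            state, err := checkState()
            if err != nil {
                continue // transient error, keep polling
            }
            if state == wanted {
                return nil
            }
        }
    }
}

func main() {
    // Toy usage: pretend the backend reaches "Running" immediately.
    err := waitFor("Running", 5*time.Second, 100*time.Millisecond, func() (string, error) {
        return "Running", nil
    })
    fmt.Println("result:", err)
}

The commit applies this shape to waitForBackendState and, with per-peer error accumulation on top, to WaitForPeers.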
@@ -31,6 +31,7 @@ import (
     "tailscale.com/paths"
     "tailscale.com/types/key"
     "tailscale.com/types/netmap"
+    "tailscale.com/util/multierr"
 )
 
 const (
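The only change in this hunk is the new tailscale.com/util/multierr import; the reworked WaitForPeers further down uses it to fold every per-peer validation failure into a single error instead of returning on the first one. A minimal sketch of that aggregation, with made-up peer failures for illustration:

package main

import (
    "errors"
    "fmt"

    "tailscale.com/util/multierr"
)

func main() {
    // Collect every validation failure rather than stopping at the first,
    // the way the reworked WaitForPeers accumulates peerErrors.
    var errs []error
    errs = append(errs, errors.New("peer ts-node-a is not online"))
    errs = append(errs, errors.New("peer ts-node-b does not have a DERP"))

    if len(errs) > 0 {
        // multierr.New folds the slice into one error value, which the
        // timeout path then wraps into its final error message.
        fmt.Println(multierr.New(errs...))
    }
}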
@@ -529,7 +530,7 @@ func (t *TailscaleInContainer) Logout() error {
         return fmt.Errorf("failed to logout, stdout: %s, stderr: %s", stdout, stderr)
     }
 
-    return t.waitForBackendState("NeedsLogin")
+    return t.waitForBackendState("NeedsLogin", integrationutil.PeerSyncTimeout())
 }
 
 // Helper that runs `tailscale up` with no arguments.
@@ -904,75 +905,115 @@ func (t *TailscaleInContainer) FailingPeersAsString() (string, bool, error) {
 
 // WaitForNeedsLogin blocks until the Tailscale (tailscaled) instance has
 // started and needs to be logged into.
-func (t *TailscaleInContainer) WaitForNeedsLogin() error {
-    return t.waitForBackendState("NeedsLogin")
+func (t *TailscaleInContainer) WaitForNeedsLogin(timeout time.Duration) error {
+    return t.waitForBackendState("NeedsLogin", timeout)
 }
 
 // WaitForRunning blocks until the Tailscale (tailscaled) instance is logged in
 // and ready to be used.
-func (t *TailscaleInContainer) WaitForRunning() error {
-    return t.waitForBackendState("Running")
+func (t *TailscaleInContainer) WaitForRunning(timeout time.Duration) error {
+    return t.waitForBackendState("Running", timeout)
 }
 
-func (t *TailscaleInContainer) waitForBackendState(state string) error {
-    return t.pool.Retry(func() error {
-        status, err := t.Status()
-        if err != nil {
-            return errTailscaleStatus(t.hostname, err)
-        }
+func (t *TailscaleInContainer) waitForBackendState(state string, timeout time.Duration) error {
+    ticker := time.NewTicker(integrationutil.PeerSyncRetryInterval())
+    defer ticker.Stop()
 
-        // ipnstate.Status.CurrentTailnet was added in Tailscale 1.22.0
-        // https://github.com/tailscale/tailscale/pull/3865
-        //
-        // Before that, we can check the BackendState to see if the
-        // tailscaled daemon is connected to the control system.
-        if status.BackendState == state {
-            return nil
-        }
+    ctx, cancel := context.WithTimeout(context.Background(), timeout)
+    defer cancel()
 
-        return errTailscaleNotConnected
-    })
+    for {
+        select {
+        case <-ctx.Done():
+            return fmt.Errorf("timeout waiting for backend state %s on %s after %v", state, t.hostname, timeout)
+        case <-ticker.C:
+            status, err := t.Status()
+            if err != nil {
+                continue // Keep retrying on status errors
+            }
+
+            // ipnstate.Status.CurrentTailnet was added in Tailscale 1.22.0
+            // https://github.com/tailscale/tailscale/pull/3865
+            //
+            // Before that, we can check the BackendState to see if the
+            // tailscaled daemon is connected to the control system.
+            if status.BackendState == state {
+                return nil
+            }
+        }
+    }
 }
 
 // WaitForPeers blocks until N number of peers is present in the
 // Peer list of the Tailscale instance and is reporting Online.
-func (t *TailscaleInContainer) WaitForPeers(expected int) error {
-    return t.pool.Retry(func() error {
-        status, err := t.Status()
-        if err != nil {
-            return errTailscaleStatus(t.hostname, err)
-        }
+//
+// The method verifies that each peer:
+// - Has the expected peer count
+// - All peers are Online
+// - All peers have a hostname
+// - All peers have a DERP relay assigned
+//
+// Uses multierr to collect all validation errors.
+func (t *TailscaleInContainer) WaitForPeers(expected int, timeout, retryInterval time.Duration) error {
+    ticker := time.NewTicker(retryInterval)
+    defer ticker.Stop()
 
-        if peers := status.Peers(); len(peers) != expected {
-            return fmt.Errorf(
-                "%s err: %w expected %d, got %d",
-                t.hostname,
-                errTailscaleWrongPeerCount,
-                expected,
-                len(peers),
-            )
-        } else {
-            // Verify that the peers of a given node is Online
-            // has a hostname and a DERP relay.
-            for _, peerKey := range peers {
+    ctx, cancel := context.WithTimeout(context.Background(), timeout)
+    defer cancel()
+
+    var lastErrs []error
+    for {
+        select {
+        case <-ctx.Done():
+            if len(lastErrs) > 0 {
+                return fmt.Errorf("timeout waiting for %d peers on %s after %v, errors: %w", expected, t.hostname, timeout, multierr.New(lastErrs...))
+            }
+            return fmt.Errorf("timeout waiting for %d peers on %s after %v", expected, t.hostname, timeout)
+        case <-ticker.C:
+            status, err := t.Status()
+            if err != nil {
+                lastErrs = []error{errTailscaleStatus(t.hostname, err)}
+                continue // Keep retrying on status errors
+            }
+
+            if peers := status.Peers(); len(peers) != expected {
+                lastErrs = []error{fmt.Errorf(
+                    "%s err: %w expected %d, got %d",
+                    t.hostname,
+                    errTailscaleWrongPeerCount,
+                    expected,
+                    len(peers),
+                )}
+                continue
+            }
+
+            var peerErrors []error
+            for _, peerKey := range status.Peers() {
                 peer := status.Peer[peerKey]
 
                 if !peer.Online {
-                    return fmt.Errorf("[%s] peer count correct, but %s is not online", t.hostname, peer.HostName)
+                    peerErrors = append(peerErrors, fmt.Errorf("[%s] peer count correct, but %s is not online", t.hostname, peer.HostName))
                 }
 
                 if peer.HostName == "" {
-                    return fmt.Errorf("[%s] peer count correct, but %s does not have a Hostname", t.hostname, peer.HostName)
+                    peerErrors = append(peerErrors, fmt.Errorf("[%s] peer count correct, but %s does not have a Hostname", t.hostname, peer.HostName))
                 }
 
                 if peer.Relay == "" {
-                    return fmt.Errorf("[%s] peer count correct, but %s does not have a DERP", t.hostname, peer.HostName)
+                    peerErrors = append(peerErrors, fmt.Errorf("[%s] peer count correct, but %s does not have a DERP", t.hostname, peer.HostName))
                 }
             }
-        }
 
-        return nil
-    })
+            if len(peerErrors) > 0 {
+                lastErrs = peerErrors
+                continue
+            }
+
+            return nil
+        }
+    }
 }
 
 type (
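With the new signatures, call sites pass the timing explicitly instead of inheriting it from the retry helper. A hypothetical call site inside an integration test might look like the following; tsClient and expectedPeers are illustrative names, while integrationutil.PeerSyncTimeout and integrationutil.PeerSyncRetryInterval are the helpers referenced in the diff above:

// Hypothetical test fragment, not part of the commit: wait for the client
// to reach Running, then for its peer list to settle, using the shared
// integrationutil timing helpers for timeout and retry interval.
if err := tsClient.WaitForRunning(integrationutil.PeerSyncTimeout()); err != nil {
    t.Fatalf("client never reached Running state: %s", err)
}

if err := tsClient.WaitForPeers(expectedPeers, integrationutil.PeerSyncTimeout(), integrationutil.PeerSyncRetryInterval()); err != nil {
    t.Fatalf("peers did not come online: %s", err)
}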