diff --git a/flake.nix b/flake.nix index 227d69c0..17a99b56 100644 --- a/flake.nix +++ b/flake.nix @@ -143,7 +143,6 @@ yq-go ripgrep postgresql - traceroute # 'dot' is needed for pprof graphs # go tool pprof -http=: @@ -160,7 +159,8 @@ # Add hi to make it even easier to use ci runner. hi - ]; + ] + ++ lib.optional pkgs.stdenv.isLinux [traceroute]; # Add entry to build a docker image with headscale # caveat: only works on Linux diff --git a/integration/auth_key_test.go b/integration/auth_key_test.go index ac69a6f5..1352a02b 100644 --- a/integration/auth_key_test.go +++ b/integration/auth_key_test.go @@ -84,8 +84,12 @@ func TestAuthKeyLogoutAndReloginSameUser(t *testing.T) { t.Logf("all clients logged out") - listNodes, err = headscale.ListNodes() - require.Equal(t, nodeCountBeforeLogout, len(listNodes)) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + var err error + listNodes, err = headscale.ListNodes() + assert.NoError(ct, err) + assert.Equal(ct, nodeCountBeforeLogout, len(listNodes), "Node count should match before logout count") + }, 20*time.Second, 1*time.Second) for _, node := range listNodes { assertLastSeenSet(t, node) @@ -115,8 +119,12 @@ func TestAuthKeyLogoutAndReloginSameUser(t *testing.T) { } } - listNodes, err = headscale.ListNodes() - require.Equal(t, nodeCountBeforeLogout, len(listNodes)) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + var err error + listNodes, err = headscale.ListNodes() + assert.NoError(ct, err) + assert.Equal(ct, nodeCountBeforeLogout, len(listNodes), "Node count should match after HTTPS reconnection") + }, 30*time.Second, 2*time.Second) for _, node := range listNodes { assertLastSeenSet(t, node) @@ -234,22 +242,29 @@ func TestAuthKeyLogoutAndReloginNewUser(t *testing.T) { } } - user1Nodes, err := headscale.ListNodes("user1") - assertNoErr(t, err) - assert.Len(t, user1Nodes, len(allClients)) + var user1Nodes []*v1.Node + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + var err error + user1Nodes, err = headscale.ListNodes("user1") + assert.NoError(ct, err) + assert.Len(ct, user1Nodes, len(allClients), "User1 should have all clients after re-login") + }, 20*time.Second, 1*time.Second) // Validate that all the old nodes are still present with user2 - user2Nodes, err := headscale.ListNodes("user2") - assertNoErr(t, err) - assert.Len(t, user2Nodes, len(allClients)/2) + var user2Nodes []*v1.Node + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + var err error + user2Nodes, err = headscale.ListNodes("user2") + assert.NoError(ct, err) + assert.Len(ct, user2Nodes, len(allClients)/2, "User2 should have half the clients") + }, 20*time.Second, 1*time.Second) for _, client := range allClients { - status, err := client.Status() - if err != nil { - t.Fatalf("failed to get status for client %s: %s", client.Hostname(), err) - } - - assert.Equal(t, "user1@test.no", status.User[status.Self.UserID].LoginName) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + status, err := client.Status() + assert.NoError(ct, err, "Failed to get status for client %s", client.Hostname()) + assert.Equal(ct, "user1@test.no", status.User[status.Self.UserID].LoginName, "Client %s should be logged in as user1", client.Hostname()) + }, 30*time.Second, 2*time.Second) } } diff --git a/integration/auth_web_flow_test.go b/integration/auth_web_flow_test.go index 83413e0d..56c05e62 100644 --- a/integration/auth_web_flow_test.go +++ b/integration/auth_web_flow_test.go @@ -4,11 +4,12 @@ import ( "net/netip" "slices" "testing" + "time" + v1 "github.com/juanfont/headscale/gen/go/headscale/v1" "github.com/juanfont/headscale/integration/hsic" "github.com/samber/lo" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) func TestAuthWebFlowAuthenticationPingAll(t *testing.T) { @@ -92,8 +93,13 @@ func TestAuthWebFlowLogoutAndRelogin(t *testing.T) { headscale, err := scenario.Headscale() assertNoErrGetHeadscale(t, err) - listNodes, err := headscale.ListNodes() - assert.Len(t, allClients, len(listNodes)) + var listNodes []*v1.Node + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + var err error + listNodes, err = headscale.ListNodes() + assert.NoError(ct, err) + assert.Len(ct, listNodes, len(allClients), "Node count should match client count after login") + }, 20*time.Second, 1*time.Second) nodeCountBeforeLogout := len(listNodes) t.Logf("node count before logout: %d", nodeCountBeforeLogout) @@ -137,8 +143,12 @@ func TestAuthWebFlowLogoutAndRelogin(t *testing.T) { success = pingAllHelper(t, allClients, allAddrs) t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps)) - listNodes, err = headscale.ListNodes() - require.Len(t, listNodes, nodeCountBeforeLogout) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + var err error + listNodes, err = headscale.ListNodes() + assert.NoError(ct, err) + assert.Len(ct, listNodes, nodeCountBeforeLogout, "Node count should match before logout count after re-login") + }, 20*time.Second, 1*time.Second) t.Logf("node count first login: %d, after relogin: %d", nodeCountBeforeLogout, len(listNodes)) for _, client := range allClients { diff --git a/integration/cli_test.go b/integration/cli_test.go index fd9c49a7..7f4f9936 100644 --- a/integration/cli_test.go +++ b/integration/cli_test.go @@ -64,26 +64,30 @@ func TestUserCommand(t *testing.T) { assertNoErr(t, err) var listUsers []*v1.User - err = executeAndUnmarshal(headscale, - []string{ - "headscale", - "users", - "list", - "--output", - "json", - }, - &listUsers, - ) - assertNoErr(t, err) + var result []string + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + err := executeAndUnmarshal(headscale, + []string{ + "headscale", + "users", + "list", + "--output", + "json", + }, + &listUsers, + ) + assert.NoError(ct, err) - slices.SortFunc(listUsers, sortWithID) - result := []string{listUsers[0].GetName(), listUsers[1].GetName()} + slices.SortFunc(listUsers, sortWithID) + result = []string{listUsers[0].GetName(), listUsers[1].GetName()} - assert.Equal( - t, - []string{"user1", "user2"}, - result, - ) + assert.Equal( + ct, + []string{"user1", "user2"}, + result, + "Should have user1 and user2 in users list", + ) + }, 20*time.Second, 1*time.Second) _, err = headscale.Execute( []string{ @@ -98,26 +102,29 @@ func TestUserCommand(t *testing.T) { assertNoErr(t, err) var listAfterRenameUsers []*v1.User - err = executeAndUnmarshal(headscale, - []string{ - "headscale", - "users", - "list", - "--output", - "json", - }, - &listAfterRenameUsers, - ) - assertNoErr(t, err) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + err := executeAndUnmarshal(headscale, + []string{ + "headscale", + "users", + "list", + "--output", + "json", + }, + &listAfterRenameUsers, + ) + assert.NoError(ct, err) - slices.SortFunc(listUsers, sortWithID) - result = []string{listAfterRenameUsers[0].GetName(), listAfterRenameUsers[1].GetName()} + slices.SortFunc(listAfterRenameUsers, sortWithID) + result = []string{listAfterRenameUsers[0].GetName(), listAfterRenameUsers[1].GetName()} - assert.Equal( - t, - []string{"user1", "newname"}, - result, - ) + assert.Equal( + ct, + []string{"user1", "newname"}, + result, + "Should have user1 and newname after rename operation", + ) + }, 20*time.Second, 1*time.Second) var listByUsername []*v1.User err = executeAndUnmarshal(headscale, @@ -187,30 +194,32 @@ func TestUserCommand(t *testing.T) { assert.Contains(t, deleteResult, "User destroyed") var listAfterIDDelete []*v1.User - err = executeAndUnmarshal(headscale, - []string{ - "headscale", - "users", - "list", - "--output", - "json", - }, - &listAfterIDDelete, - ) - assertNoErr(t, err) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + err := executeAndUnmarshal(headscale, + []string{ + "headscale", + "users", + "list", + "--output", + "json", + }, + &listAfterIDDelete, + ) + assert.NoError(ct, err) - slices.SortFunc(listAfterIDDelete, sortWithID) - want = []*v1.User{ - { - Id: 2, - Name: "newname", - Email: "user2@test.no", - }, - } + slices.SortFunc(listAfterIDDelete, sortWithID) + want := []*v1.User{ + { + Id: 2, + Name: "newname", + Email: "user2@test.no", + }, + } - if diff := tcmp.Diff(want, listAfterIDDelete, cmpopts.IgnoreUnexported(v1.User{}), cmpopts.IgnoreFields(v1.User{}, "CreatedAt")); diff != "" { - t.Errorf("unexpected users (-want +got):\n%s", diff) - } + if diff := tcmp.Diff(want, listAfterIDDelete, cmpopts.IgnoreUnexported(v1.User{}), cmpopts.IgnoreFields(v1.User{}, "CreatedAt")); diff != "" { + assert.Fail(ct, "unexpected users", "diff (-want +got):\n%s", diff) + } + }, 20*time.Second, 1*time.Second) deleteResult, err = headscale.Execute( []string{ @@ -569,10 +578,14 @@ func TestPreAuthKeyCorrectUserLoggedInCommand(t *testing.T) { ) assertNoErr(t, err) - listNodes, err := headscale.ListNodes() - require.NoError(t, err) - require.Len(t, listNodes, 1) - assert.Equal(t, user1, listNodes[0].GetUser().GetName()) + var listNodes []*v1.Node + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + var err error + listNodes, err = headscale.ListNodes() + assert.NoError(ct, err) + assert.Len(ct, listNodes, 1, "Should have exactly 1 node for user1") + assert.Equal(ct, user1, listNodes[0].GetUser().GetName(), "Node should belong to user1") + }, 15*time.Second, 1*time.Second) allClients, err := scenario.ListTailscaleClients() assertNoErrListClients(t, err) @@ -588,30 +601,31 @@ func TestPreAuthKeyCorrectUserLoggedInCommand(t *testing.T) { err = scenario.WaitForTailscaleLogout() assertNoErr(t, err) - status, err := client.Status() - assertNoErr(t, err) - if status.BackendState == "Starting" || status.BackendState == "Running" { - t.Fatalf("expected node to be logged out, backend state: %s", status.BackendState) - } + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + status, err := client.Status() + assert.NoError(ct, err) + assert.NotContains(ct, []string{"Starting", "Running"}, status.BackendState, + "Expected node to be logged out, backend state: %s", status.BackendState) + }, 30*time.Second, 2*time.Second) err = client.Login(headscale.GetEndpoint(), user2Key.GetKey()) assertNoErr(t, err) - status, err = client.Status() - assertNoErr(t, err) - if status.BackendState != "Running" { - t.Fatalf("expected node to be logged in, backend state: %s", status.BackendState) - } + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + status, err := client.Status() + assert.NoError(ct, err) + assert.Equal(ct, "Running", status.BackendState, "Expected node to be logged in, backend state: %s", status.BackendState) + assert.Equal(ct, "userid:2", status.Self.UserID.String(), "Expected node to be logged in as userid:2") + }, 30*time.Second, 2*time.Second) - if status.Self.UserID.String() != "userid:2" { - t.Fatalf("expected node to be logged in as userid:2, got: %s", status.Self.UserID.String()) - } - - listNodes, err = headscale.ListNodes() - require.NoError(t, err) - require.Len(t, listNodes, 2) - assert.Equal(t, user1, listNodes[0].GetUser().GetName()) - assert.Equal(t, user2, listNodes[1].GetUser().GetName()) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + var err error + listNodes, err = headscale.ListNodes() + assert.NoError(ct, err) + assert.Len(ct, listNodes, 2, "Should have 2 nodes after re-login") + assert.Equal(ct, user1, listNodes[0].GetUser().GetName(), "First node should belong to user1") + assert.Equal(ct, user2, listNodes[1].GetUser().GetName(), "Second node should belong to user2") + }, 20*time.Second, 1*time.Second) } func TestApiKeyCommand(t *testing.T) { @@ -844,7 +858,9 @@ func TestNodeTagCommand(t *testing.T) { nodes[index] = &node } - assert.Len(t, nodes, len(regIDs)) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + assert.Len(ct, nodes, len(regIDs), "Should have correct number of nodes after CLI operations") + }, 15*time.Second, 1*time.Second) var node v1.Node err = executeAndUnmarshal( @@ -1096,24 +1112,27 @@ func TestNodeCommand(t *testing.T) { nodes[index] = &node } - assert.Len(t, nodes, len(regIDs)) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + assert.Len(ct, nodes, len(regIDs), "Should have correct number of nodes after CLI operations") + }, 15*time.Second, 1*time.Second) // Test list all nodes after added seconds var listAll []v1.Node - err = executeAndUnmarshal( - headscale, - []string{ - "headscale", - "nodes", - "list", - "--output", - "json", - }, - &listAll, - ) - assert.NoError(t, err) - - assert.Len(t, listAll, 5) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + err := executeAndUnmarshal( + headscale, + []string{ + "headscale", + "nodes", + "list", + "--output", + "json", + }, + &listAll, + ) + assert.NoError(ct, err) + assert.Len(ct, listAll, len(regIDs), "Should list all nodes after CLI operations") + }, 20*time.Second, 1*time.Second) assert.Equal(t, uint64(1), listAll[0].GetId()) assert.Equal(t, uint64(2), listAll[1].GetId()) @@ -1173,7 +1192,9 @@ func TestNodeCommand(t *testing.T) { otherUserMachines[index] = &node } - assert.Len(t, otherUserMachines, len(otherUserRegIDs)) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + assert.Len(ct, otherUserMachines, len(otherUserRegIDs), "Should have correct number of otherUser machines after CLI operations") + }, 15*time.Second, 1*time.Second) // Test list all nodes after added otherUser var listAllWithotherUser []v1.Node @@ -1250,22 +1271,23 @@ func TestNodeCommand(t *testing.T) { // Test: list main user after node is deleted var listOnlyMachineUserAfterDelete []v1.Node - err = executeAndUnmarshal( - headscale, - []string{ - "headscale", - "nodes", - "list", - "--user", - "node-user", - "--output", - "json", - }, - &listOnlyMachineUserAfterDelete, - ) - assert.NoError(t, err) - - assert.Len(t, listOnlyMachineUserAfterDelete, 4) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + err := executeAndUnmarshal( + headscale, + []string{ + "headscale", + "nodes", + "list", + "--user", + "node-user", + "--output", + "json", + }, + &listOnlyMachineUserAfterDelete, + ) + assert.NoError(ct, err) + assert.Len(ct, listOnlyMachineUserAfterDelete, 4, "Should have 4 nodes for node-user after deletion") + }, 20*time.Second, 1*time.Second) } func TestNodeExpireCommand(t *testing.T) { diff --git a/integration/dns_test.go b/integration/dns_test.go index 456895cc..7cac4d47 100644 --- a/integration/dns_test.go +++ b/integration/dns_test.go @@ -50,34 +50,21 @@ func TestResolveMagicDNS(t *testing.T) { assert.Equal(t, peer.Hostname()+".headscale.net.", peerFQDN) - command := []string{ - "tailscale", - "ip", peerFQDN, - } - result, _, err := client.Execute(command) - if err != nil { - t.Fatalf( - "failed to execute resolve/ip command %s from %s: %s", - peerFQDN, - client.Hostname(), - err, - ) - } - - ips, err := peer.IPs() - if err != nil { - t.Fatalf( - "failed to get ips for %s: %s", - peer.Hostname(), - err, - ) - } - - for _, ip := range ips { - if !strings.Contains(result, ip.String()) { - t.Fatalf("ip %s is not found in \n%s\n", ip.String(), result) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + command := []string{ + "tailscale", + "ip", peerFQDN, } - } + result, _, err := client.Execute(command) + assert.NoError(ct, err, "Failed to execute resolve/ip command %s from %s", peerFQDN, client.Hostname()) + + ips, err := peer.IPs() + assert.NoError(ct, err, "Failed to get IPs for %s", peer.Hostname()) + + for _, ip := range ips { + assert.Contains(ct, result, ip.String(), "IP %s should be found in DNS resolution result from %s to %s", ip.String(), client.Hostname(), peer.Hostname()) + } + }, 30*time.Second, 2*time.Second) } } } diff --git a/integration/embedded_derp_test.go b/integration/embedded_derp_test.go index b1d947cd..051b9261 100644 --- a/integration/embedded_derp_test.go +++ b/integration/embedded_derp_test.go @@ -1,12 +1,12 @@ package integration import ( - "strings" "testing" "time" "github.com/juanfont/headscale/integration/hsic" "github.com/juanfont/headscale/integration/tsic" + "github.com/stretchr/testify/assert" "tailscale.com/tailcfg" "tailscale.com/types/key" ) @@ -140,17 +140,17 @@ func derpServerScenario( assertNoErrListFQDN(t, err) for _, client := range allClients { - status, err := client.Status() - assertNoErr(t, err) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + status, err := client.Status() + assert.NoError(ct, err, "Failed to get status for client %s", client.Hostname()) - for _, health := range status.Health { - if strings.Contains(health, "could not connect to any relay server") { - t.Errorf("expected to be connected to derp, found: %s", health) + for _, health := range status.Health { + assert.NotContains(ct, health, "could not connect to any relay server", + "Client %s should be connected to DERP relay", client.Hostname()) + assert.NotContains(ct, health, "could not connect to the 'Headscale Embedded DERP' relay server.", + "Client %s should be connected to Headscale Embedded DERP", client.Hostname()) } - if strings.Contains(health, "could not connect to the 'Headscale Embedded DERP' relay server.") { - t.Errorf("expected to be connected to derp, found: %s", health) - } - } + }, 30*time.Second, 2*time.Second) } success := pingDerpAllHelper(t, allClients, allHostnames) @@ -161,17 +161,17 @@ func derpServerScenario( } for _, client := range allClients { - status, err := client.Status() - assertNoErr(t, err) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + status, err := client.Status() + assert.NoError(ct, err, "Failed to get status for client %s", client.Hostname()) - for _, health := range status.Health { - if strings.Contains(health, "could not connect to any relay server") { - t.Errorf("expected to be connected to derp, found: %s", health) + for _, health := range status.Health { + assert.NotContains(ct, health, "could not connect to any relay server", + "Client %s should be connected to DERP relay after first run", client.Hostname()) + assert.NotContains(ct, health, "could not connect to the 'Headscale Embedded DERP' relay server.", + "Client %s should be connected to Headscale Embedded DERP after first run", client.Hostname()) } - if strings.Contains(health, "could not connect to the 'Headscale Embedded DERP' relay server.") { - t.Errorf("expected to be connected to derp, found: %s", health) - } - } + }, 30*time.Second, 2*time.Second) } t.Logf("Run 1: %d successful pings out of %d", success, len(allClients)*len(allHostnames)) @@ -186,17 +186,17 @@ func derpServerScenario( } for _, client := range allClients { - status, err := client.Status() - assertNoErr(t, err) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + status, err := client.Status() + assert.NoError(ct, err, "Failed to get status for client %s", client.Hostname()) - for _, health := range status.Health { - if strings.Contains(health, "could not connect to any relay server") { - t.Errorf("expected to be connected to derp, found: %s", health) + for _, health := range status.Health { + assert.NotContains(ct, health, "could not connect to any relay server", + "Client %s should be connected to DERP relay after second run", client.Hostname()) + assert.NotContains(ct, health, "could not connect to the 'Headscale Embedded DERP' relay server.", + "Client %s should be connected to Headscale Embedded DERP after second run", client.Hostname()) } - if strings.Contains(health, "could not connect to the 'Headscale Embedded DERP' relay server.") { - t.Errorf("expected to be connected to derp, found: %s", health) - } - } + }, 30*time.Second, 2*time.Second) } t.Logf("Run2: %d successful pings out of %d", success, len(allClients)*len(allHostnames)) diff --git a/integration/general_test.go b/integration/general_test.go index c60c2f46..0e1a8da5 100644 --- a/integration/general_test.go +++ b/integration/general_test.go @@ -179,9 +179,11 @@ func testEphemeralWithOptions(t *testing.T, opts ...hsic.Option) { t.Logf("all clients logged out") - nodes, err := headscale.ListNodes() - assertNoErr(t, err) - require.Len(t, nodes, 0) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + nodes, err := headscale.ListNodes() + assert.NoError(ct, err) + assert.Len(ct, nodes, 0, "All ephemeral nodes should be cleaned up after logout") + }, 30*time.Second, 2*time.Second) } // TestEphemeral2006DeletedTooQuickly verifies that ephemeral nodes are not @@ -534,26 +536,27 @@ func TestUpdateHostnameFromClient(t *testing.T) { assertNoErrSync(t, err) var nodes []*v1.Node - err = executeAndUnmarshal( - headscale, - []string{ - "headscale", - "node", - "list", - "--output", - "json", - }, - &nodes, - ) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + err := executeAndUnmarshal( + headscale, + []string{ + "headscale", + "node", + "list", + "--output", + "json", + }, + &nodes, + ) + assert.NoError(ct, err) + assert.Len(ct, nodes, 3, "Should have 3 nodes after hostname updates") - assertNoErr(t, err) - assert.Len(t, nodes, 3) - - for _, node := range nodes { - hostname := hostnames[strconv.FormatUint(node.GetId(), 10)] - assert.Equal(t, hostname, node.GetName()) - assert.Equal(t, util.ConvertWithFQDNRules(hostname), node.GetGivenName()) - } + for _, node := range nodes { + hostname := hostnames[strconv.FormatUint(node.GetId(), 10)] + assert.Equal(ct, hostname, node.GetName(), "Node name should match hostname") + assert.Equal(ct, util.ConvertWithFQDNRules(hostname), node.GetGivenName(), "Given name should match FQDN rules") + } + }, 20*time.Second, 1*time.Second) // Rename givenName in nodes for _, node := range nodes { @@ -684,11 +687,13 @@ func TestExpireNode(t *testing.T) { t.Logf("before expire: %d successful pings out of %d", success, len(allClients)*len(allIps)) for _, client := range allClients { - status, err := client.Status() - assertNoErr(t, err) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + status, err := client.Status() + assert.NoError(ct, err) - // Assert that we have the original count - self - assert.Len(t, status.Peers(), spec.NodesPerUser-1) + // Assert that we have the original count - self + assert.Len(ct, status.Peers(), spec.NodesPerUser-1, "Client %s should see correct number of peers", client.Hostname()) + }, 30*time.Second, 1*time.Second) } headscale, err := scenario.Headscale() @@ -850,53 +855,57 @@ func TestNodeOnlineStatus(t *testing.T) { return } - result, err := headscale.Execute([]string{ - "headscale", "nodes", "list", "--output", "json", - }) - assertNoErr(t, err) - var nodes []*v1.Node - err = json.Unmarshal([]byte(result), &nodes) - assertNoErr(t, err) + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + result, err := headscale.Execute([]string{ + "headscale", "nodes", "list", "--output", "json", + }) + assert.NoError(ct, err) - // Verify that headscale reports the nodes as online - for _, node := range nodes { - // All nodes should be online - assert.Truef( - t, - node.GetOnline(), - "expected %s to have online status in Headscale, marked as offline %s after start", - node.GetName(), - time.Since(start), - ) - } + err = json.Unmarshal([]byte(result), &nodes) + assert.NoError(ct, err) - // Verify that all nodes report all nodes to be online - for _, client := range allClients { - status, err := client.Status() - assertNoErr(t, err) - - for _, peerKey := range status.Peers() { - peerStatus := status.Peer[peerKey] - - // .Online is only available from CapVer 16, which - // is not present in 1.18 which is the lowest we - // test. - if strings.Contains(client.Hostname(), "1-18") { - continue - } - - // All peers of this nodes are reporting to be - // connected to the control server + // Verify that headscale reports the nodes as online + for _, node := range nodes { + // All nodes should be online assert.Truef( - t, - peerStatus.Online, - "expected node %s to be marked as online in %s peer list, marked as offline %s after start", - peerStatus.HostName, - client.Hostname(), + ct, + node.GetOnline(), + "expected %s to have online status in Headscale, marked as offline %s after start", + node.GetName(), time.Since(start), ) } + }, 15*time.Second, 1*time.Second) + + // Verify that all nodes report all nodes to be online + for _, client := range allClients { + assert.EventuallyWithT(t, func(ct *assert.CollectT) { + status, err := client.Status() + assert.NoError(ct, err) + + for _, peerKey := range status.Peers() { + peerStatus := status.Peer[peerKey] + + // .Online is only available from CapVer 16, which + // is not present in 1.18 which is the lowest we + // test. + if strings.Contains(client.Hostname(), "1-18") { + continue + } + + // All peers of this nodes are reporting to be + // connected to the control server + assert.Truef( + ct, + peerStatus.Online, + "expected node %s to be marked as online in %s peer list, marked as offline %s after start", + peerStatus.HostName, + client.Hostname(), + time.Since(start), + ) + } + }, 15*time.Second, 1*time.Second) } // Check maximum once per second diff --git a/integration/utils.go b/integration/utils.go index c19f6459..a7ab048b 100644 --- a/integration/utils.go +++ b/integration/utils.go @@ -21,8 +21,13 @@ import ( ) const ( + // derpPingTimeout defines the timeout for individual DERP ping operations + // Used in DERP connectivity tests to verify relay server communication derpPingTimeout = 2 * time.Second - derpPingCount = 10 + + // derpPingCount defines the number of ping attempts for DERP connectivity tests + // Higher count provides better reliability assessment of DERP connectivity + derpPingCount = 10 ) func assertNoErr(t *testing.T, err error) { @@ -105,6 +110,9 @@ func didClientUseWebsocketForDERP(t *testing.T, client TailscaleClient) bool { return count > 0 } +// pingAllHelper performs ping tests between all clients and addresses, returning success count. +// This is used to validate network connectivity in integration tests. +// Returns the total number of successful ping operations. func pingAllHelper(t *testing.T, clients []TailscaleClient, addrs []string, opts ...tsic.PingOption) int { t.Helper() success := 0 @@ -123,6 +131,9 @@ func pingAllHelper(t *testing.T, clients []TailscaleClient, addrs []string, opts return success } +// pingDerpAllHelper performs DERP-based ping tests between all clients and addresses. +// This specifically tests connectivity through DERP relay servers, which is important +// for validating NAT traversal and relay functionality. Returns success count. func pingDerpAllHelper(t *testing.T, clients []TailscaleClient, addrs []string) int { t.Helper() success := 0 @@ -304,9 +315,13 @@ func assertValidNetcheck(t *testing.T, client TailscaleClient) { assert.NotEqualf(t, 0, report.PreferredDERP, "%q does not have a DERP relay", client.Hostname()) } -// assertCommandOutputContains executes a command for a set time and asserts that the output -// reaches a desired state. -// It should be used instead of sleeping before executing. +// assertCommandOutputContains executes a command with exponential backoff retry until the output +// contains the expected string or timeout is reached (10 seconds). +// This implements eventual consistency patterns and should be used instead of time.Sleep +// before executing commands that depend on network state propagation. +// +// Timeout: 10 seconds with exponential backoff +// Use cases: DNS resolution, route propagation, policy updates func assertCommandOutputContains(t *testing.T, c TailscaleClient, command []string, contains string) { t.Helper()