auth: ensure machines are allowed in when pak change (#2917)

This commit is contained in:
Kristoffer Dalby
2025-11-30 15:51:01 +01:00
committed by GitHub
parent f658a8eacd
commit 7f1631c4f1
19 changed files with 692 additions and 123 deletions

View File

@@ -456,3 +456,87 @@ func TestAuthKeyLogoutAndReloginSameUserExpiredKey(t *testing.T) {
}
}
// TestAuthKeyDeleteKey tests Issue #2830: node with deleted auth key should still reconnect.
// Scenario from user report: "create node, delete the auth key, restart to validate it can connect"
// Steps:
// 1. Create node with auth key
// 2. DELETE the auth key from database (completely remove it)
// 3. Restart node - should successfully reconnect using MachineKey identity
func TestAuthKeyDeleteKey(t *testing.T) {
IntegrationSkip(t)
// Create scenario with NO nodes - we'll create the node manually so we can capture the auth key
scenario, err := NewScenario(ScenarioSpec{
NodesPerUser: 0, // No nodes created automatically
Users: []string{"user1"},
})
require.NoError(t, err)
defer scenario.ShutdownAssertNoPanics(t)
err = scenario.CreateHeadscaleEnv([]tsic.Option{}, hsic.WithTestName("delkey"), hsic.WithTLS(), hsic.WithDERPAsIP())
requireNoErrHeadscaleEnv(t, err)
headscale, err := scenario.Headscale()
requireNoErrGetHeadscale(t, err)
// Get the user
userMap, err := headscale.MapUsers()
require.NoError(t, err)
userID := userMap["user1"].GetId()
// Create a pre-auth key - we keep the full key string before it gets redacted
authKey, err := scenario.CreatePreAuthKey(userID, false, false)
require.NoError(t, err)
authKeyString := authKey.GetKey()
authKeyID := authKey.GetId()
t.Logf("Created pre-auth key ID %d: %s", authKeyID, authKeyString)
// Create a tailscale client and log it in with the auth key
client, err := scenario.CreateTailscaleNode(
"head",
tsic.WithNetwork(scenario.networks[scenario.testDefaultNetwork]),
)
require.NoError(t, err)
err = client.Login(headscale.GetEndpoint(), authKeyString)
require.NoError(t, err)
// Wait for the node to be registered
var user1Nodes []*v1.Node
assert.EventuallyWithT(t, func(c *assert.CollectT) {
var err error
user1Nodes, err = headscale.ListNodes("user1")
assert.NoError(c, err)
assert.Len(c, user1Nodes, 1)
}, 30*time.Second, 500*time.Millisecond, "waiting for node to be registered")
nodeID := user1Nodes[0].GetId()
nodeName := user1Nodes[0].GetName()
t.Logf("Node %d (%s) created successfully with auth_key_id=%d", nodeID, nodeName, authKeyID)
// Verify node is online
requireAllClientsOnline(t, headscale, []types.NodeID{types.NodeID(nodeID)}, true, "node should be online initially", 120*time.Second)
// DELETE the pre-auth key using the API
t.Logf("Deleting pre-auth key ID %d using API", authKeyID)
err = headscale.DeleteAuthKey(userID, authKeyString)
require.NoError(t, err)
t.Logf("Successfully deleted auth key")
// Simulate node restart (down + up)
t.Logf("Restarting node after deleting its auth key")
err = client.Down()
require.NoError(t, err)
time.Sleep(3 * time.Second)
err = client.Up()
require.NoError(t, err)
// Verify node comes back online
// This will FAIL without the fix because auth key validation will reject deleted key
// With the fix, MachineKey identity allows reconnection even with deleted key
requireAllClientsOnline(t, headscale, []types.NodeID{types.NodeID(nodeID)}, true, "node should reconnect after restart despite deleted key", 120*time.Second)
t.Logf("✓ Node successfully reconnected after its auth key was deleted")
}

View File

@@ -24,6 +24,7 @@ type ControlServer interface {
WaitForRunning() error
CreateUser(user string) (*v1.User, error)
CreateAuthKey(user uint64, reusable bool, ephemeral bool) (*v1.PreAuthKey, error)
DeleteAuthKey(user uint64, key string) error
ListNodes(users ...string) ([]*v1.Node, error)
DeleteNode(nodeID uint64) error
NodesByUser() (map[string][]*v1.Node, error)

View File

@@ -1031,6 +1031,34 @@ func (t *HeadscaleInContainer) CreateAuthKey(
return &preAuthKey, nil
}
// DeleteAuthKey deletes an "authorisation key" for a User.
func (t *HeadscaleInContainer) DeleteAuthKey(
user uint64,
key string,
) error {
command := []string{
"headscale",
"--user",
strconv.FormatUint(user, 10),
"preauthkeys",
"delete",
key,
"--output",
"json",
}
_, _, err := dockertestutil.ExecuteCommand(
t.container,
command,
[]string{},
)
if err != nil {
return fmt.Errorf("failed to execute delete auth key command: %w", err)
}
return nil
}
// ListNodes lists the currently registered Nodes in headscale.
// Optionally a list of usernames can be passed to get users for
// specific users.

View File

@@ -20,6 +20,7 @@ import (
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
"github.com/juanfont/headscale/integration/hsic"
"github.com/juanfont/headscale/integration/integrationutil"
"github.com/juanfont/headscale/integration/tsic"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -2333,6 +2334,12 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
require.NoError(t, err)
scenario.runHeadscaleRegister("user1", body)
// Wait for the client to sync with the server after webauth registration.
// Unlike authkey login which blocks until complete, webauth registration
// happens on the server side and the client needs time to receive the network map.
err = routerUsernet1.WaitForRunning(integrationutil.PeerSyncTimeout())
require.NoError(t, err, "webauth client failed to reach Running state")
} else {
userMap, err := headscale.MapUsers()
require.NoError(t, err)