auth: ensure machines are allowed in when pak change (#2917)

This commit is contained in:
Kristoffer Dalby
2025-11-30 15:51:01 +01:00
parent 16d811b306
commit 3cf2d7195a
19 changed files with 695 additions and 118 deletions

View File

@@ -3196,6 +3196,93 @@ func TestNodeReregistrationWithExpiredPreAuthKey(t *testing.T) {
assert.Contains(t, err.Error(), "authkey expired", "error should mention key expiration")
}
// TestIssue2830_ExistingNodeReregistersWithExpiredKey tests the fix for issue #2830.
// When a node is already registered and the pre-auth key expires, the node should
// still be able to re-register (e.g., after a container restart) using the same
// expired key. The key was only needed for initial authentication.
func TestIssue2830_ExistingNodeReregistersWithExpiredKey(t *testing.T) {
t.Parallel()
app := createTestApp(t)
user := app.state.CreateUserForTest("test-user")
// Create a valid key (will expire it later)
expiry := time.Now().Add(1 * time.Hour)
pak, err := app.state.CreatePreAuthKey(types.UserID(user.ID), false, false, &expiry, nil)
require.NoError(t, err)
machineKey := key.NewMachine()
nodeKey := key.NewNode()
// Register the node initially (key is still valid)
req := tailcfg.RegisterRequest{
Auth: &tailcfg.RegisterResponseAuth{
AuthKey: pak.Key,
},
NodeKey: nodeKey.Public(),
Hostinfo: &tailcfg.Hostinfo{
Hostname: "issue2830-node",
},
Expiry: time.Now().Add(24 * time.Hour),
}
resp, err := app.handleRegister(context.Background(), req, machineKey.Public())
require.NoError(t, err, "initial registration should succeed")
require.NotNil(t, resp)
require.True(t, resp.MachineAuthorized, "node should be authorized after initial registration")
// Verify node was created
allNodes := app.state.ListNodes()
require.Equal(t, 1, allNodes.Len())
initialNodeID := allNodes.At(0).ID()
// Now expire the key by updating it in the database to have an expiry in the past.
// This simulates the real-world scenario where a key expires after initial registration.
pastExpiry := time.Now().Add(-1 * time.Hour)
err = app.state.DB().DB.Model(&types.PreAuthKey{}).
Where("id = ?", pak.ID).
Update("expiration", pastExpiry).Error
require.NoError(t, err, "should be able to update key expiration")
// Reload the key to verify it's now expired
expiredPak, err := app.state.GetPreAuthKey(pak.Key)
require.NoError(t, err)
require.NotNil(t, expiredPak.Expiration)
require.True(t, expiredPak.Expiration.Before(time.Now()), "key should be expired")
// Verify the expired key would fail validation
err = expiredPak.Validate()
require.Error(t, err, "key should fail validation when expired")
require.Contains(t, err.Error(), "authkey expired")
// Attempt to re-register with the SAME key (now expired).
// This should SUCCEED because:
// - The node already exists with the same MachineKey and User
// - The fix allows existing nodes to re-register even with expired keys
// - The key was only needed for initial authentication
req2 := tailcfg.RegisterRequest{
Auth: &tailcfg.RegisterResponseAuth{
AuthKey: pak.Key, // Same key as initial registration (now expired)
},
NodeKey: nodeKey.Public(), // Same NodeKey as initial registration
Hostinfo: &tailcfg.Hostinfo{
Hostname: "issue2830-node",
},
Expiry: time.Now().Add(24 * time.Hour),
}
resp2, err := app.handleRegister(context.Background(), req2, machineKey.Public())
require.NoError(t, err, "re-registration should succeed even with expired key for existing node")
assert.NotNil(t, resp2)
assert.True(t, resp2.MachineAuthorized, "node should remain authorized after re-registration")
// Verify we still have only one node (re-registered, not created new)
allNodes = app.state.ListNodes()
require.Equal(t, 1, allNodes.Len(), "should have exactly one node (re-registered)")
assert.Equal(t, initialNodeID, allNodes.At(0).ID(), "node ID should not change on re-registration")
}
// TestGitHubIssue2830_ExistingNodeCanReregisterWithUsedPreAuthKey tests that an existing node
// can re-register using a pre-auth key that's already marked as Used=true, as long as:
// 1. The node is re-registering with the same MachineKey it originally used

View File

@@ -268,9 +268,19 @@ func GetPreAuthKey(tx *gorm.DB, key string) (*types.PreAuthKey, error) {
}
// DestroyPreAuthKey destroys a preauthkey. Returns error if the PreAuthKey
// does not exist.
// does not exist. This also clears the auth_key_id on any nodes that reference
// this key.
func DestroyPreAuthKey(tx *gorm.DB, pak types.PreAuthKey) error {
return tx.Transaction(func(db *gorm.DB) error {
// First, clear the foreign key reference on any nodes using this key
err := db.Model(&types.Node{}).
Where("auth_key_id = ?", pak.ID).
Update("auth_key_id", nil).Error
if err != nil {
return fmt.Errorf("failed to clear auth_key_id on nodes: %w", err)
}
// Then delete the pre-auth key
if result := db.Unscoped().Delete(pak); result.Error != nil {
return result.Error
}
@@ -285,6 +295,12 @@ func (hsdb *HSDatabase) ExpirePreAuthKey(k *types.PreAuthKey) error {
})
}
func (hsdb *HSDatabase) DeletePreAuthKey(k *types.PreAuthKey) error {
return hsdb.Write(func(tx *gorm.DB) error {
return DestroyPreAuthKey(tx, *k)
})
}
// UsePreAuthKey marks a PreAuthKey as used.
func UsePreAuthKey(tx *gorm.DB, k *types.PreAuthKey) error {
err := tx.Model(k).Update("used", true).Error

View File

@@ -206,6 +206,27 @@ func (api headscaleV1APIServer) ExpirePreAuthKey(
return &v1.ExpirePreAuthKeyResponse{}, nil
}
func (api headscaleV1APIServer) DeletePreAuthKey(
ctx context.Context,
request *v1.DeletePreAuthKeyRequest,
) (*v1.DeletePreAuthKeyResponse, error) {
preAuthKey, err := api.h.state.GetPreAuthKey(request.Key)
if err != nil {
return nil, err
}
if uint64(preAuthKey.User.ID) != request.GetUser() {
return nil, fmt.Errorf("preauth key does not belong to user")
}
err = api.h.state.DeletePreAuthKey(preAuthKey)
if err != nil {
return nil, err
}
return &v1.DeletePreAuthKeyResponse{}, nil
}
func (api headscaleV1APIServer) ListPreAuthKeys(
ctx context.Context,
request *v1.ListPreAuthKeysRequest,

View File

@@ -976,6 +976,11 @@ func (s *State) ExpirePreAuthKey(preAuthKey *types.PreAuthKey) error {
return s.db.ExpirePreAuthKey(preAuthKey)
}
// DeletePreAuthKey permanently deletes a pre-authentication key.
func (s *State) DeletePreAuthKey(preAuthKey *types.PreAuthKey) error {
return s.db.DeletePreAuthKey(preAuthKey)
}
// GetRegistrationCacheEntry retrieves a node registration from cache.
func (s *State) GetRegistrationCacheEntry(id types.RegistrationID) (*types.RegisterNode, bool) {
entry, found := s.registrationCache.Get(id)
@@ -1313,11 +1318,18 @@ func (s *State) HandleNodeFromPreAuthKey(
// key was only needed for initial authentication. NodeKey rotation requires validation.
existingNodeSameUser, existsSameUser := s.nodeStore.GetNodeByMachineKey(machineKey, types.UserID(pak.User.ID))
// Skip validation only if both the AuthKeyID and NodeKey match (not a rotation).
isExistingNodeReregistering := existsSameUser && existingNodeSameUser.Valid() &&
existingNodeSameUser.AuthKey().Valid() &&
existingNodeSameUser.AuthKeyID().Valid() &&
existingNodeSameUser.AuthKeyID().Get() == pak.ID
// For existing nodes, skip validation if:
// 1. MachineKey matches (cryptographic proof of machine identity)
// 2. User matches (from the PAK being used)
// 3. Not a NodeKey rotation (rotation requires fresh validation)
//
// Security: MachineKey is the cryptographic identity. If someone has the MachineKey,
// they control the machine. The PAK was only needed to authorize initial join.
// We don't check which specific PAK was used originally because:
// - Container restarts may use different PAKs (e.g., env var changed)
// - Original PAK may be deleted
// - MachineKey + User is sufficient to prove this is the same node
isExistingNodeReregistering := existsSameUser && existingNodeSameUser.Valid()
// Check if this is a NodeKey rotation (different NodeKey)
isNodeKeyRotation := existsSameUser && existingNodeSameUser.Valid() &&