// Package state provides core state management for Headscale, coordinating // between subsystems like database, IP allocation, policy management, and DERP routing. package state import ( "cmp" "context" "errors" "fmt" "io" "net/netip" "os" "slices" "sync" "sync/atomic" "time" hsdb "github.com/juanfont/headscale/hscontrol/db" "github.com/juanfont/headscale/hscontrol/policy" "github.com/juanfont/headscale/hscontrol/policy/matcher" "github.com/juanfont/headscale/hscontrol/routes" "github.com/juanfont/headscale/hscontrol/types" "github.com/juanfont/headscale/hscontrol/types/change" "github.com/juanfont/headscale/hscontrol/util" "github.com/rs/zerolog/log" "golang.org/x/sync/errgroup" "gorm.io/gorm" "tailscale.com/net/tsaddr" "tailscale.com/tailcfg" "tailscale.com/types/key" "tailscale.com/types/ptr" "tailscale.com/types/views" zcache "zgo.at/zcache/v2" ) const ( // registerCacheExpiration defines how long node registration entries remain in cache. registerCacheExpiration = time.Minute * 15 // registerCacheCleanup defines the interval for cleaning up expired cache entries. registerCacheCleanup = time.Minute * 20 ) // ErrUnsupportedPolicyMode is returned for invalid policy modes. Valid modes are "file" and "db". var ErrUnsupportedPolicyMode = errors.New("unsupported policy mode") // State manages Headscale's core state, coordinating between database, policy management, // IP allocation, and DERP routing. All methods are thread-safe. type State struct { // cfg holds the current Headscale configuration cfg *types.Config // nodeStore provides an in-memory cache for nodes. nodeStore *NodeStore // subsystem keeping state // db provides persistent storage and database operations db *hsdb.HSDatabase // ipAlloc manages IP address allocation for nodes ipAlloc *hsdb.IPAllocator // derpMap contains the current DERP relay configuration derpMap atomic.Pointer[tailcfg.DERPMap] // polMan handles policy evaluation and management polMan policy.PolicyManager // registrationCache caches node registration data to reduce database load registrationCache *zcache.Cache[types.RegistrationID, types.RegisterNode] // primaryRoutes tracks primary route assignments for nodes primaryRoutes *routes.PrimaryRoutes } // NewState creates and initializes a new State instance, setting up the database, // IP allocator, DERP map, policy manager, and loading existing users and nodes. func NewState(cfg *types.Config) (*State, error) { cacheExpiration := registerCacheExpiration if cfg.Tuning.RegisterCacheExpiration != 0 { cacheExpiration = cfg.Tuning.RegisterCacheExpiration } cacheCleanup := registerCacheCleanup if cfg.Tuning.RegisterCacheCleanup != 0 { cacheCleanup = cfg.Tuning.RegisterCacheCleanup } registrationCache := zcache.New[types.RegistrationID, types.RegisterNode]( cacheExpiration, cacheCleanup, ) registrationCache.OnEvicted( func(id types.RegistrationID, rn types.RegisterNode) { rn.SendAndClose(nil) }, ) db, err := hsdb.NewHeadscaleDatabase( cfg.Database, cfg.BaseDomain, registrationCache, ) if err != nil { return nil, fmt.Errorf("init database: %w", err) } ipAlloc, err := hsdb.NewIPAllocator(db, cfg.PrefixV4, cfg.PrefixV6, cfg.IPAllocation) if err != nil { return nil, fmt.Errorf("init ip allocatior: %w", err) } nodes, err := db.ListNodes() if err != nil { return nil, fmt.Errorf("loading nodes: %w", err) } // On startup, all nodes should be marked as offline until they reconnect // This ensures we don't have stale online status from previous runs for _, node := range nodes { node.IsOnline = ptr.To(false) } users, err := db.ListUsers() if err != nil { return nil, fmt.Errorf("loading users: %w", err) } pol, err := policyBytes(db, cfg) if err != nil { return nil, fmt.Errorf("loading policy: %w", err) } polMan, err := policy.NewPolicyManager(pol, users, nodes.ViewSlice()) if err != nil { return nil, fmt.Errorf("init policy manager: %w", err) } // PolicyManager.BuildPeerMap handles both global and per-node filter complexity. // This moves the complex peer relationship logic into the policy package where it belongs. nodeStore := NewNodeStore(nodes, func(nodes []types.NodeView) map[types.NodeID][]types.NodeView { return polMan.BuildPeerMap(views.SliceOf(nodes)) }) nodeStore.Start() return &State{ cfg: cfg, db: db, ipAlloc: ipAlloc, polMan: polMan, registrationCache: registrationCache, primaryRoutes: routes.New(), nodeStore: nodeStore, }, nil } // Close gracefully shuts down the State instance and releases all resources. func (s *State) Close() error { s.nodeStore.Stop() if err := s.db.Close(); err != nil { return fmt.Errorf("closing database: %w", err) } return nil } // policyBytes loads policy configuration from file or database based on the configured mode. // Returns nil if no policy is configured, which is valid. func policyBytes(db *hsdb.HSDatabase, cfg *types.Config) ([]byte, error) { switch cfg.Policy.Mode { case types.PolicyModeFile: path := cfg.Policy.Path // It is fine to start headscale without a policy file. if len(path) == 0 { return nil, nil } absPath := util.AbsolutePathFromConfigPath(path) policyFile, err := os.Open(absPath) if err != nil { return nil, err } defer policyFile.Close() return io.ReadAll(policyFile) case types.PolicyModeDB: p, err := db.GetPolicy() if err != nil { if errors.Is(err, types.ErrPolicyNotFound) { return nil, nil } return nil, err } if p.Data == "" { return nil, nil } return []byte(p.Data), err } return nil, fmt.Errorf("%w: %s", ErrUnsupportedPolicyMode, cfg.Policy.Mode) } // SetDERPMap updates the DERP relay configuration. func (s *State) SetDERPMap(dm *tailcfg.DERPMap) { s.derpMap.Store(dm) } // DERPMap returns the current DERP relay configuration for peer-to-peer connectivity. func (s *State) DERPMap() tailcfg.DERPMapView { return s.derpMap.Load().View() } // ReloadPolicy reloads the access control policy and triggers auto-approval if changed. // Returns true if the policy changed. func (s *State) ReloadPolicy() ([]change.ChangeSet, error) { pol, err := policyBytes(s.db, s.cfg) if err != nil { return nil, fmt.Errorf("loading policy: %w", err) } policyChanged, err := s.polMan.SetPolicy(pol) if err != nil { return nil, fmt.Errorf("setting policy: %w", err) } // Rebuild peer maps after policy changes because the peersFunc in NodeStore // uses the PolicyManager's filters. Without this, nodes won't see newly allowed // peers until a node is added/removed, causing autogroup:self policies to not // propagate correctly when switching between policy types. s.nodeStore.RebuildPeerMaps() cs := []change.ChangeSet{change.PolicyChange()} // Always call autoApproveNodes during policy reload, regardless of whether // the policy content has changed. This ensures that routes are re-evaluated // when they might have been manually disabled but could now be auto-approved // with the current policy. rcs, err := s.autoApproveNodes() if err != nil { return nil, fmt.Errorf("auto approving nodes: %w", err) } // TODO(kradalby): These changes can probably be safely ignored. // If the PolicyChange is happening, that will lead to a full update // meaning that we do not need to send individual route changes. cs = append(cs, rcs...) if len(rcs) > 0 || policyChanged { log.Info(). Bool("policy.changed", policyChanged). Int("route.changes", len(rcs)). Int("total.changes", len(cs)). Msg("Policy reload completed with changes") } return cs, nil } // CreateUser creates a new user and updates the policy manager. // Returns the created user, change set, and any error. func (s *State) CreateUser(user types.User) (*types.User, change.ChangeSet, error) { if err := s.db.DB.Save(&user).Error; err != nil { return nil, change.EmptySet, fmt.Errorf("creating user: %w", err) } // Check if policy manager needs updating c, err := s.updatePolicyManagerUsers() if err != nil { // Log the error but don't fail the user creation return &user, change.EmptySet, fmt.Errorf("failed to update policy manager after user creation: %w", err) } // Even if the policy manager doesn't detect a filter change, SSH policies // might now be resolvable when they weren't before. If there are existing // nodes, we should send a policy change to ensure they get updated SSH policies. // TODO(kradalby): detect this, or rebuild all SSH policies so we can determine // this upstream. if c.Empty() { c = change.PolicyChange() } log.Info().Str("user.name", user.Name).Msg("User created") return &user, c, nil } // UpdateUser modifies an existing user using the provided update function within a transaction. // Returns the updated user, change set, and any error. func (s *State) UpdateUser(userID types.UserID, updateFn func(*types.User) error) (*types.User, change.ChangeSet, error) { user, err := hsdb.Write(s.db.DB, func(tx *gorm.DB) (*types.User, error) { user, err := hsdb.GetUserByID(tx, userID) if err != nil { return nil, err } if err := updateFn(user); err != nil { return nil, err } if err := tx.Save(user).Error; err != nil { return nil, fmt.Errorf("updating user: %w", err) } return user, nil }) if err != nil { return nil, change.EmptySet, err } // Check if policy manager needs updating c, err := s.updatePolicyManagerUsers() if err != nil { return user, change.EmptySet, fmt.Errorf("failed to update policy manager after user update: %w", err) } // TODO(kradalby): We might want to update nodestore with the user data return user, c, nil } // DeleteUser permanently removes a user and all associated data (nodes, API keys, etc). // This operation is irreversible. func (s *State) DeleteUser(userID types.UserID) error { return s.db.DestroyUser(userID) } // RenameUser changes a user's name. The new name must be unique. func (s *State) RenameUser(userID types.UserID, newName string) (*types.User, change.ChangeSet, error) { return s.UpdateUser(userID, func(user *types.User) error { user.Name = newName return nil }) } // GetUserByID retrieves a user by ID. func (s *State) GetUserByID(userID types.UserID) (*types.User, error) { return s.db.GetUserByID(userID) } // GetUserByName retrieves a user by name. func (s *State) GetUserByName(name string) (*types.User, error) { return s.db.GetUserByName(name) } // GetUserByOIDCIdentifier retrieves a user by their OIDC identifier. func (s *State) GetUserByOIDCIdentifier(id string) (*types.User, error) { return s.db.GetUserByOIDCIdentifier(id) } // ListUsersWithFilter retrieves users matching the specified filter criteria. func (s *State) ListUsersWithFilter(filter *types.User) ([]types.User, error) { return s.db.ListUsers(filter) } // ListAllUsers retrieves all users in the system. func (s *State) ListAllUsers() ([]types.User, error) { return s.db.ListUsers() } // persistNodeToDB saves the given node state to the database. // This function must receive the exact node state to save to ensure consistency between // NodeStore and the database. It verifies the node still exists in NodeStore to prevent // race conditions where a node might be deleted between UpdateNode returning and // persistNodeToDB being called. func (s *State) persistNodeToDB(node types.NodeView) (types.NodeView, change.ChangeSet, error) { if !node.Valid() { return types.NodeView{}, change.EmptySet, fmt.Errorf("invalid node view provided") } // Verify the node still exists in NodeStore before persisting to database. // Without this check, we could hit a race condition where UpdateNode returns a valid // node from a batch update, then the node gets deleted (e.g., ephemeral node logout), // and persistNodeToDB would incorrectly re-insert the deleted node into the database. _, exists := s.nodeStore.GetNode(node.ID()) if !exists { log.Warn(). Uint64("node.id", node.ID().Uint64()). Str("node.name", node.Hostname()). Bool("is_ephemeral", node.IsEphemeral()). Msg("Node no longer exists in NodeStore, skipping database persist to prevent race condition") return types.NodeView{}, change.EmptySet, fmt.Errorf("node %d no longer exists in NodeStore, skipping database persist", node.ID()) } nodePtr := node.AsStruct() if err := s.db.DB.Save(nodePtr).Error; err != nil { return types.NodeView{}, change.EmptySet, fmt.Errorf("saving node: %w", err) } // Check if policy manager needs updating c, err := s.updatePolicyManagerNodes() if err != nil { return nodePtr.View(), change.EmptySet, fmt.Errorf("failed to update policy manager after node save: %w", err) } if c.Empty() { c = change.NodeAdded(node.ID()) } return node, c, nil } func (s *State) SaveNode(node types.NodeView) (types.NodeView, change.ChangeSet, error) { // Update NodeStore first nodePtr := node.AsStruct() resultNode := s.nodeStore.PutNode(*nodePtr) // Then save to database using the result from PutNode return s.persistNodeToDB(resultNode) } // DeleteNode permanently removes a node and cleans up associated resources. // Returns whether policies changed and any error. This operation is irreversible. func (s *State) DeleteNode(node types.NodeView) (change.ChangeSet, error) { s.nodeStore.DeleteNode(node.ID()) err := s.db.DeleteNode(node.AsStruct()) if err != nil { return change.EmptySet, err } c := change.NodeRemoved(node.ID()) // Check if policy manager needs updating after node deletion policyChange, err := s.updatePolicyManagerNodes() if err != nil { return change.EmptySet, fmt.Errorf("failed to update policy manager after node deletion: %w", err) } if !policyChange.Empty() { c = policyChange } return c, nil } // Connect marks a node as connected and updates its primary routes in the state. func (s *State) Connect(id types.NodeID) []change.ChangeSet { // CRITICAL FIX: Update the online status in NodeStore BEFORE creating change notification // This ensures that when the NodeCameOnline change is distributed and processed by other nodes, // the NodeStore already reflects the correct online status for full map generation. // now := time.Now() node, ok := s.nodeStore.UpdateNode(id, func(n *types.Node) { n.IsOnline = ptr.To(true) // n.LastSeen = ptr.To(now) }) if !ok { return nil } c := []change.ChangeSet{change.NodeOnline(id)} log.Info().Uint64("node.id", id.Uint64()).Str("node.name", node.Hostname()).Msg("Node connected") // Use the node's current routes for primary route update // SubnetRoutes() returns only the intersection of announced AND approved routes // We MUST use SubnetRoutes() to maintain the security model routeChange := s.primaryRoutes.SetRoutes(id, node.SubnetRoutes()...) if routeChange { c = append(c, change.NodeAdded(id)) } return c } // Disconnect marks a node as disconnected and updates its primary routes in the state. func (s *State) Disconnect(id types.NodeID) ([]change.ChangeSet, error) { now := time.Now() node, ok := s.nodeStore.UpdateNode(id, func(n *types.Node) { n.LastSeen = ptr.To(now) // NodeStore is the source of truth for all node state including online status. n.IsOnline = ptr.To(false) }) if !ok { return nil, fmt.Errorf("node not found: %d", id) } log.Info().Uint64("node.id", id.Uint64()).Str("node.name", node.Hostname()).Msg("Node disconnected") // Special error handling for disconnect - we log errors but continue // because NodeStore is already updated and we need to notify peers _, c, err := s.persistNodeToDB(node) if err != nil { // Log error but don't fail the disconnection - NodeStore is already updated // and we need to send change notifications to peers log.Error().Err(err).Uint64("node.id", id.Uint64()).Str("node.name", node.Hostname()).Msg("Failed to update last seen in database") c = change.EmptySet } // The node is disconnecting so make sure that none of the routes it // announced are served to any nodes. routeChange := s.primaryRoutes.SetRoutes(id) cs := []change.ChangeSet{change.NodeOffline(id), c} // If we have a policy change or route change, return that as it's more comprehensive // Otherwise, return the NodeOffline change to ensure nodes are notified if c.IsFull() || routeChange { cs = append(cs, change.PolicyChange()) } return cs, nil } // GetNodeByID retrieves a node by ID. // GetNodeByID retrieves a node by its ID. // The bool indicates if the node exists or is available (like "err not found"). // The NodeView might be invalid, so it must be checked with .Valid(), which must be used to ensure // it isn't an invalid node (this is more of a node error or node is broken). func (s *State) GetNodeByID(nodeID types.NodeID) (types.NodeView, bool) { return s.nodeStore.GetNode(nodeID) } // GetNodeByNodeKey retrieves a node by its Tailscale public key. // The bool indicates if the node exists or is available (like "err not found"). // The NodeView might be invalid, so it must be checked with .Valid(), which must be used to ensure // it isn't an invalid node (this is more of a node error or node is broken). func (s *State) GetNodeByNodeKey(nodeKey key.NodePublic) (types.NodeView, bool) { return s.nodeStore.GetNodeByNodeKey(nodeKey) } // GetNodeByMachineKey retrieves a node by its machine key and user ID. // The bool indicates if the node exists or is available (like "err not found"). // The NodeView might be invalid, so it must be checked with .Valid(), which must be used to ensure // it isn't an invalid node (this is more of a node error or node is broken). func (s *State) GetNodeByMachineKey(machineKey key.MachinePublic, userID types.UserID) (types.NodeView, bool) { return s.nodeStore.GetNodeByMachineKey(machineKey, userID) } // ListNodes retrieves specific nodes by ID, or all nodes if no IDs provided. func (s *State) ListNodes(nodeIDs ...types.NodeID) views.Slice[types.NodeView] { if len(nodeIDs) == 0 { return s.nodeStore.ListNodes() } // Filter nodes by the requested IDs allNodes := s.nodeStore.ListNodes() nodeIDSet := make(map[types.NodeID]struct{}, len(nodeIDs)) for _, id := range nodeIDs { nodeIDSet[id] = struct{}{} } var filteredNodes []types.NodeView for _, node := range allNodes.All() { if _, exists := nodeIDSet[node.ID()]; exists { filteredNodes = append(filteredNodes, node) } } return views.SliceOf(filteredNodes) } // ListNodesByUser retrieves all nodes belonging to a specific user. func (s *State) ListNodesByUser(userID types.UserID) views.Slice[types.NodeView] { return s.nodeStore.ListNodesByUser(userID) } // ListPeers retrieves nodes that can communicate with the specified node based on policy. func (s *State) ListPeers(nodeID types.NodeID, peerIDs ...types.NodeID) views.Slice[types.NodeView] { if len(peerIDs) == 0 { return s.nodeStore.ListPeers(nodeID) } // For specific peerIDs, filter from all nodes allNodes := s.nodeStore.ListNodes() nodeIDSet := make(map[types.NodeID]struct{}, len(peerIDs)) for _, id := range peerIDs { nodeIDSet[id] = struct{}{} } var filteredNodes []types.NodeView for _, node := range allNodes.All() { if _, exists := nodeIDSet[node.ID()]; exists { filteredNodes = append(filteredNodes, node) } } return views.SliceOf(filteredNodes) } // ListEphemeralNodes retrieves all ephemeral (temporary) nodes in the system. func (s *State) ListEphemeralNodes() views.Slice[types.NodeView] { allNodes := s.nodeStore.ListNodes() var ephemeralNodes []types.NodeView for _, node := range allNodes.All() { // Check if node is ephemeral by checking its AuthKey if node.AuthKey().Valid() && node.AuthKey().Ephemeral() { ephemeralNodes = append(ephemeralNodes, node) } } return views.SliceOf(ephemeralNodes) } // SetNodeExpiry updates the expiration time for a node. func (s *State) SetNodeExpiry(nodeID types.NodeID, expiry time.Time) (types.NodeView, change.ChangeSet, error) { // Update NodeStore before database to ensure consistency. The NodeStore update is // blocking and will be the source of truth for the batcher. The database update must // make the exact same change. If the database update fails, the NodeStore change will // remain, but since we return an error, no change notification will be sent to the // batcher, preventing inconsistent state propagation. expiryPtr := expiry n, ok := s.nodeStore.UpdateNode(nodeID, func(node *types.Node) { node.Expiry = &expiryPtr }) if !ok { return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", nodeID) } return s.persistNodeToDB(n) } // SetNodeTags assigns tags to a node for use in access control policies. func (s *State) SetNodeTags(nodeID types.NodeID, tags []string) (types.NodeView, change.ChangeSet, error) { // Update NodeStore before database to ensure consistency. The NodeStore update is // blocking and will be the source of truth for the batcher. The database update must // make the exact same change. n, ok := s.nodeStore.UpdateNode(nodeID, func(node *types.Node) { node.ForcedTags = tags }) if !ok { return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", nodeID) } return s.persistNodeToDB(n) } // SetApprovedRoutes sets the network routes that a node is approved to advertise. func (s *State) SetApprovedRoutes(nodeID types.NodeID, routes []netip.Prefix) (types.NodeView, change.ChangeSet, error) { // TODO(kradalby): In principle we should call the AutoApprove logic here // because even if the CLI removes an auto-approved route, it will be added // back automatically. n, ok := s.nodeStore.UpdateNode(nodeID, func(node *types.Node) { node.ApprovedRoutes = routes }) if !ok { return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", nodeID) } // Persist the node changes to the database nodeView, c, err := s.persistNodeToDB(n) if err != nil { return types.NodeView{}, change.EmptySet, err } // Update primary routes table based on SubnetRoutes (intersection of announced and approved). // The primary routes table is what the mapper uses to generate network maps, so updating it // here ensures that route changes are distributed to peers. routeChange := s.primaryRoutes.SetRoutes(nodeID, nodeView.SubnetRoutes()...) // If routes changed or the changeset isn't already a full update, trigger a policy change // to ensure all nodes get updated network maps if routeChange || !c.IsFull() { c = change.PolicyChange() } return nodeView, c, nil } // RenameNode changes the display name of a node. func (s *State) RenameNode(nodeID types.NodeID, newName string) (types.NodeView, change.ChangeSet, error) { if err := util.ValidateHostname(newName); err != nil { return types.NodeView{}, change.EmptySet, fmt.Errorf("renaming node: %w", err) } // Check name uniqueness against NodeStore allNodes := s.nodeStore.ListNodes() for i := 0; i < allNodes.Len(); i++ { node := allNodes.At(i) if node.ID() != nodeID && node.AsStruct().GivenName == newName { return types.NodeView{}, change.EmptySet, fmt.Errorf("name is not unique: %s", newName) } } // Update NodeStore before database to ensure consistency. The NodeStore update is // blocking and will be the source of truth for the batcher. The database update must // make the exact same change. n, ok := s.nodeStore.UpdateNode(nodeID, func(node *types.Node) { node.GivenName = newName }) if !ok { return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", nodeID) } return s.persistNodeToDB(n) } // AssignNodeToUser transfers a node to a different user. func (s *State) AssignNodeToUser(nodeID types.NodeID, userID types.UserID) (types.NodeView, change.ChangeSet, error) { // Validate that both node and user exist _, found := s.GetNodeByID(nodeID) if !found { return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found: %d", nodeID) } user, err := s.GetUserByID(userID) if err != nil { return types.NodeView{}, change.EmptySet, fmt.Errorf("user not found: %w", err) } // Update NodeStore before database to ensure consistency. The NodeStore update is // blocking and will be the source of truth for the batcher. The database update must // make the exact same change. n, ok := s.nodeStore.UpdateNode(nodeID, func(n *types.Node) { n.User = *user n.UserID = uint(userID) }) if !ok { return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", nodeID) } return s.persistNodeToDB(n) } // BackfillNodeIPs assigns IP addresses to nodes that don't have them. func (s *State) BackfillNodeIPs() ([]string, error) { changes, err := s.db.BackfillNodeIPs(s.ipAlloc) if err != nil { return nil, err } // Refresh NodeStore after IP changes to ensure consistency if len(changes) > 0 { nodes, err := s.db.ListNodes() if err != nil { return changes, fmt.Errorf("failed to refresh NodeStore after IP backfill: %w", err) } for _, node := range nodes { // Preserve online status and NetInfo when refreshing from database existingNode, exists := s.nodeStore.GetNode(node.ID) if exists && existingNode.Valid() { node.IsOnline = ptr.To(existingNode.IsOnline().Get()) // TODO(kradalby): We should ensure we use the same hostinfo and node merge semantics // when a node re-registers as we do when it sends a map request (UpdateNodeFromMapRequest). // Preserve NetInfo from existing node to prevent loss during backfill netInfo := netInfoFromMapRequest(node.ID, existingNode.Hostinfo().AsStruct(), node.Hostinfo) node.Hostinfo = existingNode.Hostinfo().AsStruct() node.Hostinfo.NetInfo = netInfo } // TODO(kradalby): This should just update the IP addresses, nothing else in the node store. // We should avoid PutNode here. _ = s.nodeStore.PutNode(*node) } } return changes, nil } // ExpireExpiredNodes finds and processes expired nodes since the last check. // Returns next check time, state update with expired nodes, and whether any were found. func (s *State) ExpireExpiredNodes(lastCheck time.Time) (time.Time, []change.ChangeSet, bool) { // Why capture start time: We need to ensure we don't miss nodes that expire // while this function is running by using a consistent timestamp for the next check started := time.Now() var updates []change.ChangeSet for _, node := range s.nodeStore.ListNodes().All() { if !node.Valid() { continue } // Why check After(lastCheck): We only want to notify about nodes that // expired since the last check to avoid duplicate notifications if node.IsExpired() && node.Expiry().Valid() && node.Expiry().Get().After(lastCheck) { updates = append(updates, change.KeyExpiry(node.ID(), node.Expiry().Get())) } } if len(updates) > 0 { return started, updates, true } return started, nil, false } // SSHPolicy returns the SSH access policy for a node. func (s *State) SSHPolicy(node types.NodeView) (*tailcfg.SSHPolicy, error) { return s.polMan.SSHPolicy(node) } // Filter returns the current network filter rules and matches. func (s *State) Filter() ([]tailcfg.FilterRule, []matcher.Match) { return s.polMan.Filter() } // FilterForNode returns filter rules for a specific node, handling autogroup:self per-node. func (s *State) FilterForNode(node types.NodeView) ([]tailcfg.FilterRule, error) { return s.polMan.FilterForNode(node) } // MatchersForNode returns matchers for peer relationship determination (unreduced). func (s *State) MatchersForNode(node types.NodeView) ([]matcher.Match, error) { return s.polMan.MatchersForNode(node) } // NodeCanHaveTag checks if a node is allowed to have a specific tag. func (s *State) NodeCanHaveTag(node types.NodeView, tag string) bool { return s.polMan.NodeCanHaveTag(node, tag) } // SetPolicy updates the policy configuration. func (s *State) SetPolicy(pol []byte) (bool, error) { return s.polMan.SetPolicy(pol) } // AutoApproveRoutes checks if a node's routes should be auto-approved. // AutoApproveRoutes checks if any routes should be auto-approved for a node and updates them. func (s *State) AutoApproveRoutes(nv types.NodeView) bool { approved, changed := policy.ApproveRoutesWithPolicy(s.polMan, nv, nv.ApprovedRoutes().AsSlice(), nv.AnnouncedRoutes()) if changed { log.Debug(). Uint64("node.id", nv.ID().Uint64()). Str("node.name", nv.Hostname()). Strs("routes.announced", util.PrefixesToString(nv.AnnouncedRoutes())). Strs("routes.approved.old", util.PrefixesToString(nv.ApprovedRoutes().AsSlice())). Strs("routes.approved.new", util.PrefixesToString(approved)). Msg("Single node auto-approval detected route changes") // Persist the auto-approved routes to database and NodeStore via SetApprovedRoutes // This ensures consistency between database and NodeStore _, _, err := s.SetApprovedRoutes(nv.ID(), approved) if err != nil { log.Error(). Uint64("node.id", nv.ID().Uint64()). Str("node.name", nv.Hostname()). Err(err). Msg("Failed to persist auto-approved routes") return false } log.Info().Uint64("node.id", nv.ID().Uint64()).Str("node.name", nv.Hostname()).Strs("routes.approved", util.PrefixesToString(approved)).Msg("Routes approved") } return changed } // GetPolicy retrieves the current policy from the database. func (s *State) GetPolicy() (*types.Policy, error) { return s.db.GetPolicy() } // SetPolicyInDB stores policy data in the database. func (s *State) SetPolicyInDB(data string) (*types.Policy, error) { return s.db.SetPolicy(data) } // SetNodeRoutes sets the primary routes for a node. func (s *State) SetNodeRoutes(nodeID types.NodeID, routes ...netip.Prefix) change.ChangeSet { if s.primaryRoutes.SetRoutes(nodeID, routes...) { // Route changes affect packet filters for all nodes, so trigger a policy change // to ensure filters are regenerated across the entire network return change.PolicyChange() } return change.EmptySet } // GetNodePrimaryRoutes returns the primary routes for a node. func (s *State) GetNodePrimaryRoutes(nodeID types.NodeID) []netip.Prefix { return s.primaryRoutes.PrimaryRoutes(nodeID) } // PrimaryRoutesString returns a string representation of all primary routes. func (s *State) PrimaryRoutesString() string { return s.primaryRoutes.String() } // ValidateAPIKey checks if an API key is valid and active. func (s *State) ValidateAPIKey(keyStr string) (bool, error) { return s.db.ValidateAPIKey(keyStr) } // CreateAPIKey generates a new API key with optional expiration. func (s *State) CreateAPIKey(expiration *time.Time) (string, *types.APIKey, error) { return s.db.CreateAPIKey(expiration) } // GetAPIKey retrieves an API key by its prefix. func (s *State) GetAPIKey(prefix string) (*types.APIKey, error) { return s.db.GetAPIKey(prefix) } // ExpireAPIKey marks an API key as expired. func (s *State) ExpireAPIKey(key *types.APIKey) error { return s.db.ExpireAPIKey(key) } // ListAPIKeys returns all API keys in the system. func (s *State) ListAPIKeys() ([]types.APIKey, error) { return s.db.ListAPIKeys() } // DestroyAPIKey permanently removes an API key. func (s *State) DestroyAPIKey(key types.APIKey) error { return s.db.DestroyAPIKey(key) } // CreatePreAuthKey generates a new pre-authentication key for a user. func (s *State) CreatePreAuthKey(userID types.UserID, reusable bool, ephemeral bool, expiration *time.Time, aclTags []string) (*types.PreAuthKey, error) { return s.db.CreatePreAuthKey(userID, reusable, ephemeral, expiration, aclTags) } // Test helpers for the state layer // CreateUserForTest creates a test user. This is a convenience wrapper around the database layer. func (s *State) CreateUserForTest(name ...string) *types.User { return s.db.CreateUserForTest(name...) } // CreateNodeForTest creates a test node. This is a convenience wrapper around the database layer. func (s *State) CreateNodeForTest(user *types.User, hostname ...string) *types.Node { return s.db.CreateNodeForTest(user, hostname...) } // CreateRegisteredNodeForTest creates a test node with allocated IPs. This is a convenience wrapper around the database layer. func (s *State) CreateRegisteredNodeForTest(user *types.User, hostname ...string) *types.Node { return s.db.CreateRegisteredNodeForTest(user, hostname...) } // CreateNodesForTest creates multiple test nodes. This is a convenience wrapper around the database layer. func (s *State) CreateNodesForTest(user *types.User, count int, namePrefix ...string) []*types.Node { return s.db.CreateNodesForTest(user, count, namePrefix...) } // CreateUsersForTest creates multiple test users. This is a convenience wrapper around the database layer. func (s *State) CreateUsersForTest(count int, namePrefix ...string) []*types.User { return s.db.CreateUsersForTest(count, namePrefix...) } // DB returns the underlying database for testing purposes. func (s *State) DB() *hsdb.HSDatabase { return s.db } // GetPreAuthKey retrieves a pre-authentication key by ID. func (s *State) GetPreAuthKey(id string) (*types.PreAuthKey, error) { return s.db.GetPreAuthKey(id) } // ListPreAuthKeys returns all pre-authentication keys for a user. func (s *State) ListPreAuthKeys(userID types.UserID) ([]types.PreAuthKey, error) { return s.db.ListPreAuthKeys(userID) } // ExpirePreAuthKey marks a pre-authentication key as expired. func (s *State) ExpirePreAuthKey(preAuthKey *types.PreAuthKey) error { return s.db.ExpirePreAuthKey(preAuthKey) } // GetRegistrationCacheEntry retrieves a node registration from cache. func (s *State) GetRegistrationCacheEntry(id types.RegistrationID) (*types.RegisterNode, bool) { entry, found := s.registrationCache.Get(id) if !found { return nil, false } return &entry, true } // SetRegistrationCacheEntry stores a node registration in cache. func (s *State) SetRegistrationCacheEntry(id types.RegistrationID, entry types.RegisterNode) { s.registrationCache.Set(id, entry) } // logHostinfoValidation logs warnings when hostinfo is nil or has empty hostname. func logHostinfoValidation(machineKey, nodeKey, username, hostname string, hostinfo *tailcfg.Hostinfo) { if hostinfo == nil { log.Warn(). Caller(). Str("machine.key", machineKey). Str("node.key", nodeKey). Str("user.name", username). Str("generated.hostname", hostname). Msg("Registration had nil hostinfo, generated default hostname") } else if hostinfo.Hostname == "" { log.Warn(). Caller(). Str("machine.key", machineKey). Str("node.key", nodeKey). Str("user.name", username). Str("generated.hostname", hostname). Msg("Registration had empty hostname, generated default") } } // preserveNetInfo preserves NetInfo from an existing node for faster DERP connectivity. // If no existing node is provided, it creates new netinfo from the provided hostinfo. func preserveNetInfo(existingNode types.NodeView, nodeID types.NodeID, validHostinfo *tailcfg.Hostinfo) *tailcfg.NetInfo { var existingHostinfo *tailcfg.Hostinfo if existingNode.Valid() { existingHostinfo = existingNode.Hostinfo().AsStruct() } return netInfoFromMapRequest(nodeID, existingHostinfo, validHostinfo) } // newNodeParams contains parameters for creating a new node. type newNodeParams struct { User types.User MachineKey key.MachinePublic NodeKey key.NodePublic DiscoKey key.DiscoPublic Hostname string Hostinfo *tailcfg.Hostinfo Endpoints []netip.AddrPort Expiry *time.Time RegisterMethod string // Optional: Pre-auth key specific fields PreAuthKey *types.PreAuthKey // Optional: Existing node for netinfo preservation ExistingNodeForNetinfo types.NodeView } // createAndSaveNewNode creates a new node, allocates IPs, saves to DB, and adds to NodeStore. // It preserves netinfo from an existing node if one is provided (for faster DERP connectivity). func (s *State) createAndSaveNewNode(params newNodeParams) (types.NodeView, error) { // Preserve NetInfo from existing node if available if params.Hostinfo != nil { params.Hostinfo.NetInfo = preserveNetInfo( params.ExistingNodeForNetinfo, types.NodeID(0), params.Hostinfo, ) } // Prepare the node for registration nodeToRegister := types.Node{ Hostname: params.Hostname, UserID: params.User.ID, User: params.User, MachineKey: params.MachineKey, NodeKey: params.NodeKey, DiscoKey: params.DiscoKey, Hostinfo: params.Hostinfo, Endpoints: params.Endpoints, LastSeen: ptr.To(time.Now()), RegisterMethod: params.RegisterMethod, Expiry: params.Expiry, } // Pre-auth key specific fields if params.PreAuthKey != nil { nodeToRegister.ForcedTags = params.PreAuthKey.Proto().GetAclTags() nodeToRegister.AuthKey = params.PreAuthKey nodeToRegister.AuthKeyID = ¶ms.PreAuthKey.ID } // Allocate new IPs ipv4, ipv6, err := s.ipAlloc.Next() if err != nil { return types.NodeView{}, fmt.Errorf("allocating IPs: %w", err) } nodeToRegister.IPv4 = ipv4 nodeToRegister.IPv6 = ipv6 // Ensure unique given name if not set if nodeToRegister.GivenName == "" { givenName, err := hsdb.EnsureUniqueGivenName(s.db.DB, nodeToRegister.Hostname) if err != nil { return types.NodeView{}, fmt.Errorf("failed to ensure unique given name: %w", err) } nodeToRegister.GivenName = givenName } // New node - database first to get ID, then NodeStore savedNode, err := hsdb.Write(s.db.DB, func(tx *gorm.DB) (*types.Node, error) { if err := tx.Save(&nodeToRegister).Error; err != nil { return nil, fmt.Errorf("failed to save node: %w", err) } if params.PreAuthKey != nil && !params.PreAuthKey.Reusable { err := hsdb.UsePreAuthKey(tx, params.PreAuthKey) if err != nil { return nil, fmt.Errorf("using pre auth key: %w", err) } } return &nodeToRegister, nil }) if err != nil { return types.NodeView{}, err } // Add to NodeStore after database creates the ID return s.nodeStore.PutNode(*savedNode), nil } // HandleNodeFromAuthPath handles node registration through authentication flow (like OIDC). func (s *State) HandleNodeFromAuthPath( registrationID types.RegistrationID, userID types.UserID, expiry *time.Time, registrationMethod string, ) (types.NodeView, change.ChangeSet, error) { // Get the registration entry from cache regEntry, ok := s.GetRegistrationCacheEntry(registrationID) if !ok { return types.NodeView{}, change.EmptySet, hsdb.ErrNodeNotFoundRegistrationCache } // Get the user user, err := s.db.GetUserByID(userID) if err != nil { return types.NodeView{}, change.EmptySet, fmt.Errorf("failed to find user: %w", err) } // Ensure we have a valid hostname from the registration cache entry hostname := util.EnsureHostname( regEntry.Node.Hostinfo, regEntry.Node.MachineKey.String(), regEntry.Node.NodeKey.String(), ) // Ensure we have valid hostinfo validHostinfo := cmp.Or(regEntry.Node.Hostinfo, &tailcfg.Hostinfo{}) validHostinfo.Hostname = hostname logHostinfoValidation( regEntry.Node.MachineKey.ShortString(), regEntry.Node.NodeKey.String(), user.Username(), hostname, regEntry.Node.Hostinfo, ) var finalNode types.NodeView // Check if node already exists with same machine key for this user existingNodeSameUser, existsSameUser := s.nodeStore.GetNodeByMachineKey(regEntry.Node.MachineKey, types.UserID(user.ID)) // If this node exists for this user, update the node in place. if existsSameUser && existingNodeSameUser.Valid() { log.Debug(). Caller(). Str("registration_id", registrationID.String()). Str("user.name", user.Username()). Str("registrationMethod", registrationMethod). Str("node.name", existingNodeSameUser.Hostname()). Uint64("node.id", existingNodeSameUser.ID().Uint64()). Msg("Updating existing node registration") // Update existing node - NodeStore first, then database updatedNodeView, ok := s.nodeStore.UpdateNode(existingNodeSameUser.ID(), func(node *types.Node) { node.NodeKey = regEntry.Node.NodeKey node.DiscoKey = regEntry.Node.DiscoKey node.Hostname = hostname // TODO(kradalby): We should ensure we use the same hostinfo and node merge semantics // when a node re-registers as we do when it sends a map request (UpdateNodeFromMapRequest). // Preserve NetInfo from existing node when re-registering node.Hostinfo = validHostinfo node.Hostinfo.NetInfo = preserveNetInfo(existingNodeSameUser, existingNodeSameUser.ID(), validHostinfo) node.Endpoints = regEntry.Node.Endpoints node.RegisterMethod = regEntry.Node.RegisterMethod node.IsOnline = ptr.To(false) node.LastSeen = ptr.To(time.Now()) if expiry != nil { node.Expiry = expiry } else { node.Expiry = regEntry.Node.Expiry } }) if !ok { return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", existingNodeSameUser.ID()) } // Use the node from UpdateNode to save to database _, err = hsdb.Write(s.db.DB, func(tx *gorm.DB) (*types.Node, error) { if err := tx.Save(updatedNodeView.AsStruct()).Error; err != nil { return nil, fmt.Errorf("failed to save node: %w", err) } return nil, nil }) if err != nil { return types.NodeView{}, change.EmptySet, err } log.Trace(). Caller(). Str("node.name", updatedNodeView.Hostname()). Uint64("node.id", updatedNodeView.ID().Uint64()). Str("machine.key", regEntry.Node.MachineKey.ShortString()). Str("node.key", updatedNodeView.NodeKey().ShortString()). Str("user.name", user.Name). Msg("Node re-authorized") finalNode = updatedNodeView } else { // Node does not exist for this user with this machine key // Check if node exists with this machine key for a different user (for netinfo preservation) existingNodeAnyUser, existsAnyUser := s.nodeStore.GetNodeByMachineKeyAnyUser(regEntry.Node.MachineKey) if existsAnyUser && existingNodeAnyUser.Valid() && existingNodeAnyUser.UserID() != user.ID { // Node exists but belongs to a different user // Create a NEW node for the new user (do not transfer) // This allows the same machine to have separate node identities per user oldUser := existingNodeAnyUser.User() log.Info(). Caller(). Str("existing.node.name", existingNodeAnyUser.Hostname()). Uint64("existing.node.id", existingNodeAnyUser.ID().Uint64()). Str("machine.key", regEntry.Node.MachineKey.ShortString()). Str("old.user", oldUser.Username()). Str("new.user", user.Username()). Str("method", registrationMethod). Msg("Creating new node for different user (same machine key exists for another user)") } // Create a completely new node log.Debug(). Caller(). Str("registration_id", registrationID.String()). Str("user.name", user.Username()). Str("registrationMethod", registrationMethod). Str("expiresAt", fmt.Sprintf("%v", expiry)). Msg("Registering new node from auth callback") // Create and save new node var err error finalNode, err = s.createAndSaveNewNode(newNodeParams{ User: *user, MachineKey: regEntry.Node.MachineKey, NodeKey: regEntry.Node.NodeKey, DiscoKey: regEntry.Node.DiscoKey, Hostname: hostname, Hostinfo: validHostinfo, Endpoints: regEntry.Node.Endpoints, Expiry: cmp.Or(expiry, regEntry.Node.Expiry), RegisterMethod: registrationMethod, ExistingNodeForNetinfo: cmp.Or(existingNodeAnyUser, types.NodeView{}), }) if err != nil { return types.NodeView{}, change.EmptySet, err } } // Signal to waiting clients regEntry.SendAndClose(finalNode.AsStruct()) // Delete from registration cache s.registrationCache.Delete(registrationID) // Update policy managers usersChange, err := s.updatePolicyManagerUsers() if err != nil { return finalNode, change.NodeAdded(finalNode.ID()), fmt.Errorf("failed to update policy manager users: %w", err) } nodesChange, err := s.updatePolicyManagerNodes() if err != nil { return finalNode, change.NodeAdded(finalNode.ID()), fmt.Errorf("failed to update policy manager nodes: %w", err) } var c change.ChangeSet if !usersChange.Empty() || !nodesChange.Empty() { c = change.PolicyChange() } else { c = change.NodeAdded(finalNode.ID()) } return finalNode, c, nil } // HandleNodeFromPreAuthKey handles node registration using a pre-authentication key. func (s *State) HandleNodeFromPreAuthKey( regReq tailcfg.RegisterRequest, machineKey key.MachinePublic, ) (types.NodeView, change.ChangeSet, error) { pak, err := s.GetPreAuthKey(regReq.Auth.AuthKey) if err != nil { return types.NodeView{}, change.EmptySet, err } err = pak.Validate() if err != nil { return types.NodeView{}, change.EmptySet, err } // Ensure we have a valid hostname - handle nil/empty cases hostname := util.EnsureHostname( regReq.Hostinfo, machineKey.String(), regReq.NodeKey.String(), ) // Ensure we have valid hostinfo validHostinfo := cmp.Or(regReq.Hostinfo, &tailcfg.Hostinfo{}) validHostinfo.Hostname = hostname logHostinfoValidation( machineKey.ShortString(), regReq.NodeKey.ShortString(), pak.User.Username(), hostname, regReq.Hostinfo, ) log.Debug(). Caller(). Str("node.name", hostname). Str("machine.key", machineKey.ShortString()). Str("node.key", regReq.NodeKey.ShortString()). Str("user.name", pak.User.Username()). Msg("Registering node with pre-auth key") var finalNode types.NodeView // Check if node already exists with same machine key for this user existingNodeSameUser, existsSameUser := s.nodeStore.GetNodeByMachineKey(machineKey, types.UserID(pak.User.ID)) // If this node exists for this user, update the node in place. if existsSameUser && existingNodeSameUser.Valid() { log.Trace(). Caller(). Str("node.name", existingNodeSameUser.Hostname()). Uint64("node.id", existingNodeSameUser.ID().Uint64()). Str("machine.key", machineKey.ShortString()). Str("node.key", existingNodeSameUser.NodeKey().ShortString()). Str("user.name", pak.User.Username()). Msg("Node re-registering with existing machine key and user, updating in place") // Update existing node - NodeStore first, then database updatedNodeView, ok := s.nodeStore.UpdateNode(existingNodeSameUser.ID(), func(node *types.Node) { node.NodeKey = regReq.NodeKey node.Hostname = hostname // TODO(kradalby): We should ensure we use the same hostinfo and node merge semantics // when a node re-registers as we do when it sends a map request (UpdateNodeFromMapRequest). // Preserve NetInfo from existing node when re-registering node.Hostinfo = validHostinfo node.Hostinfo.NetInfo = preserveNetInfo(existingNodeSameUser, existingNodeSameUser.ID(), validHostinfo) node.RegisterMethod = util.RegisterMethodAuthKey // TODO(kradalby): This might need a rework as part of #2417 node.ForcedTags = pak.Proto().GetAclTags() node.AuthKey = pak node.AuthKeyID = &pak.ID node.IsOnline = ptr.To(false) node.LastSeen = ptr.To(time.Now()) // Update expiry, if it is zero, it means that the node will // not have an expiry anymore. If it is non-zero, we set that. node.Expiry = ®Req.Expiry }) if !ok { return types.NodeView{}, change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", existingNodeSameUser.ID()) } // Use the node from UpdateNode to save to database _, err = hsdb.Write(s.db.DB, func(tx *gorm.DB) (*types.Node, error) { if err := tx.Save(updatedNodeView.AsStruct()).Error; err != nil { return nil, fmt.Errorf("failed to save node: %w", err) } if !pak.Reusable { err = hsdb.UsePreAuthKey(tx, pak) if err != nil { return nil, fmt.Errorf("using pre auth key: %w", err) } } return nil, nil }) if err != nil { return types.NodeView{}, change.EmptySet, fmt.Errorf("writing node to database: %w", err) } log.Trace(). Caller(). Str("node.name", updatedNodeView.Hostname()). Uint64("node.id", updatedNodeView.ID().Uint64()). Str("machine.key", machineKey.ShortString()). Str("node.key", updatedNodeView.NodeKey().ShortString()). Str("user.name", pak.User.Username()). Msg("Node re-authorized") finalNode = updatedNodeView } else { // Node does not exist for this user with this machine key // Check if node exists with this machine key for a different user existingNodeAnyUser, existsAnyUser := s.nodeStore.GetNodeByMachineKeyAnyUser(machineKey) if existsAnyUser && existingNodeAnyUser.Valid() && existingNodeAnyUser.UserID() != pak.User.ID { // Node exists but belongs to a different user // Create a NEW node for the new user (do not transfer) // This allows the same machine to have separate node identities per user oldUser := existingNodeAnyUser.User() log.Info(). Caller(). Str("existing.node.name", existingNodeAnyUser.Hostname()). Uint64("existing.node.id", existingNodeAnyUser.ID().Uint64()). Str("machine.key", machineKey.ShortString()). Str("old.user", oldUser.Username()). Str("new.user", pak.User.Username()). Msg("Creating new node for different user (same machine key exists for another user)") } // This is a new node for this user - create it // (Either completely new, or new for this user while existing for another user) // Create and save new node var err error finalNode, err = s.createAndSaveNewNode(newNodeParams{ User: pak.User, MachineKey: machineKey, NodeKey: regReq.NodeKey, DiscoKey: key.DiscoPublic{}, // DiscoKey not available in RegisterRequest Hostname: hostname, Hostinfo: validHostinfo, Endpoints: nil, // Endpoints not available in RegisterRequest Expiry: ®Req.Expiry, RegisterMethod: util.RegisterMethodAuthKey, PreAuthKey: pak, ExistingNodeForNetinfo: cmp.Or(existingNodeAnyUser, types.NodeView{}), }) if err != nil { return types.NodeView{}, change.EmptySet, fmt.Errorf("creating new node: %w", err) } } // Update policy managers usersChange, err := s.updatePolicyManagerUsers() if err != nil { return finalNode, change.NodeAdded(finalNode.ID()), fmt.Errorf("failed to update policy manager users: %w", err) } nodesChange, err := s.updatePolicyManagerNodes() if err != nil { return finalNode, change.NodeAdded(finalNode.ID()), fmt.Errorf("failed to update policy manager nodes: %w", err) } var c change.ChangeSet if !usersChange.Empty() || !nodesChange.Empty() { c = change.PolicyChange() } else { c = change.NodeAdded(finalNode.ID()) } return finalNode, c, nil } // updatePolicyManagerUsers updates the policy manager with current users. // Returns true if the policy changed and notifications should be sent. // TODO(kradalby): This is a temporary stepping stone, ultimately we should // have the list already available so it could go much quicker. Alternatively // the policy manager could have a remove or add list for users. // updatePolicyManagerUsers refreshes the policy manager with current user data. func (s *State) updatePolicyManagerUsers() (change.ChangeSet, error) { users, err := s.ListAllUsers() if err != nil { return change.EmptySet, fmt.Errorf("listing users for policy update: %w", err) } log.Debug().Caller().Int("user.count", len(users)).Msg("Policy manager user update initiated because user list modification detected") changed, err := s.polMan.SetUsers(users) if err != nil { return change.EmptySet, fmt.Errorf("updating policy manager users: %w", err) } log.Debug().Caller().Bool("policy.changed", changed).Msg("Policy manager user update completed because SetUsers operation finished") if changed { return change.PolicyChange(), nil } return change.EmptySet, nil } // updatePolicyManagerNodes updates the policy manager with current nodes. // Returns true if the policy changed and notifications should be sent. // TODO(kradalby): This is a temporary stepping stone, ultimately we should // have the list already available so it could go much quicker. Alternatively // the policy manager could have a remove or add list for nodes. // updatePolicyManagerNodes refreshes the policy manager with current node data. func (s *State) updatePolicyManagerNodes() (change.ChangeSet, error) { nodes := s.ListNodes() changed, err := s.polMan.SetNodes(nodes) if err != nil { return change.EmptySet, fmt.Errorf("updating policy manager nodes: %w", err) } if changed { return change.PolicyChange(), nil } return change.EmptySet, nil } // PingDB checks if the database connection is healthy. func (s *State) PingDB(ctx context.Context) error { return s.db.PingDB(ctx) } // autoApproveNodes mass approves routes on all nodes. It is _only_ intended for // use when the policy is replaced. It is not sending or reporting any changes // or updates as we send full updates after replacing the policy. // TODO(kradalby): This is kind of messy, maybe this is another +1 // for an event bus. See example comments here. // autoApproveNodes automatically approves nodes based on policy rules. func (s *State) autoApproveNodes() ([]change.ChangeSet, error) { nodes := s.ListNodes() // Approve routes concurrently, this should make it likely // that the writes end in the same batch in the nodestore write. var errg errgroup.Group var cs []change.ChangeSet var mu sync.Mutex for _, nv := range nodes.All() { errg.Go(func() error { approved, changed := policy.ApproveRoutesWithPolicy(s.polMan, nv, nv.ApprovedRoutes().AsSlice(), nv.AnnouncedRoutes()) if changed { log.Debug(). Uint64("node.id", nv.ID().Uint64()). Str("node.name", nv.Hostname()). Strs("routes.approved.old", util.PrefixesToString(nv.ApprovedRoutes().AsSlice())). Strs("routes.approved.new", util.PrefixesToString(approved)). Msg("Routes auto-approved by policy") _, c, err := s.SetApprovedRoutes(nv.ID(), approved) if err != nil { return err } mu.Lock() cs = append(cs, c) mu.Unlock() } return nil }) } err := errg.Wait() if err != nil { return nil, err } return cs, nil } // UpdateNodeFromMapRequest processes a MapRequest and updates the node. // TODO(kradalby): This is essentially a patch update that could be sent directly to nodes, // which means we could shortcut the whole change thing if there are no other important updates. // When a field is added to this function, remember to also add it to: // - node.PeerChangeFromMapRequest // - node.ApplyPeerChange // - logTracePeerChange in poll.go. func (s *State) UpdateNodeFromMapRequest(id types.NodeID, req tailcfg.MapRequest) (change.ChangeSet, error) { log.Trace(). Caller(). Uint64("node.id", id.Uint64()). Interface("request", req). Msg("Processing MapRequest for node") var routeChange bool var hostinfoChanged bool var needsRouteApproval bool // We need to ensure we update the node as it is in the NodeStore at // the time of the request. updatedNode, ok := s.nodeStore.UpdateNode(id, func(currentNode *types.Node) { peerChange := currentNode.PeerChangeFromMapRequest(req) hostinfoChanged = !hostinfoEqual(currentNode.View(), req.Hostinfo) // Get the correct NetInfo to use netInfo := netInfoFromMapRequest(id, currentNode.Hostinfo, req.Hostinfo) if req.Hostinfo != nil { req.Hostinfo.NetInfo = netInfo } else { req.Hostinfo = &tailcfg.Hostinfo{NetInfo: netInfo} } // Re-check hostinfoChanged after potential NetInfo preservation hostinfoChanged = !hostinfoEqual(currentNode.View(), req.Hostinfo) // If there is no changes and nothing to save, // return early. if peerChangeEmpty(peerChange) && !hostinfoChanged { return } // Calculate route approval before NodeStore update to avoid calling View() inside callback var autoApprovedRoutes []netip.Prefix var hasNewRoutes bool if hi := req.Hostinfo; hi != nil { hasNewRoutes = len(hi.RoutableIPs) > 0 } needsRouteApproval = hostinfoChanged && (routesChanged(currentNode.View(), req.Hostinfo) || (hasNewRoutes && len(currentNode.ApprovedRoutes) == 0)) if needsRouteApproval { // Extract announced routes from request var announcedRoutes []netip.Prefix if req.Hostinfo != nil { announcedRoutes = req.Hostinfo.RoutableIPs } // Apply policy-based auto-approval if routes are announced if len(announcedRoutes) > 0 { autoApprovedRoutes, routeChange = policy.ApproveRoutesWithPolicy( s.polMan, currentNode.View(), currentNode.ApprovedRoutes, announcedRoutes, ) } } // Log when routes change but approval doesn't if hostinfoChanged && !routeChange { if hi := req.Hostinfo; hi != nil { if routesChanged(currentNode.View(), hi) { log.Debug(). Caller(). Uint64("node.id", id.Uint64()). Strs("oldAnnouncedRoutes", util.PrefixesToString(currentNode.AnnouncedRoutes())). Strs("newAnnouncedRoutes", util.PrefixesToString(hi.RoutableIPs)). Strs("approvedRoutes", util.PrefixesToString(currentNode.ApprovedRoutes)). Bool("routeChange", routeChange). Msg("announced routes changed but approved routes did not") } } } currentNode.ApplyPeerChange(&peerChange) if hostinfoChanged { // The node might not set NetInfo if it has not changed and if // the full HostInfo object is overwritten, the information is lost. // If there is no NetInfo, keep the previous one. // From 1.66 the client only sends it if changed: // https://github.com/tailscale/tailscale/commit/e1011f138737286ecf5123ff887a7a5800d129a2 // TODO(kradalby): evaluate if we need better comparing of hostinfo // before we take the changes. // NetInfo preservation has already been handled above before early return check currentNode.Hostinfo = req.Hostinfo currentNode.ApplyHostnameFromHostInfo(req.Hostinfo) if routeChange { // Apply pre-calculated route approval // Always apply the route approval result to ensure consistency, // regardless of whether the policy evaluation detected changes. // This fixes the bug where routes weren't properly cleared when // auto-approvers were removed from the policy. log.Info(). Uint64("node.id", id.Uint64()). Strs("oldApprovedRoutes", util.PrefixesToString(currentNode.ApprovedRoutes)). Strs("newApprovedRoutes", util.PrefixesToString(autoApprovedRoutes)). Bool("routeChanged", routeChange). Msg("applying route approval results") currentNode.ApprovedRoutes = autoApprovedRoutes } } }) if !ok { return change.EmptySet, fmt.Errorf("node not found in NodeStore: %d", id) } nodeRouteChange := change.EmptySet // Handle route changes after NodeStore update // We need to update node routes if either: // 1. The approved routes changed (routeChange is true), OR // 2. The announced routes changed (even if approved routes stayed the same) // This is because SubnetRoutes is the intersection of announced AND approved routes. needsRouteUpdate := false var routesChangedButNotApproved bool if hostinfoChanged && needsRouteApproval && !routeChange { if hi := req.Hostinfo; hi != nil { routesChangedButNotApproved = true } } if routeChange { needsRouteUpdate = true log.Debug(). Caller(). Uint64("node.id", id.Uint64()). Msg("updating routes because approved routes changed") } else if routesChangedButNotApproved { needsRouteUpdate = true log.Debug(). Caller(). Uint64("node.id", id.Uint64()). Msg("updating routes because announced routes changed but approved routes did not") } if needsRouteUpdate { // SetNodeRoutes sets the active/distributed routes, so we must use SubnetRoutes() // which returns only the intersection of announced AND approved routes. // Using AnnouncedRoutes() would bypass the security model and auto-approve everything. log.Debug(). Caller(). Uint64("node.id", id.Uint64()). Strs("announcedRoutes", util.PrefixesToString(updatedNode.AnnouncedRoutes())). Strs("approvedRoutes", util.PrefixesToString(updatedNode.ApprovedRoutes().AsSlice())). Strs("subnetRoutes", util.PrefixesToString(updatedNode.SubnetRoutes())). Msg("updating node routes for distribution") nodeRouteChange = s.SetNodeRoutes(id, updatedNode.SubnetRoutes()...) } _, policyChange, err := s.persistNodeToDB(updatedNode) if err != nil { return change.EmptySet, fmt.Errorf("saving to database: %w", err) } if policyChange.IsFull() { return policyChange, nil } if !nodeRouteChange.Empty() { return nodeRouteChange, nil } return change.NodeAdded(id), nil } func hostinfoEqual(oldNode types.NodeView, new *tailcfg.Hostinfo) bool { if !oldNode.Valid() && new == nil { return true } if !oldNode.Valid() || new == nil { return false } old := oldNode.AsStruct().Hostinfo return old.Equal(new) } func routesChanged(oldNode types.NodeView, new *tailcfg.Hostinfo) bool { var oldRoutes []netip.Prefix if oldNode.Valid() && oldNode.AsStruct().Hostinfo != nil { oldRoutes = oldNode.AsStruct().Hostinfo.RoutableIPs } newRoutes := new.RoutableIPs if newRoutes == nil { newRoutes = []netip.Prefix{} } tsaddr.SortPrefixes(oldRoutes) tsaddr.SortPrefixes(newRoutes) return !slices.Equal(oldRoutes, newRoutes) } func peerChangeEmpty(peerChange tailcfg.PeerChange) bool { return peerChange.Key == nil && peerChange.DiscoKey == nil && peerChange.Online == nil && peerChange.Endpoints == nil && peerChange.DERPRegion == 0 && peerChange.LastSeen == nil && peerChange.KeyExpiry == nil }