From 2b38f7bef7e7302a9abd7e44a828080b63b1aad9 Mon Sep 17 00:00:00 2001 From: Kristoffer Dalby Date: Tue, 29 Apr 2025 17:27:41 +0300 Subject: [PATCH] policy/v2: make default (#2546) * policy/v2: make default Signed-off-by: Kristoffer Dalby * integration: do not run v1 tests Signed-off-by: Kristoffer Dalby * policy/v2: fix potential nil pointers Signed-off-by: Kristoffer Dalby * mapper: fix test failures in v2 Signed-off-by: Kristoffer Dalby --------- Signed-off-by: Kristoffer Dalby --- .../gh-action-integration-generator.go | 1 - .../workflows/test-integration-policyv2.yaml | 169 ------------------ .github/workflows/test-integration.yaml | 7 +- CHANGELOG.md | 24 ++- hscontrol/mapper/mapper_test.go | 4 +- hscontrol/policy/pm.go | 8 +- hscontrol/policy/v2/filter.go | 10 +- integration/hsic/hsic.go | 8 +- integration/scenario.go | 6 +- 9 files changed, 35 insertions(+), 202 deletions(-) delete mode 100644 .github/workflows/test-integration-policyv2.yaml diff --git a/.github/workflows/gh-action-integration-generator.go b/.github/workflows/gh-action-integration-generator.go index 471e3589..f94753b0 100644 --- a/.github/workflows/gh-action-integration-generator.go +++ b/.github/workflows/gh-action-integration-generator.go @@ -71,5 +71,4 @@ func main() { } updateYAML(quotedTests, "./test-integration.yaml") - updateYAML(quotedTests, "./test-integration-policyv2.yaml") } diff --git a/.github/workflows/test-integration-policyv2.yaml b/.github/workflows/test-integration-policyv2.yaml deleted file mode 100644 index c334a5a7..00000000 --- a/.github/workflows/test-integration-policyv2.yaml +++ /dev/null @@ -1,169 +0,0 @@ -name: Integration Tests (policy v2) -# To debug locally on a branch, and when needing secrets -# change this to include `push` so the build is ran on -# the main repository. -on: [pull_request] -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true -jobs: - integration-test: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - test: - - TestACLHostsInNetMapTable - - TestACLAllowUser80Dst - - TestACLDenyAllPort80 - - TestACLAllowUserDst - - TestACLAllowStarDst - - TestACLNamedHostsCanReachBySubnet - - TestACLNamedHostsCanReach - - TestACLDevice1CanAccessDevice2 - - TestPolicyUpdateWhileRunningWithCLIInDatabase - - TestAuthKeyLogoutAndReloginSameUser - - TestAuthKeyLogoutAndReloginNewUser - - TestAuthKeyLogoutAndReloginSameUserExpiredKey - - TestOIDCAuthenticationPingAll - - TestOIDCExpireNodesBasedOnTokenExpiry - - TestOIDC024UserCreation - - TestOIDCAuthenticationWithPKCE - - TestOIDCReloginSameNodeNewUser - - TestAuthWebFlowAuthenticationPingAll - - TestAuthWebFlowLogoutAndRelogin - - TestUserCommand - - TestPreAuthKeyCommand - - TestPreAuthKeyCommandWithoutExpiry - - TestPreAuthKeyCommandReusableEphemeral - - TestPreAuthKeyCorrectUserLoggedInCommand - - TestApiKeyCommand - - TestNodeTagCommand - - TestNodeAdvertiseTagCommand - - TestNodeCommand - - TestNodeExpireCommand - - TestNodeRenameCommand - - TestNodeMoveCommand - - TestPolicyCommand - - TestPolicyBrokenConfigCommand - - TestDERPVerifyEndpoint - - TestResolveMagicDNS - - TestResolveMagicDNSExtraRecordsPath - - TestValidateResolvConf - - TestDERPServerScenario - - TestDERPServerWebsocketScenario - - TestPingAllByIP - - TestPingAllByIPPublicDERP - - TestEphemeral - - TestEphemeralInAlternateTimezone - - TestEphemeral2006DeletedTooQuickly - - TestPingAllByHostname - - TestTaildrop - - TestUpdateHostnameFromClient - - TestExpireNode - - TestNodeOnlineStatus - - TestPingAllByIPManyUpDown - - Test2118DeletingOnlineNodePanics - - TestEnablingRoutes - - TestHASubnetRouterFailover - - TestSubnetRouteACL - - TestEnablingExitRoutes - - TestSubnetRouterMultiNetwork - - TestSubnetRouterMultiNetworkExitNode - - TestAutoApproveMultiNetwork - - TestHeadscale - - TestTailscaleNodesJoiningHeadcale - - TestSSHOneUserToAll - - TestSSHMultipleUsersAllToAll - - TestSSHNoSSHConfigured - - TestSSHIsBlockedInACL - - TestSSHUserOnlyIsolation - database: [postgres, sqlite] - env: - # Github does not allow us to access secrets in pull requests, - # so this env var is used to check if we have the secret or not. - # If we have the secrets, meaning we are running on push in a fork, - # there might be secrets available for more debugging. - # If TS_OAUTH_CLIENT_ID and TS_OAUTH_SECRET is set, then the job - # will join a debug tailscale network, set up SSH and a tmux session. - # The SSH will be configured to use the SSH key of the Github user - # that triggered the build. - HAS_TAILSCALE_SECRET: ${{ secrets.TS_OAUTH_CLIENT_ID }} - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 2 - - name: Get changed files - id: changed-files - uses: dorny/paths-filter@v3 - with: - filters: | - files: - - '*.nix' - - 'go.*' - - '**/*.go' - - 'integration_test/' - - 'config-example.yaml' - - name: Tailscale - if: ${{ env.HAS_TAILSCALE_SECRET }} - uses: tailscale/github-action@v2 - with: - oauth-client-id: ${{ secrets.TS_OAUTH_CLIENT_ID }} - oauth-secret: ${{ secrets.TS_OAUTH_SECRET }} - tags: tag:gh - - name: Setup SSH server for Actor - if: ${{ env.HAS_TAILSCALE_SECRET }} - uses: alexellis/setup-sshd-actor@master - - uses: DeterminateSystems/nix-installer-action@main - if: steps.changed-files.outputs.files == 'true' - - uses: DeterminateSystems/magic-nix-cache-action@main - if: steps.changed-files.outputs.files == 'true' - - uses: satackey/action-docker-layer-caching@main - if: steps.changed-files.outputs.files == 'true' - continue-on-error: true - - name: Run Integration Test - uses: Wandalen/wretry.action@master - if: steps.changed-files.outputs.files == 'true' - env: - USE_POSTGRES: ${{ matrix.database == 'postgres' && '1' || '0' }} - with: - # Our integration tests are started like a thundering herd, often - # hitting limits of the various external repositories we depend on - # like docker hub. This will retry jobs every 5 min, 10 times, - # hopefully letting us avoid manual intervention and restarting jobs. - # One could of course argue that we should invest in trying to avoid - # this, but currently it seems like a larger investment to be cleverer - # about this. - # Some of the jobs might still require manual restart as they are really - # slow and this will cause them to eventually be killed by Github actions. - attempt_delay: 300000 # 5 min - attempt_limit: 10 - command: | - nix develop --command -- docker run \ - --tty --rm \ - --volume ~/.cache/hs-integration-go:/go \ - --name headscale-test-suite \ - --volume $PWD:$PWD -w $PWD/integration \ - --volume /var/run/docker.sock:/var/run/docker.sock \ - --volume $PWD/control_logs:/tmp/control \ - --env HEADSCALE_INTEGRATION_POSTGRES=${{env.USE_POSTGRES}} \ - --env HEADSCALE_EXPERIMENTAL_POLICY_V2=1 \ - golang:1 \ - go run gotest.tools/gotestsum@latest -- ./... \ - -failfast \ - -timeout 120m \ - -parallel 1 \ - -run "^${{ matrix.test }}$" - - uses: actions/upload-artifact@v4 - if: always() && steps.changed-files.outputs.files == 'true' - with: - name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-logs - path: "control_logs/*.log" - - uses: actions/upload-artifact@v4 - if: always() && steps.changed-files.outputs.files == 'true' - with: - name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-pprof - path: "control_logs/*.pprof.tar" - - name: Setup a blocking tmux session - if: ${{ env.HAS_TAILSCALE_SECRET }} - uses: alexellis/block-with-tmux-action@master diff --git a/.github/workflows/test-integration.yaml b/.github/workflows/test-integration.yaml index ba2a4e2e..cfe220ab 100644 --- a/.github/workflows/test-integration.yaml +++ b/.github/workflows/test-integration.yaml @@ -136,7 +136,7 @@ jobs: # about this. # Some of the jobs might still require manual restart as they are really # slow and this will cause them to eventually be killed by Github actions. - attempt_delay: 300000 # 5 min + attempt_delay: 300000 # 5 min attempt_limit: 10 command: | nix develop --command -- docker run \ @@ -147,7 +147,6 @@ jobs: --volume /var/run/docker.sock:/var/run/docker.sock \ --volume $PWD/control_logs:/tmp/control \ --env HEADSCALE_INTEGRATION_POSTGRES=${{env.USE_POSTGRES}} \ - --env HEADSCALE_EXPERIMENTAL_POLICY_V2=0 \ golang:1 \ go run gotest.tools/gotestsum@latest -- ./... \ -failfast \ @@ -157,12 +156,12 @@ jobs: - uses: actions/upload-artifact@v4 if: always() && steps.changed-files.outputs.files == 'true' with: - name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-logs + name: ${{ matrix.test }}-${{matrix.database}}-logs path: "control_logs/*.log" - uses: actions/upload-artifact@v4 if: always() && steps.changed-files.outputs.files == 'true' with: - name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-pprof + name: ${{ matrix.test }}-${{matrix.database}}-pprof path: "control_logs/*.pprof.tar" - name: Setup a blocking tmux session if: ${{ env.HAS_TAILSCALE_SECRET }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 0eff4ad7..2149ebaa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### BREAKING +#### Routes + Route internals have been rewritten, removing the dedicated route table in the database. This was done to simplify the codebase, which had grown unnecessarily complex after the routes were split into separate tables. The overhead of having @@ -35,14 +37,15 @@ will be approved. - Routes are now managed via the Node API [#2422](https://github.com/juanfont/headscale/pull/2422) -### Experimental Policy v2 +#### Policy v2 -This release introduces a new experimental version of Headscales policy -implementation. In this context, experimental means that the feature is not yet -fully tested and may contain bugs or unexpected behavior and that we are still -experimenting with how the final interface/behavior will be. +This release introduces a new policy implementation. The new policy is a +complete rewrite, and it introduces some significant quality and consistency +improvements. In principle, there are not really any new features, but some long +standing bugs should have been resolved, or be easier to fix in the future. The +new policy code passes all of our tests. -#### Breaking changes +**Changes** - The policy is validated and "resolved" when loading, providing errors for invalid rules and conditions. @@ -59,19 +62,14 @@ experimenting with how the final interface/behavior will be. `@` should be appended at the end. For example, if your user is `john`, it must be written as `john@` in the policy. -#### Current state +**Current state** The new policy is passing all tests, both integration and unit tests. This does not mean it is perfect, but it is a good start. Corner cases that is currently working in v1 and not tested might be broken in v2 (and vice versa). -**We do need help testing this code**, and we think that most of the user facing -API will not really change. We are not sure yet when this code will replace v1, -but we are confident that it will, and all new changes and fixes will be made -towards this code. +**We do need help testing this code** -The new policy can be used by setting the environment variable -`HEADSCALE_EXPERIMENTAL_POLICY_V2` to `1`. #### Other breaking diff --git a/hscontrol/mapper/mapper_test.go b/hscontrol/mapper/mapper_test.go index ced0c9f4..5d718b54 100644 --- a/hscontrol/mapper/mapper_test.go +++ b/hscontrol/mapper/mapper_test.go @@ -346,7 +346,7 @@ func Test_fullMapResponse(t *testing.T) { { "action": "accept", "src": ["100.64.0.2"], - "dst": ["user1:*"], + "dst": ["user1@:*"], }, ], } @@ -382,7 +382,7 @@ func Test_fullMapResponse(t *testing.T) { }, }, }, - SSHPolicy: &tailcfg.SSHPolicy{}, + SSHPolicy: nil, UserProfiles: []tailcfg.UserProfile{ {ID: tailcfg.UserID(user1.ID), LoginName: "user1", DisplayName: "user1"}, {ID: tailcfg.UserID(user2.ID), LoginName: "user2", DisplayName: "user2"}, diff --git a/hscontrol/policy/pm.go b/hscontrol/policy/pm.go index 24f68ca1..29b55fc1 100644 --- a/hscontrol/policy/pm.go +++ b/hscontrol/policy/pm.go @@ -11,7 +11,7 @@ import ( ) var ( - polv2 = envknob.Bool("HEADSCALE_EXPERIMENTAL_POLICY_V2") + polv1 = envknob.Bool("HEADSCALE_POLICY_V1") ) type PolicyManager interface { @@ -35,13 +35,13 @@ type PolicyManager interface { func NewPolicyManager(pol []byte, users []types.User, nodes types.Nodes) (PolicyManager, error) { var polMan PolicyManager var err error - if polv2 { - polMan, err = policyv2.NewPolicyManager(pol, users, nodes) + if polv1 { + polMan, err = policyv1.NewPolicyManager(pol, users, nodes) if err != nil { return nil, err } } else { - polMan, err = policyv1.NewPolicyManager(pol, users, nodes) + polMan, err = policyv2.NewPolicyManager(pol, users, nodes) if err != nil { return nil, err } diff --git a/hscontrol/policy/v2/filter.go b/hscontrol/policy/v2/filter.go index 2d6c3f12..b94620a3 100644 --- a/hscontrol/policy/v2/filter.go +++ b/hscontrol/policy/v2/filter.go @@ -38,7 +38,7 @@ func (pol *Policy) compileFilterRules( log.Trace().Err(err).Msgf("resolving source ips") } - if len(srcIPs.Prefixes()) == 0 { + if srcIPs == nil || len(srcIPs.Prefixes()) == 0 { continue } @@ -56,6 +56,10 @@ func (pol *Policy) compileFilterRules( log.Trace().Err(err).Msgf("resolving destination ips") } + if ips == nil { + continue + } + for _, pref := range ips.Prefixes() { for _, port := range dest.Ports { pr := tailcfg.NetPortRange{ @@ -162,6 +166,10 @@ func (pol *Policy) compileSSHPolicy( func ipSetToPrefixStringList(ips *netipx.IPSet) []string { var out []string + if ips == nil { + return out + } + for _, pref := range ips.Prefixes() { out = append(out, pref.String()) } diff --git a/integration/hsic/hsic.go b/integration/hsic/hsic.go index f60889f4..3f622e36 100644 --- a/integration/hsic/hsic.go +++ b/integration/hsic/hsic.go @@ -70,7 +70,6 @@ type HeadscaleInContainer struct { tlsKey []byte filesInContainer []fileInContainer postgres bool - policyV2 bool policyMode types.PolicyMode } @@ -188,11 +187,10 @@ func WithPostgres() Option { } } -// WithPolicyV2 tells the integration test to use the new v2 filter. -func WithPolicyV2() Option { +// WithPolicyV1 tells the integration test to use the old v1 filter. +func WithPolicyV1() Option { return func(hsic *HeadscaleInContainer) { - hsic.policyV2 = true - hsic.env["HEADSCALE_EXPERIMENTAL_POLICY_V2"] = "1" + hsic.env["HEADSCALE_POLICY_V1"] = "1" } } diff --git a/integration/scenario.go b/integration/scenario.go index 94b37e16..5ad02708 100644 --- a/integration/scenario.go +++ b/integration/scenario.go @@ -47,7 +47,7 @@ const ( ) var usePostgresForTest = envknob.Bool("HEADSCALE_INTEGRATION_POSTGRES") -var usePolicyV2ForTest = envknob.Bool("HEADSCALE_EXPERIMENTAL_POLICY_V2") +var usePolicyV1ForTest = envknob.Bool("HEADSCALE_POLICY_V1") var ( errNoHeadscaleAvailable = errors.New("no headscale available") @@ -408,8 +408,8 @@ func (s *Scenario) Headscale(opts ...hsic.Option) (ControlServer, error) { opts = append(opts, hsic.WithPostgres()) } - if usePolicyV2ForTest { - opts = append(opts, hsic.WithPolicyV2()) + if usePolicyV1ForTest { + opts = append(opts, hsic.WithPolicyV1()) } headscale, err := hsic.New(s.pool, s.Networks(), opts...)