policy/v2: make default (#2546)

* policy/v2: make default

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>

* integration: do not run v1 tests

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>

* policy/v2: fix potential nil pointers

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>

* mapper: fix test failures in v2

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>

---------

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
Kristoffer Dalby 2025-04-29 17:27:41 +03:00 committed by GitHub
parent 9a4d0e1a99
commit 2b38f7bef7
GPG Key ID: B5690EEEBB952194 (no known key found for this signature in database)
9 changed files with 35 additions and 202 deletions

View File

@@ -71,5 +71,4 @@ func main() {
 	}
 	updateYAML(quotedTests, "./test-integration.yaml")
-	updateYAML(quotedTests, "./test-integration-policyv2.yaml")
 }

View File

@@ -1,169 +0,0 @@
name: Integration Tests (policy v2)
# To debug locally on a branch, and when needing secrets
# change this to include `push` so the build is ran on
# the main repository.
on: [pull_request]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  integration-test:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        test:
          - TestACLHostsInNetMapTable
          - TestACLAllowUser80Dst
          - TestACLDenyAllPort80
          - TestACLAllowUserDst
          - TestACLAllowStarDst
          - TestACLNamedHostsCanReachBySubnet
          - TestACLNamedHostsCanReach
          - TestACLDevice1CanAccessDevice2
          - TestPolicyUpdateWhileRunningWithCLIInDatabase
          - TestAuthKeyLogoutAndReloginSameUser
          - TestAuthKeyLogoutAndReloginNewUser
          - TestAuthKeyLogoutAndReloginSameUserExpiredKey
          - TestOIDCAuthenticationPingAll
          - TestOIDCExpireNodesBasedOnTokenExpiry
          - TestOIDC024UserCreation
          - TestOIDCAuthenticationWithPKCE
          - TestOIDCReloginSameNodeNewUser
          - TestAuthWebFlowAuthenticationPingAll
          - TestAuthWebFlowLogoutAndRelogin
          - TestUserCommand
          - TestPreAuthKeyCommand
          - TestPreAuthKeyCommandWithoutExpiry
          - TestPreAuthKeyCommandReusableEphemeral
          - TestPreAuthKeyCorrectUserLoggedInCommand
          - TestApiKeyCommand
          - TestNodeTagCommand
          - TestNodeAdvertiseTagCommand
          - TestNodeCommand
          - TestNodeExpireCommand
          - TestNodeRenameCommand
          - TestNodeMoveCommand
          - TestPolicyCommand
          - TestPolicyBrokenConfigCommand
          - TestDERPVerifyEndpoint
          - TestResolveMagicDNS
          - TestResolveMagicDNSExtraRecordsPath
          - TestValidateResolvConf
          - TestDERPServerScenario
          - TestDERPServerWebsocketScenario
          - TestPingAllByIP
          - TestPingAllByIPPublicDERP
          - TestEphemeral
          - TestEphemeralInAlternateTimezone
          - TestEphemeral2006DeletedTooQuickly
          - TestPingAllByHostname
          - TestTaildrop
          - TestUpdateHostnameFromClient
          - TestExpireNode
          - TestNodeOnlineStatus
          - TestPingAllByIPManyUpDown
          - Test2118DeletingOnlineNodePanics
          - TestEnablingRoutes
          - TestHASubnetRouterFailover
          - TestSubnetRouteACL
          - TestEnablingExitRoutes
          - TestSubnetRouterMultiNetwork
          - TestSubnetRouterMultiNetworkExitNode
          - TestAutoApproveMultiNetwork
          - TestHeadscale
          - TestTailscaleNodesJoiningHeadcale
          - TestSSHOneUserToAll
          - TestSSHMultipleUsersAllToAll
          - TestSSHNoSSHConfigured
          - TestSSHIsBlockedInACL
          - TestSSHUserOnlyIsolation
        database: [postgres, sqlite]
    env:
      # Github does not allow us to access secrets in pull requests,
      # so this env var is used to check if we have the secret or not.
      # If we have the secrets, meaning we are running on push in a fork,
      # there might be secrets available for more debugging.
      # If TS_OAUTH_CLIENT_ID and TS_OAUTH_SECRET is set, then the job
      # will join a debug tailscale network, set up SSH and a tmux session.
      # The SSH will be configured to use the SSH key of the Github user
      # that triggered the build.
      HAS_TAILSCALE_SECRET: ${{ secrets.TS_OAUTH_CLIENT_ID }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 2
      - name: Get changed files
        id: changed-files
        uses: dorny/paths-filter@v3
        with:
          filters: |
            files:
              - '*.nix'
              - 'go.*'
              - '**/*.go'
              - 'integration_test/'
              - 'config-example.yaml'
      - name: Tailscale
        if: ${{ env.HAS_TAILSCALE_SECRET }}
        uses: tailscale/github-action@v2
        with:
          oauth-client-id: ${{ secrets.TS_OAUTH_CLIENT_ID }}
          oauth-secret: ${{ secrets.TS_OAUTH_SECRET }}
          tags: tag:gh
      - name: Setup SSH server for Actor
        if: ${{ env.HAS_TAILSCALE_SECRET }}
        uses: alexellis/setup-sshd-actor@master
      - uses: DeterminateSystems/nix-installer-action@main
        if: steps.changed-files.outputs.files == 'true'
      - uses: DeterminateSystems/magic-nix-cache-action@main
        if: steps.changed-files.outputs.files == 'true'
      - uses: satackey/action-docker-layer-caching@main
        if: steps.changed-files.outputs.files == 'true'
        continue-on-error: true
      - name: Run Integration Test
        uses: Wandalen/wretry.action@master
        if: steps.changed-files.outputs.files == 'true'
        env:
          USE_POSTGRES: ${{ matrix.database == 'postgres' && '1' || '0' }}
        with:
          # Our integration tests are started like a thundering herd, often
          # hitting limits of the various external repositories we depend on
          # like docker hub. This will retry jobs every 5 min, 10 times,
          # hopefully letting us avoid manual intervention and restarting jobs.
          # One could of course argue that we should invest in trying to avoid
          # this, but currently it seems like a larger investment to be cleverer
          # about this.
          # Some of the jobs might still require manual restart as they are really
          # slow and this will cause them to eventually be killed by Github actions.
          attempt_delay: 300000 # 5 min
          attempt_limit: 10
          command: |
            nix develop --command -- docker run \
              --tty --rm \
              --volume ~/.cache/hs-integration-go:/go \
              --name headscale-test-suite \
              --volume $PWD:$PWD -w $PWD/integration \
              --volume /var/run/docker.sock:/var/run/docker.sock \
              --volume $PWD/control_logs:/tmp/control \
              --env HEADSCALE_INTEGRATION_POSTGRES=${{env.USE_POSTGRES}} \
              --env HEADSCALE_EXPERIMENTAL_POLICY_V2=1 \
              golang:1 \
                go run gotest.tools/gotestsum@latest -- ./... \
                -failfast \
                -timeout 120m \
                -parallel 1 \
                -run "^${{ matrix.test }}$"
      - uses: actions/upload-artifact@v4
        if: always() && steps.changed-files.outputs.files == 'true'
        with:
          name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-logs
          path: "control_logs/*.log"
      - uses: actions/upload-artifact@v4
        if: always() && steps.changed-files.outputs.files == 'true'
        with:
          name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-pprof
          path: "control_logs/*.pprof.tar"
      - name: Setup a blocking tmux session
        if: ${{ env.HAS_TAILSCALE_SECRET }}
        uses: alexellis/block-with-tmux-action@master

View File

@@ -136,7 +136,7 @@ jobs:
           # about this.
           # Some of the jobs might still require manual restart as they are really
           # slow and this will cause them to eventually be killed by Github actions.
           attempt_delay: 300000 # 5 min
           attempt_limit: 10
           command: |
             nix develop --command -- docker run \
@@ -147,7 +147,6 @@ jobs:
               --volume /var/run/docker.sock:/var/run/docker.sock \
               --volume $PWD/control_logs:/tmp/control \
               --env HEADSCALE_INTEGRATION_POSTGRES=${{env.USE_POSTGRES}} \
-              --env HEADSCALE_EXPERIMENTAL_POLICY_V2=0 \
               golang:1 \
                 go run gotest.tools/gotestsum@latest -- ./... \
                 -failfast \
@@ -157,12 +156,12 @@ jobs:
       - uses: actions/upload-artifact@v4
         if: always() && steps.changed-files.outputs.files == 'true'
         with:
-          name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-logs
+          name: ${{ matrix.test }}-${{matrix.database}}-logs
           path: "control_logs/*.log"
       - uses: actions/upload-artifact@v4
         if: always() && steps.changed-files.outputs.files == 'true'
        with:
-          name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-pprof
+          name: ${{ matrix.test }}-${{matrix.database}}-pprof
           path: "control_logs/*.pprof.tar"
       - name: Setup a blocking tmux session
         if: ${{ env.HAS_TAILSCALE_SECRET }}

View File

@@ -4,6 +4,8 @@
 ### BREAKING

+#### Routes
+
 Route internals have been rewritten, removing the dedicated route table in the
 database. This was done to simplify the codebase, which had grown unnecessarily
 complex after the routes were split into separate tables. The overhead of having
@@ -35,14 +37,15 @@ will be approved.
 - Routes are now managed via the Node API
   [#2422](https://github.com/juanfont/headscale/pull/2422)

-### Experimental Policy v2
+#### Policy v2

-This release introduces a new experimental version of Headscales policy
-implementation. In this context, experimental means that the feature is not yet
-fully tested and may contain bugs or unexpected behavior and that we are still
-experimenting with how the final interface/behavior will be.
+This release introduces a new policy implementation. The new policy is a
+complete rewrite, and it introduces some significant quality and consistency
+improvements. In principle, there are not really any new features, but some long
+standing bugs should have been resolved, or be easier to fix in the future. The
+new policy code passes all of our tests.

-#### Breaking changes
+**Changes**

 - The policy is validated and "resolved" when loading, providing errors for
   invalid rules and conditions.
@@ -59,19 +62,14 @@ experimenting with how the final interface/behavior will be.
   `@` should be appended at the end. For example, if your user is `john`, it
   must be written as `john@` in the policy.

-#### Current state
+**Current state**

 The new policy is passing all tests, both integration and unit tests. This does
 not mean it is perfect, but it is a good start. Corner cases that is currently
 working in v1 and not tested might be broken in v2 (and vice versa).

-**We do need help testing this code**, and we think that most of the user facing
-API will not really change. We are not sure yet when this code will replace v1,
-but we are confident that it will, and all new changes and fixes will be made
-towards this code.
+**We do need help testing this code**

-The new policy can be used by setting the environment variable
-`HEADSCALE_EXPERIMENTAL_POLICY_V2` to `1`.

 #### Other breaking
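
The trailing-`@` user syntax called out in the changelog is the change most existing policies will notice. Below is a minimal sketch of a v2-style policy, wrapped in a small Go program so it stands alone; the user `john` and the surrounding harness are illustrative and not part of this commit. Only the `user@` source form and the `user@:*` destination shape are taken from the changelog and the updated mapper test further down.

```go
package main

import "fmt"

// examplePolicy is an illustrative HuJSON policy using the v2 user syntax:
// user names are written with a trailing `@` (e.g. "john@"), matching the
// "user1@:*" destination in the updated mapper test in this commit.
const examplePolicy = `{
	"acls": [
		{
			"action": "accept",
			"src": ["john@"],
			"dst": ["john@:*"],
		},
	],
}`

func main() {
	// In headscale this text would be handed to the policy manager as raw
	// bytes; printing it keeps the sketch self-contained and runnable.
	fmt.Println(examplePolicy)
}
```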

View File

@@ -346,7 +346,7 @@ func Test_fullMapResponse(t *testing.T) {
 			{
 				"action": "accept",
 				"src": ["100.64.0.2"],
-				"dst": ["user1:*"],
+				"dst": ["user1@:*"],
 			},
 		],
 	}
@@ -382,7 +382,7 @@
 			},
 		},
 	},
-	SSHPolicy: &tailcfg.SSHPolicy{},
+	SSHPolicy: nil,
 	UserProfiles: []tailcfg.UserProfile{
 		{ID: tailcfg.UserID(user1.ID), LoginName: "user1", DisplayName: "user1"},
 		{ID: tailcfg.UserID(user2.ID), LoginName: "user2", DisplayName: "user2"},

View File

@@ -11,7 +11,7 @@ import (
 )

 var (
-	polv2 = envknob.Bool("HEADSCALE_EXPERIMENTAL_POLICY_V2")
+	polv1 = envknob.Bool("HEADSCALE_POLICY_V1")
 )

 type PolicyManager interface {
@@ -35,13 +35,13 @@ type PolicyManager interface {
 func NewPolicyManager(pol []byte, users []types.User, nodes types.Nodes) (PolicyManager, error) {
 	var polMan PolicyManager
 	var err error

-	if polv2 {
-		polMan, err = policyv2.NewPolicyManager(pol, users, nodes)
+	if polv1 {
+		polMan, err = policyv1.NewPolicyManager(pol, users, nodes)
 		if err != nil {
 			return nil, err
 		}
 	} else {
-		polMan, err = policyv1.NewPolicyManager(pol, users, nodes)
+		polMan, err = policyv2.NewPolicyManager(pol, users, nodes)
 		if err != nil {
 			return nil, err
 		}
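
The practical effect of this hunk is that v2 is now the branch taken unless the operator explicitly opts back out. A self-contained sketch of the new opt-out knob, using the same `envknob.Bool` call as the diff; the program around it is illustrative, and the real selection happens inside `NewPolicyManager` as shown above.

```go
package main

import (
	"fmt"

	"tailscale.com/envknob"
)

// polv1 mirrors the variable in the hunk above: the old opt-in knob
// HEADSCALE_EXPERIMENTAL_POLICY_V2 is gone, and the only switch left is an
// opt-out back to the v1 implementation.
var polv1 = envknob.Bool("HEADSCALE_POLICY_V1")

func main() {
	if polv1 {
		fmt.Println("policy v1 selected (HEADSCALE_POLICY_V1=1 set in the environment)")
	} else {
		fmt.Println("policy v2 selected (the new default)")
	}
}
```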

View File

@@ -38,7 +38,7 @@ func (pol *Policy) compileFilterRules(
 			log.Trace().Err(err).Msgf("resolving source ips")
 		}

-		if len(srcIPs.Prefixes()) == 0 {
+		if srcIPs == nil || len(srcIPs.Prefixes()) == 0 {
 			continue
 		}
@@ -56,6 +56,10 @@
 				log.Trace().Err(err).Msgf("resolving destination ips")
 			}

+			if ips == nil {
+				continue
+			}
+
 			for _, pref := range ips.Prefixes() {
 				for _, port := range dest.Ports {
 					pr := tailcfg.NetPortRange{
@@ -162,6 +166,10 @@ func (pol *Policy) compileSSHPolicy(
 func ipSetToPrefixStringList(ips *netipx.IPSet) []string {
 	var out []string

+	if ips == nil {
+		return out
+	}
+
 	for _, pref := range ips.Prefixes() {
 		out = append(out, pref.String())
 	}
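
All three guards follow the same pattern: alias resolution can legitimately leave an `*netipx.IPSet` nil, and calling `Prefixes()` on a nil set panics, so the nil case is treated as an empty set. A standalone sketch of that pattern; the helper name and the sample prefix are illustrative, not from this commit.

```go
package main

import (
	"fmt"
	"net/netip"

	"go4.org/netipx"
)

// prefixStrings mirrors the guarded ipSetToPrefixStringList above: a nil
// *netipx.IPSet is treated as an empty set instead of being dereferenced.
func prefixStrings(ips *netipx.IPSet) []string {
	var out []string
	if ips == nil {
		return out
	}
	for _, pref := range ips.Prefixes() {
		out = append(out, pref.String())
	}
	return out
}

func main() {
	fmt.Println(prefixStrings(nil)) // prints []: no panic on the nil case

	var b netipx.IPSetBuilder
	b.AddPrefix(netip.MustParsePrefix("100.64.0.0/10"))
	set, _ := b.IPSet()
	fmt.Println(prefixStrings(set)) // prints [100.64.0.0/10]
}
```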

View File

@@ -70,7 +70,6 @@ type HeadscaleInContainer struct
 	tlsKey           []byte
 	filesInContainer []fileInContainer
 	postgres         bool
-	policyV2         bool
 	policyMode       types.PolicyMode
 }
@@ -188,11 +187,10 @@ func WithPostgres() Option {
 	}
 }

-// WithPolicyV2 tells the integration test to use the new v2 filter.
-func WithPolicyV2() Option {
+// WithPolicyV1 tells the integration test to use the old v1 filter.
+func WithPolicyV1() Option {
 	return func(hsic *HeadscaleInContainer) {
-		hsic.policyV2 = true
-		hsic.env["HEADSCALE_EXPERIMENTAL_POLICY_V2"] = "1"
+		hsic.env["HEADSCALE_POLICY_V1"] = "1"
 	}
 }

View File

@@ -47,7 +47,7 @@ const (
 )

 var usePostgresForTest = envknob.Bool("HEADSCALE_INTEGRATION_POSTGRES")
-var usePolicyV2ForTest = envknob.Bool("HEADSCALE_EXPERIMENTAL_POLICY_V2")
+var usePolicyV1ForTest = envknob.Bool("HEADSCALE_POLICY_V1")

 var (
 	errNoHeadscaleAvailable = errors.New("no headscale available")
@@ -408,8 +408,8 @@ func (s *Scenario) Headscale(opts ...hsic.Option) (ControlServer, error) {
 		opts = append(opts, hsic.WithPostgres())
 	}

-	if usePolicyV2ForTest {
-		opts = append(opts, hsic.WithPolicyV2())
+	if usePolicyV1ForTest {
+		opts = append(opts, hsic.WithPolicyV1())
 	}

 	headscale, err := hsic.New(s.pool, s.Networks(), opts...)