policy/v2: make default (#2546)

* policy/v2: make default

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>

* integration: do not run v1 tests

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>

* policy/v2: fix potential nil pointers

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>

* mapper: fix test failures in v2

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>

---------

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
This commit is contained in:
Kristoffer Dalby 2025-04-29 17:27:41 +03:00 committed by GitHub
parent 9a4d0e1a99
commit 2b38f7bef7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 35 additions and 202 deletions

View File

@ -71,5 +71,4 @@ func main() {
}
updateYAML(quotedTests, "./test-integration.yaml")
updateYAML(quotedTests, "./test-integration-policyv2.yaml")
}

View File

@ -1,169 +0,0 @@
name: Integration Tests (policy v2)
# To debug locally on a branch, and when needing secrets
# change this to include `push` so the build is ran on
# the main repository.
on: [pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
integration-test:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
test:
- TestACLHostsInNetMapTable
- TestACLAllowUser80Dst
- TestACLDenyAllPort80
- TestACLAllowUserDst
- TestACLAllowStarDst
- TestACLNamedHostsCanReachBySubnet
- TestACLNamedHostsCanReach
- TestACLDevice1CanAccessDevice2
- TestPolicyUpdateWhileRunningWithCLIInDatabase
- TestAuthKeyLogoutAndReloginSameUser
- TestAuthKeyLogoutAndReloginNewUser
- TestAuthKeyLogoutAndReloginSameUserExpiredKey
- TestOIDCAuthenticationPingAll
- TestOIDCExpireNodesBasedOnTokenExpiry
- TestOIDC024UserCreation
- TestOIDCAuthenticationWithPKCE
- TestOIDCReloginSameNodeNewUser
- TestAuthWebFlowAuthenticationPingAll
- TestAuthWebFlowLogoutAndRelogin
- TestUserCommand
- TestPreAuthKeyCommand
- TestPreAuthKeyCommandWithoutExpiry
- TestPreAuthKeyCommandReusableEphemeral
- TestPreAuthKeyCorrectUserLoggedInCommand
- TestApiKeyCommand
- TestNodeTagCommand
- TestNodeAdvertiseTagCommand
- TestNodeCommand
- TestNodeExpireCommand
- TestNodeRenameCommand
- TestNodeMoveCommand
- TestPolicyCommand
- TestPolicyBrokenConfigCommand
- TestDERPVerifyEndpoint
- TestResolveMagicDNS
- TestResolveMagicDNSExtraRecordsPath
- TestValidateResolvConf
- TestDERPServerScenario
- TestDERPServerWebsocketScenario
- TestPingAllByIP
- TestPingAllByIPPublicDERP
- TestEphemeral
- TestEphemeralInAlternateTimezone
- TestEphemeral2006DeletedTooQuickly
- TestPingAllByHostname
- TestTaildrop
- TestUpdateHostnameFromClient
- TestExpireNode
- TestNodeOnlineStatus
- TestPingAllByIPManyUpDown
- Test2118DeletingOnlineNodePanics
- TestEnablingRoutes
- TestHASubnetRouterFailover
- TestSubnetRouteACL
- TestEnablingExitRoutes
- TestSubnetRouterMultiNetwork
- TestSubnetRouterMultiNetworkExitNode
- TestAutoApproveMultiNetwork
- TestHeadscale
- TestTailscaleNodesJoiningHeadcale
- TestSSHOneUserToAll
- TestSSHMultipleUsersAllToAll
- TestSSHNoSSHConfigured
- TestSSHIsBlockedInACL
- TestSSHUserOnlyIsolation
database: [postgres, sqlite]
env:
# Github does not allow us to access secrets in pull requests,
# so this env var is used to check if we have the secret or not.
# If we have the secrets, meaning we are running on push in a fork,
# there might be secrets available for more debugging.
# If TS_OAUTH_CLIENT_ID and TS_OAUTH_SECRET is set, then the job
# will join a debug tailscale network, set up SSH and a tmux session.
# The SSH will be configured to use the SSH key of the Github user
# that triggered the build.
HAS_TAILSCALE_SECRET: ${{ secrets.TS_OAUTH_CLIENT_ID }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 2
- name: Get changed files
id: changed-files
uses: dorny/paths-filter@v3
with:
filters: |
files:
- '*.nix'
- 'go.*'
- '**/*.go'
- 'integration_test/'
- 'config-example.yaml'
- name: Tailscale
if: ${{ env.HAS_TAILSCALE_SECRET }}
uses: tailscale/github-action@v2
with:
oauth-client-id: ${{ secrets.TS_OAUTH_CLIENT_ID }}
oauth-secret: ${{ secrets.TS_OAUTH_SECRET }}
tags: tag:gh
- name: Setup SSH server for Actor
if: ${{ env.HAS_TAILSCALE_SECRET }}
uses: alexellis/setup-sshd-actor@master
- uses: DeterminateSystems/nix-installer-action@main
if: steps.changed-files.outputs.files == 'true'
- uses: DeterminateSystems/magic-nix-cache-action@main
if: steps.changed-files.outputs.files == 'true'
- uses: satackey/action-docker-layer-caching@main
if: steps.changed-files.outputs.files == 'true'
continue-on-error: true
- name: Run Integration Test
uses: Wandalen/wretry.action@master
if: steps.changed-files.outputs.files == 'true'
env:
USE_POSTGRES: ${{ matrix.database == 'postgres' && '1' || '0' }}
with:
# Our integration tests are started like a thundering herd, often
# hitting limits of the various external repositories we depend on
# like docker hub. This will retry jobs every 5 min, 10 times,
# hopefully letting us avoid manual intervention and restarting jobs.
# One could of course argue that we should invest in trying to avoid
# this, but currently it seems like a larger investment to be cleverer
# about this.
# Some of the jobs might still require manual restart as they are really
# slow and this will cause them to eventually be killed by Github actions.
attempt_delay: 300000 # 5 min
attempt_limit: 10
command: |
nix develop --command -- docker run \
--tty --rm \
--volume ~/.cache/hs-integration-go:/go \
--name headscale-test-suite \
--volume $PWD:$PWD -w $PWD/integration \
--volume /var/run/docker.sock:/var/run/docker.sock \
--volume $PWD/control_logs:/tmp/control \
--env HEADSCALE_INTEGRATION_POSTGRES=${{env.USE_POSTGRES}} \
--env HEADSCALE_EXPERIMENTAL_POLICY_V2=1 \
golang:1 \
go run gotest.tools/gotestsum@latest -- ./... \
-failfast \
-timeout 120m \
-parallel 1 \
-run "^${{ matrix.test }}$"
- uses: actions/upload-artifact@v4
if: always() && steps.changed-files.outputs.files == 'true'
with:
name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-logs
path: "control_logs/*.log"
- uses: actions/upload-artifact@v4
if: always() && steps.changed-files.outputs.files == 'true'
with:
name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-pprof
path: "control_logs/*.pprof.tar"
- name: Setup a blocking tmux session
if: ${{ env.HAS_TAILSCALE_SECRET }}
uses: alexellis/block-with-tmux-action@master

View File

@ -136,7 +136,7 @@ jobs:
# about this.
# Some of the jobs might still require manual restart as they are really
# slow and this will cause them to eventually be killed by Github actions.
attempt_delay: 300000 # 5 min
attempt_delay: 300000 # 5 min
attempt_limit: 10
command: |
nix develop --command -- docker run \
@ -147,7 +147,6 @@ jobs:
--volume /var/run/docker.sock:/var/run/docker.sock \
--volume $PWD/control_logs:/tmp/control \
--env HEADSCALE_INTEGRATION_POSTGRES=${{env.USE_POSTGRES}} \
--env HEADSCALE_EXPERIMENTAL_POLICY_V2=0 \
golang:1 \
go run gotest.tools/gotestsum@latest -- ./... \
-failfast \
@ -157,12 +156,12 @@ jobs:
- uses: actions/upload-artifact@v4
if: always() && steps.changed-files.outputs.files == 'true'
with:
name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-logs
name: ${{ matrix.test }}-${{matrix.database}}-logs
path: "control_logs/*.log"
- uses: actions/upload-artifact@v4
if: always() && steps.changed-files.outputs.files == 'true'
with:
name: ${{ matrix.test }}-${{matrix.database}}-${{matrix.policy}}-pprof
name: ${{ matrix.test }}-${{matrix.database}}-pprof
path: "control_logs/*.pprof.tar"
- name: Setup a blocking tmux session
if: ${{ env.HAS_TAILSCALE_SECRET }}

View File

@ -4,6 +4,8 @@
### BREAKING
#### Routes
Route internals have been rewritten, removing the dedicated route table in the
database. This was done to simplify the codebase, which had grown unnecessarily
complex after the routes were split into separate tables. The overhead of having
@ -35,14 +37,15 @@ will be approved.
- Routes are now managed via the Node API
[#2422](https://github.com/juanfont/headscale/pull/2422)
### Experimental Policy v2
#### Policy v2
This release introduces a new experimental version of Headscales policy
implementation. In this context, experimental means that the feature is not yet
fully tested and may contain bugs or unexpected behavior and that we are still
experimenting with how the final interface/behavior will be.
This release introduces a new policy implementation. The new policy is a
complete rewrite, and it introduces some significant quality and consistency
improvements. In principle, there are not really any new features, but some long
standing bugs should have been resolved, or be easier to fix in the future. The
new policy code passes all of our tests.
#### Breaking changes
**Changes**
- The policy is validated and "resolved" when loading, providing errors for
invalid rules and conditions.
@ -59,19 +62,14 @@ experimenting with how the final interface/behavior will be.
`@` should be appended at the end. For example, if your user is `john`, it
must be written as `john@` in the policy.
#### Current state
**Current state**
The new policy is passing all tests, both integration and unit tests. This does
not mean it is perfect, but it is a good start. Corner cases that is currently
working in v1 and not tested might be broken in v2 (and vice versa).
**We do need help testing this code**, and we think that most of the user facing
API will not really change. We are not sure yet when this code will replace v1,
but we are confident that it will, and all new changes and fixes will be made
towards this code.
**We do need help testing this code**
The new policy can be used by setting the environment variable
`HEADSCALE_EXPERIMENTAL_POLICY_V2` to `1`.
#### Other breaking

View File

@ -346,7 +346,7 @@ func Test_fullMapResponse(t *testing.T) {
{
"action": "accept",
"src": ["100.64.0.2"],
"dst": ["user1:*"],
"dst": ["user1@:*"],
},
],
}
@ -382,7 +382,7 @@ func Test_fullMapResponse(t *testing.T) {
},
},
},
SSHPolicy: &tailcfg.SSHPolicy{},
SSHPolicy: nil,
UserProfiles: []tailcfg.UserProfile{
{ID: tailcfg.UserID(user1.ID), LoginName: "user1", DisplayName: "user1"},
{ID: tailcfg.UserID(user2.ID), LoginName: "user2", DisplayName: "user2"},

View File

@ -11,7 +11,7 @@ import (
)
var (
polv2 = envknob.Bool("HEADSCALE_EXPERIMENTAL_POLICY_V2")
polv1 = envknob.Bool("HEADSCALE_POLICY_V1")
)
type PolicyManager interface {
@ -35,13 +35,13 @@ type PolicyManager interface {
func NewPolicyManager(pol []byte, users []types.User, nodes types.Nodes) (PolicyManager, error) {
var polMan PolicyManager
var err error
if polv2 {
polMan, err = policyv2.NewPolicyManager(pol, users, nodes)
if polv1 {
polMan, err = policyv1.NewPolicyManager(pol, users, nodes)
if err != nil {
return nil, err
}
} else {
polMan, err = policyv1.NewPolicyManager(pol, users, nodes)
polMan, err = policyv2.NewPolicyManager(pol, users, nodes)
if err != nil {
return nil, err
}

View File

@ -38,7 +38,7 @@ func (pol *Policy) compileFilterRules(
log.Trace().Err(err).Msgf("resolving source ips")
}
if len(srcIPs.Prefixes()) == 0 {
if srcIPs == nil || len(srcIPs.Prefixes()) == 0 {
continue
}
@ -56,6 +56,10 @@ func (pol *Policy) compileFilterRules(
log.Trace().Err(err).Msgf("resolving destination ips")
}
if ips == nil {
continue
}
for _, pref := range ips.Prefixes() {
for _, port := range dest.Ports {
pr := tailcfg.NetPortRange{
@ -162,6 +166,10 @@ func (pol *Policy) compileSSHPolicy(
func ipSetToPrefixStringList(ips *netipx.IPSet) []string {
var out []string
if ips == nil {
return out
}
for _, pref := range ips.Prefixes() {
out = append(out, pref.String())
}

View File

@ -70,7 +70,6 @@ type HeadscaleInContainer struct {
tlsKey []byte
filesInContainer []fileInContainer
postgres bool
policyV2 bool
policyMode types.PolicyMode
}
@ -188,11 +187,10 @@ func WithPostgres() Option {
}
}
// WithPolicyV2 tells the integration test to use the new v2 filter.
func WithPolicyV2() Option {
// WithPolicyV1 tells the integration test to use the old v1 filter.
func WithPolicyV1() Option {
return func(hsic *HeadscaleInContainer) {
hsic.policyV2 = true
hsic.env["HEADSCALE_EXPERIMENTAL_POLICY_V2"] = "1"
hsic.env["HEADSCALE_POLICY_V1"] = "1"
}
}

View File

@ -47,7 +47,7 @@ const (
)
var usePostgresForTest = envknob.Bool("HEADSCALE_INTEGRATION_POSTGRES")
var usePolicyV2ForTest = envknob.Bool("HEADSCALE_EXPERIMENTAL_POLICY_V2")
var usePolicyV1ForTest = envknob.Bool("HEADSCALE_POLICY_V1")
var (
errNoHeadscaleAvailable = errors.New("no headscale available")
@ -408,8 +408,8 @@ func (s *Scenario) Headscale(opts ...hsic.Option) (ControlServer, error) {
opts = append(opts, hsic.WithPostgres())
}
if usePolicyV2ForTest {
opts = append(opts, hsic.WithPolicyV2())
if usePolicyV1ForTest {
opts = append(opts, hsic.WithPolicyV1())
}
headscale, err := hsic.New(s.pool, s.Networks(), opts...)