implement selfupdate and pass expiry (#1647)

This commit is contained in:
Kristoffer Dalby 2024-01-05 10:41:56 +01:00 committed by GitHub
parent a592ae56b4
commit 3b103280ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 114 additions and 52 deletions

View File

@ -26,7 +26,7 @@ after improving the test harness as part of adopting [#1460](https://github.com/
- Code reorganisation, a lot of code has moved, please review the following PRs accordingly [#1473](https://github.com/juanfont/headscale/pull/1473)
- API: Machine is now Node [#1553](https://github.com/juanfont/headscale/pull/1553)
- Remove support for older Tailscale clients [#1611](https://github.com/juanfont/headscale/pull/1611)
- The latest supported client is 1.32
- The latest supported client is 1.36
- Headscale checks that _at least_ one DERP is defined at start [#1564](https://github.com/juanfont/headscale/pull/1564)
- If no DERP is configured, the server will fail to start, this can be because it cannot load the DERPMap from file or url.
- Embedded DERP server requires a private key [#1611](https://github.com/juanfont/headscale/pull/1611)

View File

@ -340,6 +340,16 @@ func (hsdb *HSDatabase) nodeSetExpiry(node *types.Node, expiry time.Time) error
)
}
node.Expiry = &expiry
stateSelfUpdate := types.StateUpdate{
Type: types.StateSelfUpdate,
ChangeNodes: types.Nodes{node},
}
if stateSelfUpdate.Valid() {
hsdb.notifier.NotifyByMachineKey(stateSelfUpdate, node.MachineKey)
}
stateUpdate := types.StateUpdate{
Type: types.StatePeerChangedPatch,
ChangePatches: []*tailcfg.PeerChange{
@ -350,7 +360,7 @@ func (hsdb *HSDatabase) nodeSetExpiry(node *types.Node, expiry time.Time) error
},
}
if stateUpdate.Valid() {
hsdb.notifier.NotifyAll(stateUpdate)
hsdb.notifier.NotifyWithIgnore(stateUpdate, node.MachineKey.String())
}
return nil
@ -856,7 +866,7 @@ func (hsdb *HSDatabase) ExpireExpiredNodes(lastCheck time.Time) time.Time {
// checked everything.
started := time.Now()
expired := make([]*tailcfg.PeerChange, 0)
expiredNodes := make([]*types.Node, 0)
nodes, err := hsdb.listNodes()
if err != nil {
@ -872,17 +882,13 @@ func (hsdb *HSDatabase) ExpireExpiredNodes(lastCheck time.Time) time.Time {
// It will notify about all nodes that has been expired.
// It should only notify about expired nodes since _last check_.
node.Expiry.After(lastCheck) {
expired = append(expired, &tailcfg.PeerChange{
NodeID: tailcfg.NodeID(node.ID),
KeyExpiry: node.Expiry,
})
expiredNodes = append(expiredNodes, &nodes[index])
now := time.Now()
// Do not use setNodeExpiry as that has a notifier hook, which
// can cause a deadlock, we are updating all changed nodes later
// and there is no point in notifiying twice.
if err := hsdb.db.Model(nodes[index]).Updates(types.Node{
Expiry: &now,
Expiry: &started,
}).Error; err != nil {
log.Error().
Err(err).
@ -898,6 +904,15 @@ func (hsdb *HSDatabase) ExpireExpiredNodes(lastCheck time.Time) time.Time {
}
}
expired := make([]*tailcfg.PeerChange, len(expiredNodes))
for idx, node := range expiredNodes {
expired[idx] = &tailcfg.PeerChange{
NodeID: tailcfg.NodeID(node.ID),
KeyExpiry: &started,
}
}
// Inform the peers of a node with a lightweight update.
stateUpdate := types.StateUpdate{
Type: types.StatePeerChangedPatch,
ChangePatches: expired,
@ -906,5 +921,16 @@ func (hsdb *HSDatabase) ExpireExpiredNodes(lastCheck time.Time) time.Time {
hsdb.notifier.NotifyAll(stateUpdate)
}
// Inform the node itself that it has expired.
for _, node := range expiredNodes {
stateSelfUpdate := types.StateUpdate{
Type: types.StateSelfUpdate,
ChangeNodes: types.Nodes{node},
}
if stateSelfUpdate.Valid() {
hsdb.notifier.NotifyByMachineKey(stateSelfUpdate, node.MachineKey)
}
}
return started
}

View File

@ -21,7 +21,6 @@ import (
"github.com/juanfont/headscale/hscontrol/util"
"github.com/klauspost/compress/zstd"
"github.com/rs/zerolog/log"
"github.com/samber/lo"
"golang.org/x/exp/maps"
"tailscale.com/envknob"
"tailscale.com/smallzstd"
@ -459,6 +458,8 @@ func (m *Mapper) marshalMapResponse(
switch {
case resp.Peers != nil && len(resp.Peers) > 0:
responseType = "full"
case resp.Peers == nil && resp.PeersChanged == nil && resp.PeersChangedPatch == nil:
responseType = "lite"
case resp.PeersChanged != nil && len(resp.PeersChanged) > 0:
responseType = "changed"
case resp.PeersChangedPatch != nil && len(resp.PeersChangedPatch) > 0:
@ -593,15 +594,6 @@ func nodeMapToList(nodes map[uint64]*types.Node) types.Nodes {
return ret
}
func filterExpiredAndNotReady(peers types.Nodes) types.Nodes {
return lo.Filter(peers, func(item *types.Node, index int) bool {
// Filter out nodes that are expired OR
// nodes that has no endpoints, this typically means they have
// registered, but are not configured.
return !item.IsExpired() || len(item.Endpoints) > 0
})
}
// appendPeerChanges mutates a tailcfg.MapResponse with all the
// necessary changes when peers have changed.
func appendPeerChanges(
@ -627,9 +619,6 @@ func appendPeerChanges(
return err
}
// Filter out peers that have expired.
changed = filterExpiredAndNotReady(changed)
// If there are filter rules present, see if there are any nodes that cannot
// access eachother at all and remove them from the peers.
if len(rules) > 0 {

View File

@ -95,6 +95,21 @@ func (n *Notifier) NotifyWithIgnore(update types.StateUpdate, ignore ...string)
}
}
func (n *Notifier) NotifyByMachineKey(update types.StateUpdate, mKey key.MachinePublic) {
log.Trace().Caller().Interface("type", update.Type).Msg("acquiring lock to notify")
defer log.Trace().
Caller().
Interface("type", update.Type).
Msg("releasing lock, finished notifing")
n.l.RLock()
defer n.l.RUnlock()
if c, ok := n.nodes[mKey.String()]; ok {
c <- update
}
}
func (n *Notifier) String() string {
n.l.RLock()
defer n.l.RUnlock()

View File

@ -397,6 +397,14 @@ func (h *Headscale) handlePoll(
case types.StatePeerRemoved:
logInfo("Sending PeerRemoved MapResponse")
data, err = mapp.PeerRemovedResponse(mapRequest, node, update.Removed)
case types.StateSelfUpdate:
if len(update.ChangeNodes) == 1 {
logInfo("Sending SelfUpdate MapResponse")
node = update.ChangeNodes[0]
data, err = mapp.LiteMapResponse(mapRequest, node, h.ACLPolicy)
} else {
logInfo("SelfUpdate contained too many nodes, this is likely a bug in the code, please report.")
}
case types.StateDERPUpdated:
logInfo("Sending DERPUpdate MapResponse")
data, err = mapp.DERPMapResponse(mapRequest, node, update.DERPMap)

View File

@ -13,7 +13,7 @@ import (
)
const (
MinimumCapVersion tailcfg.CapabilityVersion = 36
MinimumCapVersion tailcfg.CapabilityVersion = 56
)
// NoisePollNetMapHandler takes care of /machine/:id/map using the Noise protocol

View File

@ -91,6 +91,12 @@ const (
StatePeerChanged
StatePeerChangedPatch
StatePeerRemoved
// StateSelfUpdate is used to indicate that the node
// has changed in control, and the client needs to be
// informed.
// The updated node is inside the ChangeNodes field
// which should have a length of one.
StateSelfUpdate
StateDERPUpdated
)
@ -142,6 +148,10 @@ func (su *StateUpdate) Valid() bool {
if su.Removed == nil {
panic("Mandatory field Removed is not set on StatePeerRemove update")
}
case StateSelfUpdate:
if su.ChangeNodes == nil || len(su.ChangeNodes) != 1 {
panic("Mandatory field ChangeNodes is not set for StateSelfUpdate or has more than one node")
}
case StateDERPUpdated:
if su.DERPMap == nil {
panic("Mandatory field DERPMap is not set on StateDERPUpdated update")

View File

@ -537,7 +537,7 @@ func TestExpireNode(t *testing.T) {
assertNoErr(t, err)
// Assert that we have the original count - self
assert.Len(t, status.Peers(), len(MustTestVersions)-1)
assert.Len(t, status.Peers(), spec["user1"]-1)
}
headscale, err := scenario.Headscale()
@ -560,7 +560,7 @@ func TestExpireNode(t *testing.T) {
t.Logf("Node %s with node_key %s has been expired", node.GetName(), expiredNodeKey.String())
time.Sleep(30 * time.Second)
time.Sleep(2 * time.Minute)
now := time.Now()
@ -572,21 +572,33 @@ func TestExpireNode(t *testing.T) {
if client.Hostname() != node.GetName() {
t.Logf("available peers of %s: %v", client.Hostname(), status.Peers())
// In addition to marking nodes expired, we filter them out during the map response
// this check ensures that the node is either not present, or that it is expired
// if it is in the map response.
// Ensures that the node is present, and that it is expired.
if peerStatus, ok := status.Peer[expiredNodeKey]; ok {
assertNotNil(t, peerStatus.Expired)
assert.Truef(t, peerStatus.KeyExpiry.Before(now), "node %s should have a key expire before %s, was %s", peerStatus.HostName, now.String(), peerStatus.KeyExpiry)
assert.Truef(t, peerStatus.Expired, "node %s should be expired, expired is %v", peerStatus.HostName, peerStatus.Expired)
assert.NotNil(t, peerStatus.KeyExpiry)
t.Logf("node %q should have a key expire before %s, was %s", peerStatus.HostName, now.String(), peerStatus.KeyExpiry)
if peerStatus.KeyExpiry != nil {
assert.Truef(t, peerStatus.KeyExpiry.Before(now), "node %q should have a key expire before %s, was %s", peerStatus.HostName, now.String(), peerStatus.KeyExpiry)
}
// TODO(kradalby): We do not propogate expiry correctly, nodes should be aware
// of their status, and this should be sent directly to the node when its
// expired. This needs a notifier that goes directly to the node (currently we only do peers)
// so fix this in a follow up PR.
// } else {
// assert.True(t, status.Self.Expired)
assert.Truef(t, peerStatus.Expired, "node %q should be expired, expired is %v", peerStatus.HostName, peerStatus.Expired)
_, stderr, _ := client.Execute([]string{"tailscale", "ping", node.GetName()})
if !strings.Contains(stderr, "node key has expired") {
t.Errorf("expected to be unable to ping expired host %q from %q", node.GetName(), client.Hostname())
}
} else {
t.Errorf("failed to find node %q with nodekey (%s) in mapresponse, should be present even if it is expired", node.GetName(), expiredNodeKey)
}
} else {
if status.Self.KeyExpiry != nil {
assert.Truef(t, status.Self.KeyExpiry.Before(now), "node %q should have a key expire before %s, was %s", status.Self.HostName, now.String(), status.Self.KeyExpiry)
}
// NeedsLogin means that the node has understood that it is no longer
// valid.
assert.Equal(t, "NeedsLogin", status.BackendState)
}
}
}

View File

@ -13,8 +13,10 @@ run_tests() {
for ((i = 1; i <= num_tests; i++)); do
docker network prune -f >/dev/null 2>&1
docker rm headscale-test-suite || true
docker kill "$(docker ps -q)" || true
docker rm headscale-test-suite >/dev/null 2>&1 || true
docker kill "$(docker ps -q)" >/dev/null 2>&1 || true
echo "Run $i"
start=$(date +%s)
docker run \

View File

@ -57,9 +57,9 @@ var (
"1.42": true, // CapVer: 61
"1.40": true, // CapVer: 61
"1.38": true, // CapVer: 58
"1.36": true, // CapVer: 56
"1.34": true, // CapVer: 51
"1.32": true, // Oldest supported version, CapVer: 46
"1.36": true, // Oldest supported version, CapVer: 56
"1.34": false, // CapVer: 51
"1.32": false, // CapVer: 46
"1.30": false,
}

View File

@ -142,7 +142,7 @@ func TestTailscaleNodesJoiningHeadcale(t *testing.T) {
})
t.Run("create-tailscale", func(t *testing.T) {
err := scenario.CreateTailscaleNodesInUser(user, "1.30.2", count)
err := scenario.CreateTailscaleNodesInUser(user, "unstable", count)
if err != nil {
t.Fatalf("failed to add tailscale nodes: %s", err)
}