Merge pull request #142 from kradalby/loopy-loop

This commit is contained in:
Kristoffer Dalby 2021-10-07 16:08:33 +01:00 committed by GitHub
commit fd8d888ddb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 89 additions and 170 deletions

2
api.go
View File

@ -277,7 +277,7 @@ func (h *Headscale) getMapResponse(mKey wgkey.Key, req tailcfg.MapRequest, m *Ma
log.Trace().
Str("func", "getMapResponse").
Str("machine", req.Hostinfo.Hostname).
Interface("payload", resp).
// Interface("payload", resp).
Msgf("Generated map response: %s", tailMapResponseToString(resp))
var respBody []byte

31
app.go
View File

@ -5,6 +5,7 @@ import (
"fmt"
"net/http"
"os"
"sort"
"strings"
"sync"
"time"
@ -65,9 +66,6 @@ type Headscale struct {
aclPolicy *ACLPolicy
aclRules *[]tailcfg.FilterRule
clientsUpdateChannels sync.Map
clientsUpdateChannelMutex sync.Mutex
lastStateChange sync.Map
}
@ -145,10 +143,9 @@ func (h *Headscale) expireEphemeralNodesWorker() {
if err != nil {
log.Error().Err(err).Str("machine", m.Name).Msg("🤮 Cannot delete ephemeral machine from the database")
}
updateRequestsFromNode.WithLabelValues("ephemeral-node-update").Inc()
h.notifyChangesToPeers(&m)
}
}
h.setLastStateChangeToNow(ns.Name)
}
}
@ -251,14 +248,28 @@ func (h *Headscale) setLastStateChangeToNow(namespace string) {
h.lastStateChange.Store(namespace, now)
}
func (h *Headscale) getLastStateChange(namespace string) time.Time {
func (h *Headscale) getLastStateChange(namespaces ...string) time.Time {
times := []time.Time{}
for _, namespace := range namespaces {
if wrapped, ok := h.lastStateChange.Load(namespace); ok {
lastChange, _ := wrapped.(time.Time)
return lastChange
times = append(times, lastChange)
}
}
now := time.Now().UTC()
h.lastStateChange.Store(namespace, now)
return now
sort.Slice(times, func(i, j int) bool {
return times[i].After(times[j])
})
log.Trace().Msgf("Latest times %#v", times)
if len(times) == 0 {
return time.Now().UTC()
} else {
return times[0]
}
}

1
go.mod
View File

@ -10,6 +10,7 @@ require (
github.com/docker/cli v20.10.8+incompatible // indirect
github.com/docker/docker v20.10.8+incompatible // indirect
github.com/efekarakus/termcolor v1.0.1
github.com/fatih/set v0.2.1 // indirect
github.com/gin-gonic/gin v1.7.4
github.com/gofrs/uuid v4.0.0+incompatible
github.com/google/go-github v17.0.0+incompatible // indirect

2
go.sum
View File

@ -204,6 +204,8 @@ github.com/fanliao/go-promise v0.0.0-20141029170127-1890db352a72/go.mod h1:Pjfxu
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU=
github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM=
github.com/fatih/set v0.2.1 h1:nn2CaJyknWE/6txyUDGwysr3G5QC6xWB/PtVjPBbeaA=
github.com/fatih/set v0.2.1/go.mod h1:+RKtMCH+favT2+3YecHGxcc0b4KyVWA1QWWJUs4E0CI=
github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4=
github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20=

View File

@ -2,13 +2,13 @@ package headscale
import (
"encoding/json"
"errors"
"fmt"
"sort"
"strconv"
"strings"
"time"
"github.com/fatih/set"
"github.com/rs/zerolog/log"
"gorm.io/datatypes"
@ -66,7 +66,7 @@ func (h *Headscale) getDirectPeers(m *Machine) (Machines, error) {
if err := h.db.Where("namespace_id = ? AND machine_key <> ? AND registered",
m.NamespaceID, m.MachineKey).Find(&machines).Error; err != nil {
log.Error().Err(err).Msg("Error accessing db")
return nil, err
return Machines{}, err
}
sort.Slice(machines, func(i, j int) bool { return machines[i].ID < machines[j].ID })
@ -88,7 +88,7 @@ func (h *Headscale) getShared(m *Machine) (Machines, error) {
sharedMachines := []SharedMachine{}
if err := h.db.Preload("Namespace").Preload("Machine").Where("namespace_id = ?",
m.NamespaceID).Find(&sharedMachines).Error; err != nil {
return nil, err
return Machines{}, err
}
peers := make(Machines, 0)
@ -112,7 +112,7 @@ func (h *Headscale) getPeers(m *Machine) (Machines, error) {
Str("func", "getPeers").
Err(err).
Msg("Cannot fetch peers")
return nil, err
return Machines{}, err
}
shared, err := h.getShared(m)
@ -121,7 +121,7 @@ func (h *Headscale) getPeers(m *Machine) (Machines, error) {
Str("func", "getDirectPeers").
Err(err).
Msg("Cannot fetch peers")
return nil, err
return Machines{}, err
}
peers := append(direct, shared...)
@ -214,118 +214,31 @@ func (m *Machine) GetHostInfo() (*tailcfg.Hostinfo, error) {
return &hostinfo, nil
}
func (h *Headscale) notifyChangesToPeers(m *Machine) {
peers, err := h.getPeers(m)
if err != nil {
log.Error().
Str("func", "notifyChangesToPeers").
Str("machine", m.Name).
Msgf("Error getting peers: %s", err)
return
}
for _, peer := range peers {
log.Info().
Str("func", "notifyChangesToPeers").
Str("machine", m.Name).
Str("peer", peer.Name).
Str("address", peer.IPAddress).
Msgf("Notifying peer %s (%s)", peer.Name, peer.IPAddress)
err := h.sendRequestOnUpdateChannel(&peer)
if err != nil {
log.Info().
Str("func", "notifyChangesToPeers").
Str("machine", m.Name).
Str("peer", peer.Name).
Msgf("Peer %s does not have an open update client, skipping.", peer.Name)
continue
}
log.Trace().
Str("func", "notifyChangesToPeers").
Str("machine", m.Name).
Str("peer", peer.Name).
Str("address", peer.IPAddress).
Msgf("Notified peer %s (%s)", peer.Name, peer.IPAddress)
}
}
func (h *Headscale) getOrOpenUpdateChannel(m *Machine) <-chan struct{} {
var updateChan chan struct{}
if storedChan, ok := h.clientsUpdateChannels.Load(m.ID); ok {
if unwrapped, ok := storedChan.(chan struct{}); ok {
updateChan = unwrapped
} else {
log.Error().
Str("handler", "openUpdateChannel").
Str("machine", m.Name).
Msg("Failed to convert update channel to struct{}")
}
} else {
log.Debug().
Str("handler", "openUpdateChannel").
Str("machine", m.Name).
Msg("Update channel not found, creating")
updateChan = make(chan struct{})
h.clientsUpdateChannels.Store(m.ID, updateChan)
}
return updateChan
}
func (h *Headscale) closeUpdateChannel(m *Machine) {
h.clientsUpdateChannelMutex.Lock()
defer h.clientsUpdateChannelMutex.Unlock()
if storedChan, ok := h.clientsUpdateChannels.Load(m.ID); ok {
if unwrapped, ok := storedChan.(chan struct{}); ok {
close(unwrapped)
}
}
h.clientsUpdateChannels.Delete(m.ID)
}
func (h *Headscale) sendRequestOnUpdateChannel(m *Machine) error {
h.clientsUpdateChannelMutex.Lock()
defer h.clientsUpdateChannelMutex.Unlock()
pUp, ok := h.clientsUpdateChannels.Load(uint64(m.ID))
if ok {
log.Info().
Str("func", "requestUpdate").
Str("machine", m.Name).
Msgf("Notifying peer %s", m.Name)
if update, ok := pUp.(chan struct{}); ok {
log.Trace().
Str("func", "requestUpdate").
Str("machine", m.Name).
Msgf("Update channel is %#v", update)
updateRequestsToNode.Inc()
update <- struct{}{}
log.Trace().
Str("func", "requestUpdate").
Str("machine", m.Name).
Msgf("Notified machine %s", m.Name)
}
} else {
err := errors.New("machine does not have an open update channel")
log.Info().
Str("func", "requestUpdate").
Str("machine", m.Name).
Msgf("Machine %s does not have an open update channel", m.Name)
return err
}
return nil
}
func (h *Headscale) isOutdated(m *Machine) bool {
err := h.UpdateMachine(m)
if err != nil {
// It does not seem meaningful to propagate this error as the end result
// will have to be that the machine has to be considered outdated.
return true
}
lastChange := h.getLastStateChange(m.Namespace.Name)
sharedMachines, _ := h.getShared(m)
namespaceSet := set.New(set.ThreadSafe)
namespaceSet.Add(m.Namespace.Name)
// Check if any of our shared namespaces has updates that we have
// not propagated.
for _, sharedMachine := range sharedMachines {
namespaceSet.Add(sharedMachine.Namespace.Name)
}
namespaces := make([]string, namespaceSet.Size())
for index, namespace := range namespaceSet.List() {
namespaces[index] = namespace.(string)
}
lastChange := h.getLastStateChange(namespaces...)
log.Trace().
Str("func", "keepAlive").
Str("machine", m.Name).

View File

@ -26,16 +26,16 @@ var (
Namespace: prometheusNamespace,
Name: "update_request_from_node_total",
Help: "The number of updates requested by a node/update function",
}, []string{"state"})
updateRequestsToNode = promauto.NewCounter(prometheus.CounterOpts{
}, []string{"namespace", "machine", "state"})
updateRequestsSentToNode = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "update_request_to_node_total",
Name: "update_request_sent_to_node_total",
Help: "The number of calls/messages issued on a specific nodes update channel",
})
}, []string{"namespace", "machine", "status"})
//TODO(kradalby): This is very debugging, we might want to remove it.
updateRequestsReceivedOnChannel = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "update_request_received_on_channel_total",
Help: "The number of update requests received on an update channel",
}, []string{"machine"})
}, []string{"namespace", "machine"})
)

View File

@ -176,24 +176,17 @@ func (h *Headscale) checkForNamespacesPendingUpdates() {
return
}
names := []string{}
err = json.Unmarshal([]byte(v), &names)
namespaces := []string{}
err = json.Unmarshal([]byte(v), &namespaces)
if err != nil {
return
}
for _, name := range names {
for _, namespace := range namespaces {
log.Trace().
Str("func", "RequestMapUpdates").
Str("machine", name).
Msg("Sending updates to nodes in namespace")
machines, err := h.ListMachinesInNamespace(name)
if err != nil {
continue
}
for _, m := range *machines {
updateRequestsFromNode.WithLabelValues("namespace-update").Inc()
h.notifyChangesToPeers(&m)
}
Str("machine", namespace).
Msg("Sending updates to nodes in namespacespace")
h.setLastStateChangeToNow(namespace)
}
newV, err := h.getValue("namespaces_pending_updates")
if err != nil {

39
poll.go
View File

@ -140,10 +140,9 @@ func (h *Headscale) PollNetMapHandler(c *gin.Context) {
Str("id", c.Param("id")).
Str("machine", m.Name).
Msg("Loading or creating update channel")
updateChan := h.getOrOpenUpdateChannel(m)
updateChan := make(chan struct{})
pollDataChan := make(chan []byte)
// defer close(pollData)
keepAliveChan := make(chan []byte)
@ -159,8 +158,8 @@ func (h *Headscale) PollNetMapHandler(c *gin.Context) {
// It sounds like we should update the nodes when we have received a endpoint update
// even tho the comments in the tailscale code dont explicitly say so.
updateRequestsFromNode.WithLabelValues("endpoint-update").Inc()
go h.notifyChangesToPeers(m)
updateRequestsFromNode.WithLabelValues(m.Name, m.Namespace.Name, "endpoint-update").Inc()
go func() { updateChan <- struct{}{} }()
return
} else if req.OmitPeers && req.Stream {
log.Warn().
@ -185,8 +184,8 @@ func (h *Headscale) PollNetMapHandler(c *gin.Context) {
Str("handler", "PollNetMap").
Str("machine", m.Name).
Msg("Notifying peers")
updateRequestsFromNode.WithLabelValues("full-update").Inc()
go h.notifyChangesToPeers(m)
updateRequestsFromNode.WithLabelValues(m.Name, m.Namespace.Name, "full-update").Inc()
go func() { updateChan <- struct{}{} }()
h.PollNetMapStream(c, m, req, mKey, pollDataChan, keepAliveChan, updateChan, cancelKeepAlive)
log.Trace().
@ -206,10 +205,10 @@ func (h *Headscale) PollNetMapStream(
mKey wgkey.Key,
pollDataChan chan []byte,
keepAliveChan chan []byte,
updateChan <-chan struct{},
updateChan chan struct{},
cancelKeepAlive chan struct{},
) {
go h.scheduledPollWorker(cancelKeepAlive, keepAliveChan, mKey, req, m)
go h.scheduledPollWorker(cancelKeepAlive, updateChan, keepAliveChan, mKey, req, m)
c.Stream(func(w io.Writer) bool {
log.Trace().
@ -325,7 +324,7 @@ func (h *Headscale) PollNetMapStream(
Str("machine", m.Name).
Str("channel", "update").
Msg("Received a request for update")
updateRequestsReceivedOnChannel.WithLabelValues(m.Name).Inc()
updateRequestsReceivedOnChannel.WithLabelValues(m.Name, m.Namespace.Name).Inc()
if h.isOutdated(m) {
log.Debug().
Str("handler", "PollNetMapStream").
@ -350,6 +349,7 @@ func (h *Headscale) PollNetMapStream(
Str("channel", "update").
Err(err).
Msg("Could not write the map response")
updateRequestsSentToNode.WithLabelValues(m.Name, m.Namespace.Name, "failed").Inc()
return false
}
log.Trace().
@ -357,6 +357,7 @@ func (h *Headscale) PollNetMapStream(
Str("machine", m.Name).
Str("channel", "update").
Msg("Updated Map has been sent")
updateRequestsSentToNode.WithLabelValues(m.Name, m.Namespace.Name, "success").Inc()
// Keep track of the last successful update,
// we sometimes end in a state were the update
@ -423,7 +424,8 @@ func (h *Headscale) PollNetMapStream(
Str("machine", m.Name).
Str("channel", "Done").
Msg("Closing update channel")
h.closeUpdateChannel(m)
//h.closeUpdateChannel(m)
close(updateChan)
log.Trace().
Str("handler", "PollNetMapStream").
@ -446,13 +448,14 @@ func (h *Headscale) PollNetMapStream(
func (h *Headscale) scheduledPollWorker(
cancelChan <-chan struct{},
updateChan chan<- struct{},
keepAliveChan chan<- []byte,
mKey wgkey.Key,
req tailcfg.MapRequest,
m *Machine,
) {
keepAliveTicker := time.NewTicker(60 * time.Second)
updateCheckerTicker := time.NewTicker(30 * time.Second)
updateCheckerTicker := time.NewTicker(10 * time.Second)
for {
select {
@ -476,16 +479,12 @@ func (h *Headscale) scheduledPollWorker(
keepAliveChan <- data
case <-updateCheckerTicker.C:
// Send an update request regardless of outdated or not, if data is sent
// to the node is determined in the updateChan consumer block
err := h.sendRequestOnUpdateChannel(m)
if err != nil {
log.Error().
Str("func", "keepAlive").
log.Debug().
Str("func", "scheduledPollWorker").
Str("machine", m.Name).
Err(err).
Msgf("Failed to send update request to %s", m.Name)
}
Msg("Sending update request")
updateRequestsFromNode.WithLabelValues(m.Name, m.Namespace.Name, "scheduled-update").Inc()
updateChan <- struct{}{}
}
}
}