make stream shutdown if self-node has been removed (#2125)
* add shutdown that asserts if headscale had panics

* add test case producing the #2118 panic

* make stream shutdown if self-node has been removed

  Currently we read the node back from the database, and since it has been
  deleted, its id might be set to nil. Keep the node we already have and just
  shut the stream down, so it is cleanly removed from the notifier.

  Fixes #2118

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
commit 64319f79ff
parent 4b02dc9565
@@ -52,6 +52,7 @@ jobs:
           - TestExpireNode
           - TestNodeOnlineStatus
           - TestPingAllByIPManyUpDown
+          - Test2118DeletingOnlineNodePanics
           - TestEnablingRoutes
           - TestHASubnetRouterFailover
           - TestEnableDisableAutoApprovedRoute
@@ -5,6 +5,7 @@ import (
     "fmt"
     "math/rand/v2"
     "net/http"
+    "slices"
     "sort"
     "strings"
     "time"
@@ -273,6 +274,12 @@ func (m *mapSession) serveLongPoll() {
                 return
             }
 
+            // If the node has been removed from headscale, close the stream
+            if slices.Contains(update.Removed, m.node.ID) {
+                m.tracef("node removed, closing stream")
+                return
+            }
+
             m.tracef("received stream update: %s %s", update.Type.String(), update.Message)
             mapResponseUpdateReceived.WithLabelValues(update.Type.String()).Inc()
 
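For context, the hunk above shows only the added check; below is a minimal, self-contained sketch of the kind of update loop it sits in. The loop shape, channel, and type names (StateUpdate, updateCh, NodeID) are assumptions for illustration, not headscale's actual definitions; only the slices.Contains check on update.Removed and the early return come from this commit.

package main

import (
    "fmt"
    "slices"
)

type NodeID uint64

// StateUpdate stands in for the notifier update delivered to each stream;
// Removed carries the IDs of nodes that have been deleted from the server.
type StateUpdate struct {
    Removed []NodeID
}

type mapSession struct {
    nodeID   NodeID
    updateCh chan StateUpdate
}

// serveLongPoll consumes updates until the channel closes or the session's own
// node is reported as removed, at which point the stream shuts down cleanly
// instead of trying to re-read a node that no longer exists in the database.
func (m *mapSession) serveLongPoll() {
    for update := range m.updateCh {
        // If the node has been removed from headscale, close the stream.
        if slices.Contains(update.Removed, m.nodeID) {
            fmt.Println("node removed, closing stream")
            return
        }

        fmt.Printf("stream update with %d removed nodes\n", len(update.Removed))
    }
}

func main() {
    m := &mapSession{nodeID: 1, updateCh: make(chan StateUpdate, 2)}
    m.updateCh <- StateUpdate{Removed: []NodeID{2}} // unrelated node: keep streaming
    m.updateCh <- StateUpdate{Removed: []NodeID{1}} // self node: shut the stream down
    close(m.updateCh)
    m.serveLongPoll()
}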
@@ -6,8 +6,8 @@ import (
 )
 
 type ControlServer interface {
-    Shutdown() error
-    SaveLog(string) error
+    Shutdown() (string, string, error)
+    SaveLog(string) (string, string, error)
     SaveProfile(string) error
     Execute(command []string) (string, error)
     WriteFile(path string, content []byte) error
@@ -17,10 +17,10 @@ func SaveLog(
     pool *dockertest.Pool,
     resource *dockertest.Resource,
     basePath string,
-) error {
+) (string, string, error) {
     err := os.MkdirAll(basePath, os.ModePerm)
     if err != nil {
-        return err
+        return "", "", err
     }
 
     var stdout bytes.Buffer
@@ -41,28 +41,30 @@ func SaveLog(
         },
     )
     if err != nil {
-        return err
+        return "", "", err
     }
 
     log.Printf("Saving logs for %s to %s\n", resource.Container.Name, basePath)
 
+    stdoutPath := path.Join(basePath, resource.Container.Name+".stdout.log")
     err = os.WriteFile(
-        path.Join(basePath, resource.Container.Name+".stdout.log"),
+        stdoutPath,
         stdout.Bytes(),
         filePerm,
     )
     if err != nil {
-        return err
+        return "", "", err
     }
 
+    stderrPath := path.Join(basePath, resource.Container.Name+".stderr.log")
     err = os.WriteFile(
-        path.Join(basePath, resource.Container.Name+".stderr.log"),
+        stderrPath,
         stderr.Bytes(),
         filePerm,
     )
     if err != nil {
-        return err
+        return "", "", err
     }
 
-    return nil
+    return stdoutPath, stderrPath, nil
 }
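A short sketch of a caller picking up the new return values. The SaveLog signature and the /tmp/control base path come from this diff (see HeadscaleInContainer.Shutdown below); the dockertestutil import path and the helper name saveControlLogs are assumptions for illustration.

package integration

import (
    "log"

    "github.com/juanfont/headscale/integration/dockertestutil"
    "github.com/ory/dockertest/v3"
)

// saveControlLogs writes the container's stdout and stderr to /tmp/control and
// reports where the files ended up, so callers can inspect them afterwards.
func saveControlLogs(pool *dockertest.Pool, resource *dockertest.Resource) (string, string, error) {
    stdoutPath, stderrPath, err := dockertestutil.SaveLog(pool, resource, "/tmp/control")
    if err != nil {
        return "", "", err
    }

    // These paths are what ShutdownAssertNoPanics (further down) scans for panics.
    log.Printf("control logs saved to %s and %s", stdoutPath, stderrPath)

    return stdoutPath, stderrPath, nil
}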
@@ -954,3 +954,102 @@ func TestPingAllByIPManyUpDown(t *testing.T) {
         t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
     }
 }
+
+func Test2118DeletingOnlineNodePanics(t *testing.T) {
+    IntegrationSkip(t)
+    t.Parallel()
+
+    scenario, err := NewScenario(dockertestMaxWait())
+    assertNoErr(t, err)
+    defer scenario.ShutdownAssertNoPanics(t)
+
+    // TODO(kradalby): it does not look like the user thing works, only second
+    // get created? maybe only when many?
+    spec := map[string]int{
+        "user1": 1,
+        "user2": 1,
+    }
+
+    err = scenario.CreateHeadscaleEnv(spec,
+        []tsic.Option{},
+        hsic.WithTestName("deletenocrash"),
+        hsic.WithEmbeddedDERPServerOnly(),
+        hsic.WithTLS(),
+        hsic.WithHostnameAsServerURL(),
+    )
+    assertNoErrHeadscaleEnv(t, err)
+
+    allClients, err := scenario.ListTailscaleClients()
+    assertNoErrListClients(t, err)
+
+    allIps, err := scenario.ListTailscaleClientsIPs()
+    assertNoErrListClientIPs(t, err)
+
+    err = scenario.WaitForTailscaleSync()
+    assertNoErrSync(t, err)
+
+    allAddrs := lo.Map(allIps, func(x netip.Addr, index int) string {
+        return x.String()
+    })
+
+    success := pingAllHelper(t, allClients, allAddrs)
+    t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
+
+    headscale, err := scenario.Headscale()
+    assertNoErr(t, err)
+
+    // Test list all nodes after added otherUser
+    var nodeList []v1.Node
+    err = executeAndUnmarshal(
+        headscale,
+        []string{
+            "headscale",
+            "nodes",
+            "list",
+            "--output",
+            "json",
+        },
+        &nodeList,
+    )
+    assert.Nil(t, err)
+    assert.Len(t, nodeList, 2)
+    assert.True(t, nodeList[0].Online)
+    assert.True(t, nodeList[1].Online)
+
+    // Delete the first node, which is online
+    _, err = headscale.Execute(
+        []string{
+            "headscale",
+            "nodes",
+            "delete",
+            "--identifier",
+            // Delete the last added machine
+            fmt.Sprintf("%d", nodeList[0].Id),
+            "--output",
+            "json",
+            "--force",
+        },
+    )
+    assert.Nil(t, err)
+
+    time.Sleep(2 * time.Second)
+
+    // Ensure that the node has been deleted, this did not occur due to a panic.
+    var nodeListAfter []v1.Node
+    err = executeAndUnmarshal(
+        headscale,
+        []string{
+            "headscale",
+            "nodes",
+            "list",
+            "--output",
+            "json",
+        },
+        &nodeListAfter,
+    )
+    assert.Nil(t, err)
+    assert.Len(t, nodeListAfter, 1)
+    assert.True(t, nodeListAfter[0].Online)
+    assert.Equal(t, nodeList[1].Id, nodeListAfter[0].Id)
+
+}
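One design note on the test above: the two-second time.Sleep gives the delete time to propagate before the second node listing. Should that ever prove flaky, a polling wait is a common alternative; the fragment below is a sketch using testify's assert.Eventually, not part of this commit, and would slot in where the sleep is, reusing the headscale and executeAndUnmarshal helpers already in scope.

// Poll until the deleted node disappears from the node list, instead of
// sleeping for a fixed two seconds.
assert.Eventually(t, func() bool {
    var nodes []v1.Node
    err := executeAndUnmarshal(
        headscale,
        []string{"headscale", "nodes", "list", "--output", "json"},
        &nodes,
    )
    return err == nil && len(nodes) == 1
}, 10*time.Second, 500*time.Millisecond, "deleted node should drop out of the node list")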
@@ -398,8 +398,8 @@ func (t *HeadscaleInContainer) hasTLS() bool {
 }
 
 // Shutdown stops and cleans up the Headscale container.
-func (t *HeadscaleInContainer) Shutdown() error {
-    err := t.SaveLog("/tmp/control")
+func (t *HeadscaleInContainer) Shutdown() (string, string, error) {
+    stdoutPath, stderrPath, err := t.SaveLog("/tmp/control")
     if err != nil {
         log.Printf(
             "Failed to save log from control: %s",
@@ -458,12 +458,12 @@ func (t *HeadscaleInContainer) Shutdown() error {
         t.pool.Purge(t.pgContainer)
     }
 
-    return t.pool.Purge(t.container)
+    return stdoutPath, stderrPath, t.pool.Purge(t.container)
 }
 
 // SaveLog saves the current stdout log of the container to a path
 // on the host system.
-func (t *HeadscaleInContainer) SaveLog(path string) error {
+func (t *HeadscaleInContainer) SaveLog(path string) (string, string, error) {
     return dockertestutil.SaveLog(t.pool, t.container, path)
 }
 
@@ -8,6 +8,7 @@ import (
     "os"
     "sort"
     "sync"
+    "testing"
     "time"
 
     v1 "github.com/juanfont/headscale/gen/go/headscale/v1"
@@ -18,6 +19,7 @@ import (
     "github.com/ory/dockertest/v3"
     "github.com/puzpuzpuz/xsync/v3"
     "github.com/samber/lo"
+    "github.com/stretchr/testify/assert"
     "golang.org/x/sync/errgroup"
     "tailscale.com/envknob"
 )
@@ -187,13 +189,9 @@ func NewScenario(maxWait time.Duration) (*Scenario, error) {
     }, nil
 }
 
-// Shutdown shuts down and cleans up all the containers (ControlServer, TailscaleClient)
-// and networks associated with it.
-// In addition, it will save the logs of the ControlServer to `/tmp/control` in the
-// environment running the tests.
-func (s *Scenario) Shutdown() {
+func (s *Scenario) ShutdownAssertNoPanics(t *testing.T) {
     s.controlServers.Range(func(_ string, control ControlServer) bool {
-        err := control.Shutdown()
+        stdoutPath, stderrPath, err := control.Shutdown()
         if err != nil {
             log.Printf(
                 "Failed to shut down control: %s",
@@ -201,6 +199,16 @@ func (s *Scenario) Shutdown() {
             )
         }
 
+        if t != nil {
+            stdout, err := os.ReadFile(stdoutPath)
+            assert.NoError(t, err)
+            assert.NotContains(t, string(stdout), "panic")
+
+            stderr, err := os.ReadFile(stderrPath)
+            assert.NoError(t, err)
+            assert.NotContains(t, string(stderr), "panic")
+        }
+
         return true
     })
 
@@ -224,6 +232,14 @@ func (s *Scenario) Shutdown() {
     // }
 }
 
+// Shutdown shuts down and cleans up all the containers (ControlServer, TailscaleClient)
+// and networks associated with it.
+// In addition, it will save the logs of the ControlServer to `/tmp/control` in the
+// environment running the tests.
+func (s *Scenario) Shutdown() {
+    s.ShutdownAssertNoPanics(nil)
+}
+
 // Users returns the name of all users associated with the Scenario.
 func (s *Scenario) Users() []string {
     users := make([]string, 0)
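As a usage note, a sketch rather than code from this commit: integration tests that want the panic check opt in by deferring ShutdownAssertNoPanics with their *testing.T, exactly as Test2118DeletingOnlineNodePanics does above, while existing tests keep calling Shutdown and get the old behaviour through the nil pass-through. The test name below is hypothetical; NewScenario, assertNoErr, and dockertestMaxWait are the same integration-package helpers used in the test above.

func TestExampleWithPanicCheck(t *testing.T) {
    scenario, err := NewScenario(dockertestMaxWait())
    assertNoErr(t, err)

    // New in this commit: fail the test if the control server's saved
    // stdout/stderr contain a panic when the scenario is torn down.
    defer scenario.ShutdownAssertNoPanics(t)

    // The pre-existing call is still available and skips the assertions,
    // since it forwards a nil *testing.T:
    // defer scenario.Shutdown()

    // ... test body ...
}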
@@ -998,7 +998,9 @@ func (t *TailscaleInContainer) WriteFile(path string, data []byte) error {
 // SaveLog saves the current stdout log of the container to a path
 // on the host system.
 func (t *TailscaleInContainer) SaveLog(path string) error {
-    return dockertestutil.SaveLog(t.pool, t.container, path)
+    // TODO(kradalby): Assert if tailscale logs contains panics.
+    _, _, err := dockertestutil.SaveLog(t.pool, t.container, path)
+    return err
 }
 
 // ReadFile reads a file from the Tailscale container.
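A sketch of one way the TODO above could later be addressed, mirroring the panic scan used for the control server. The method name, the extra *testing.T parameter, and the added os and testify/assert imports are all assumptions, not part of this commit; only dockertestutil.SaveLog and its return values come from the diff.

// SaveLogAssertNoPanics is a hypothetical variant of SaveLog that also fails
// the test if the Tailscale container's output contains a panic.
func (t *TailscaleInContainer) SaveLogAssertNoPanics(tt *testing.T, path string) error {
    stdoutPath, stderrPath, err := dockertestutil.SaveLog(t.pool, t.container, path)
    if err != nil {
        return err
    }

    // Same check Scenario.ShutdownAssertNoPanics applies to the headscale logs.
    for _, p := range []string{stdoutPath, stderrPath} {
        content, err := os.ReadFile(p)
        assert.NoError(tt, err)
        assert.NotContains(tt, string(content), "panic")
    }

    return nil
}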