Mirror of https://github.com/juanfont/headscale.git
stability and race conditions in auth and node store (#2781)
This PR addresses some consistency issues that were introduced or discovered with the nodestore.

nodestore: PutNode and UpdateNode now return the node as it is once the write has finished. This closes a race condition where reading the node back did not necessarily return it with the given change applied, and it ensures the returned node also carries all the other updates from the same batched write.

auth: the authentication paths have been unified and simplified. This removes a lot of bad branches and ensures we only do the minimal work. A comprehensive auth test suite has been created so we do not have to run integration tests to validate auth, and it has allowed us to generate test cases for all the branches we currently know of.

integration: added a lot more tooling and checks to validate that nodes reach the expected state when they come up and down, standardised across the different auth models. A lot of this exists to support or detect the issues addressed in the nodestore changes (races) and the auth changes (inconsistencies after login, and reaching the correct state).

This PR was assisted, particularly on the tests, by Claude Code.
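For orientation, here is the contract change in miniature, written in the style of the tests in this diff. This is a sketch only: it reuses the in-package helpers createTestNode and allowAllPeersFunc from the existing test file, and the function itself is illustrative, not part of the PR.

    // Sketch: the new PutNode/UpdateNode return-value contract.
    func TestPutNodeReturnsAppliedState(t *testing.T) {
    	store := NewNodeStore(nil, allowAllPeersFunc)
    	store.Start()
    	defer store.Stop()

    	node := createTestNode(1, 1, "user1", "node1")

    	// Old pattern: write, then read back. The read could race with the
    	// batched write and observe a snapshot without this change:
    	//   store.PutNode(node)
    	//   n, _ := store.GetNode(node.ID)

    	// New pattern: the write returns the node as it exists once the
    	// batch containing it has been applied.
    	n := store.PutNode(node)
    	if !n.Valid() {
    		t.Fatal("PutNode should return a valid view")
    	}

    	// UpdateNode likewise returns the applied view plus an existence
    	// flag, and the view reflects every change from the same batch.
    	updated, ok := store.UpdateNode(node.ID, func(n *types.Node) {
    		n.Hostname = "renamed"
    	})
    	if !ok || updated.Hostname() != "renamed" {
    		t.Fatal("UpdateNode should return the applied state")
    	}
    }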
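The same return value is what makes the "database save" tests below safe: the caller persists exactly the state the store applied instead of re-reading it. A hypothetical write path, where persistNode is a stand-in parameter rather than an API from this PR:

    // Sketch: persist the applied state directly; no read-back needed.
    func saveRenamedNode(store *NodeStore, id types.NodeID, persistNode func(*types.Node) error) error {
    	updated, ok := store.UpdateNode(id, func(n *types.Node) {
    		n.GivenName = "renamed"
    	})
    	if !ok {
    		return fmt.Errorf("node %d not found", id)
    	}
    	// AsStruct yields a full *types.Node carrying every change from the
    	// batch this update landed in, suitable for a direct database save.
    	return persistNode(updated.AsStruct())
    }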
@@ -1,7 +1,11 @@
 package state
 
 import (
+	"context"
+	"fmt"
 	"net/netip"
+	"runtime"
 	"sync"
 	"testing"
+	"time"
 
@@ -249,7 +253,9 @@ func TestNodeStoreOperations(t *testing.T) {
 					name: "add first node",
 					action: func(store *NodeStore) {
 						node := createTestNode(1, 1, "user1", "node1")
-						store.PutNode(node)
+						resultNode := store.PutNode(node)
+						assert.True(t, resultNode.Valid(), "PutNode should return valid node")
+						assert.Equal(t, node.ID, resultNode.ID())
 
 						snapshot := store.data.Load()
 						assert.Len(t, snapshot.nodesByID, 1)
@@ -288,7 +294,9 @@ func TestNodeStoreOperations(t *testing.T) {
 					name: "add second node same user",
 					action: func(store *NodeStore) {
 						node2 := createTestNode(2, 1, "user1", "node2")
-						store.PutNode(node2)
+						resultNode := store.PutNode(node2)
+						assert.True(t, resultNode.Valid(), "PutNode should return valid node")
+						assert.Equal(t, types.NodeID(2), resultNode.ID())
 
 						snapshot := store.data.Load()
 						assert.Len(t, snapshot.nodesByID, 2)
@@ -308,7 +316,9 @@ func TestNodeStoreOperations(t *testing.T) {
 					name: "add third node different user",
 					action: func(store *NodeStore) {
 						node3 := createTestNode(3, 2, "user2", "node3")
-						store.PutNode(node3)
+						resultNode := store.PutNode(node3)
+						assert.True(t, resultNode.Valid(), "PutNode should return valid node")
+						assert.Equal(t, types.NodeID(3), resultNode.ID())
 
 						snapshot := store.data.Load()
 						assert.Len(t, snapshot.nodesByID, 3)
@@ -409,10 +419,14 @@ func TestNodeStoreOperations(t *testing.T) {
 				{
 					name: "update node hostname",
 					action: func(store *NodeStore) {
-						store.UpdateNode(1, func(n *types.Node) {
+						resultNode, ok := store.UpdateNode(1, func(n *types.Node) {
 							n.Hostname = "updated-node1"
 							n.GivenName = "updated-node1"
 						})
+						assert.True(t, ok, "UpdateNode should return true for existing node")
+						assert.True(t, resultNode.Valid(), "Result node should be valid")
+						assert.Equal(t, "updated-node1", resultNode.Hostname())
+						assert.Equal(t, "updated-node1", resultNode.GivenName())
 
 						snapshot := store.data.Load()
 						assert.Equal(t, "updated-node1", snapshot.nodesByID[1].Hostname)
@@ -436,10 +450,14 @@ func TestNodeStoreOperations(t *testing.T) {
 					name: "add nodes with odd-even filtering",
 					action: func(store *NodeStore) {
 						// Add nodes in sequence
-						store.PutNode(createTestNode(1, 1, "user1", "node1"))
-						store.PutNode(createTestNode(2, 2, "user2", "node2"))
-						store.PutNode(createTestNode(3, 3, "user3", "node3"))
-						store.PutNode(createTestNode(4, 4, "user4", "node4"))
+						n1 := store.PutNode(createTestNode(1, 1, "user1", "node1"))
+						assert.True(t, n1.Valid())
+						n2 := store.PutNode(createTestNode(2, 2, "user2", "node2"))
+						assert.True(t, n2.Valid())
+						n3 := store.PutNode(createTestNode(3, 3, "user3", "node3"))
+						assert.True(t, n3.Valid())
+						n4 := store.PutNode(createTestNode(4, 4, "user4", "node4"))
+						assert.True(t, n4.Valid())
 
 						snapshot := store.data.Load()
 						assert.Len(t, snapshot.nodesByID, 4)
@@ -478,6 +496,328 @@ func TestNodeStoreOperations(t *testing.T) {
 				},
 			},
 		},
+		{
+			name: "test batch modifications return correct node state",
+			setupFunc: func(t *testing.T) *NodeStore {
+				node1 := createTestNode(1, 1, "user1", "node1")
+				node2 := createTestNode(2, 1, "user1", "node2")
+				initialNodes := types.Nodes{&node1, &node2}
+				return NewNodeStore(initialNodes, allowAllPeersFunc)
+			},
+			steps: []testStep{
+				{
+					name: "verify initial state",
+					action: func(store *NodeStore) {
+						snapshot := store.data.Load()
+						assert.Len(t, snapshot.nodesByID, 2)
+						assert.Equal(t, "node1", snapshot.nodesByID[1].Hostname)
+						assert.Equal(t, "node2", snapshot.nodesByID[2].Hostname)
+					},
+				},
+				{
+					name: "concurrent updates should reflect all batch changes",
+					action: func(store *NodeStore) {
+						// Start multiple updates that will be batched together
+						done1 := make(chan struct{})
+						done2 := make(chan struct{})
+						done3 := make(chan struct{})
+
+						var resultNode1, resultNode2 types.NodeView
+						var newNode3 types.NodeView
+						var ok1, ok2 bool
+
+						// These should all be processed in the same batch
+						go func() {
+							resultNode1, ok1 = store.UpdateNode(1, func(n *types.Node) {
+								n.Hostname = "batch-updated-node1"
+								n.GivenName = "batch-given-1"
+							})
+							close(done1)
+						}()
+
+						go func() {
+							resultNode2, ok2 = store.UpdateNode(2, func(n *types.Node) {
+								n.Hostname = "batch-updated-node2"
+								n.GivenName = "batch-given-2"
+							})
+							close(done2)
+						}()
+
+						go func() {
+							node3 := createTestNode(3, 1, "user1", "node3")
+							newNode3 = store.PutNode(node3)
+							close(done3)
+						}()
+
+						// Wait for all operations to complete
+						<-done1
+						<-done2
+						<-done3
+
+						// Verify the returned nodes reflect the batch state
+						assert.True(t, ok1, "UpdateNode should succeed for node 1")
+						assert.True(t, ok2, "UpdateNode should succeed for node 2")
+						assert.True(t, resultNode1.Valid())
+						assert.True(t, resultNode2.Valid())
+						assert.True(t, newNode3.Valid())
+
+						// Check that returned nodes have the updated values
+						assert.Equal(t, "batch-updated-node1", resultNode1.Hostname())
+						assert.Equal(t, "batch-given-1", resultNode1.GivenName())
+						assert.Equal(t, "batch-updated-node2", resultNode2.Hostname())
+						assert.Equal(t, "batch-given-2", resultNode2.GivenName())
+						assert.Equal(t, "node3", newNode3.Hostname())
+
+						// Verify the snapshot also reflects all changes
+						snapshot := store.data.Load()
+						assert.Len(t, snapshot.nodesByID, 3)
+						assert.Equal(t, "batch-updated-node1", snapshot.nodesByID[1].Hostname)
+						assert.Equal(t, "batch-updated-node2", snapshot.nodesByID[2].Hostname)
+						assert.Equal(t, "node3", snapshot.nodesByID[3].Hostname)
+
+						// Verify peer relationships are updated correctly with new node
+						assert.Len(t, snapshot.peersByNode[1], 2) // sees nodes 2 and 3
+						assert.Len(t, snapshot.peersByNode[2], 2) // sees nodes 1 and 3
+						assert.Len(t, snapshot.peersByNode[3], 2) // sees nodes 1 and 2
+					},
+				},
+				{
+					name: "update non-existent node returns invalid view",
+					action: func(store *NodeStore) {
+						resultNode, ok := store.UpdateNode(999, func(n *types.Node) {
+							n.Hostname = "should-not-exist"
+						})
+
+						assert.False(t, ok, "UpdateNode should return false for non-existent node")
+						assert.False(t, resultNode.Valid(), "Result should be invalid NodeView")
+					},
+				},
+				{
+					name: "multiple updates to same node in batch all see final state",
+					action: func(store *NodeStore) {
+						// This test verifies that when multiple updates to the same node
+						// are batched together, each returned node reflects ALL changes
+						// in the batch, not just the individual update's changes.
+
+						done1 := make(chan struct{})
+						done2 := make(chan struct{})
+						done3 := make(chan struct{})
+
+						var resultNode1, resultNode2, resultNode3 types.NodeView
+						var ok1, ok2, ok3 bool
+
+						// These updates all modify node 1 and should be batched together
+						// The final state should have all three modifications applied
+						go func() {
+							resultNode1, ok1 = store.UpdateNode(1, func(n *types.Node) {
+								n.Hostname = "multi-update-hostname"
+							})
+							close(done1)
+						}()
+
+						go func() {
+							resultNode2, ok2 = store.UpdateNode(1, func(n *types.Node) {
+								n.GivenName = "multi-update-givenname"
+							})
+							close(done2)
+						}()
+
+						go func() {
+							resultNode3, ok3 = store.UpdateNode(1, func(n *types.Node) {
+								n.ForcedTags = []string{"tag1", "tag2"}
+							})
+							close(done3)
+						}()
+
+						// Wait for all operations to complete
+						<-done1
+						<-done2
+						<-done3
+
+						// All updates should succeed
+						assert.True(t, ok1, "First update should succeed")
+						assert.True(t, ok2, "Second update should succeed")
+						assert.True(t, ok3, "Third update should succeed")
+
+						// CRITICAL: Each returned node should reflect ALL changes from the batch
+						// not just the change from its specific update call
+
+						// resultNode1 (from hostname update) should also have the givenname and tags changes
+						assert.Equal(t, "multi-update-hostname", resultNode1.Hostname())
+						assert.Equal(t, "multi-update-givenname", resultNode1.GivenName())
+						assert.Equal(t, []string{"tag1", "tag2"}, resultNode1.ForcedTags().AsSlice())
+
+						// resultNode2 (from givenname update) should also have the hostname and tags changes
+						assert.Equal(t, "multi-update-hostname", resultNode2.Hostname())
+						assert.Equal(t, "multi-update-givenname", resultNode2.GivenName())
+						assert.Equal(t, []string{"tag1", "tag2"}, resultNode2.ForcedTags().AsSlice())
+
+						// resultNode3 (from tags update) should also have the hostname and givenname changes
+						assert.Equal(t, "multi-update-hostname", resultNode3.Hostname())
+						assert.Equal(t, "multi-update-givenname", resultNode3.GivenName())
+						assert.Equal(t, []string{"tag1", "tag2"}, resultNode3.ForcedTags().AsSlice())
+
+						// Verify the snapshot also has all changes
+						snapshot := store.data.Load()
+						finalNode := snapshot.nodesByID[1]
+						assert.Equal(t, "multi-update-hostname", finalNode.Hostname)
+						assert.Equal(t, "multi-update-givenname", finalNode.GivenName)
+						assert.Equal(t, []string{"tag1", "tag2"}, finalNode.ForcedTags)
+					},
+				},
+			},
+		},
+		{
+			name: "test UpdateNode result is immutable for database save",
+			setupFunc: func(t *testing.T) *NodeStore {
+				node1 := createTestNode(1, 1, "user1", "node1")
+				node2 := createTestNode(2, 1, "user1", "node2")
+				initialNodes := types.Nodes{&node1, &node2}
+				return NewNodeStore(initialNodes, allowAllPeersFunc)
+			},
+			steps: []testStep{
+				{
+					name: "verify returned node is complete and consistent",
+					action: func(store *NodeStore) {
+						// Update a node and verify the returned view is complete
+						resultNode, ok := store.UpdateNode(1, func(n *types.Node) {
+							n.Hostname = "db-save-hostname"
+							n.GivenName = "db-save-given"
+							n.ForcedTags = []string{"db-tag1", "db-tag2"}
+						})
+
+						assert.True(t, ok, "UpdateNode should succeed")
+						assert.True(t, resultNode.Valid(), "Result should be valid")
+
+						// Verify the returned node has all expected values
+						assert.Equal(t, "db-save-hostname", resultNode.Hostname())
+						assert.Equal(t, "db-save-given", resultNode.GivenName())
+						assert.Equal(t, []string{"db-tag1", "db-tag2"}, resultNode.ForcedTags().AsSlice())
+
+						// Convert to struct as would be done for database save
+						nodePtr := resultNode.AsStruct()
+						assert.NotNil(t, nodePtr)
+						assert.Equal(t, "db-save-hostname", nodePtr.Hostname)
+						assert.Equal(t, "db-save-given", nodePtr.GivenName)
+						assert.Equal(t, []string{"db-tag1", "db-tag2"}, nodePtr.ForcedTags)
+
+						// Verify the snapshot also reflects the same state
+						snapshot := store.data.Load()
+						storedNode := snapshot.nodesByID[1]
+						assert.Equal(t, "db-save-hostname", storedNode.Hostname)
+						assert.Equal(t, "db-save-given", storedNode.GivenName)
+						assert.Equal(t, []string{"db-tag1", "db-tag2"}, storedNode.ForcedTags)
+					},
+				},
+				{
+					name: "concurrent updates all return consistent final state for DB save",
+					action: func(store *NodeStore) {
+						// Multiple goroutines updating the same node
+						// All should receive the final batch state suitable for DB save
+						done1 := make(chan struct{})
+						done2 := make(chan struct{})
+						done3 := make(chan struct{})
+
+						var result1, result2, result3 types.NodeView
+						var ok1, ok2, ok3 bool
+
+						// Start concurrent updates
+						go func() {
+							result1, ok1 = store.UpdateNode(1, func(n *types.Node) {
+								n.Hostname = "concurrent-db-hostname"
+							})
+							close(done1)
+						}()
+
+						go func() {
+							result2, ok2 = store.UpdateNode(1, func(n *types.Node) {
+								n.GivenName = "concurrent-db-given"
+							})
+							close(done2)
+						}()
+
+						go func() {
+							result3, ok3 = store.UpdateNode(1, func(n *types.Node) {
+								n.ForcedTags = []string{"concurrent-tag"}
+							})
+							close(done3)
+						}()
+
+						// Wait for all to complete
+						<-done1
+						<-done2
+						<-done3
+
+						assert.True(t, ok1 && ok2 && ok3, "All updates should succeed")
+
+						// All results should be valid and suitable for database save
+						assert.True(t, result1.Valid())
+						assert.True(t, result2.Valid())
+						assert.True(t, result3.Valid())
+
+						// Convert each to struct as would be done for DB save
+						nodePtr1 := result1.AsStruct()
+						nodePtr2 := result2.AsStruct()
+						nodePtr3 := result3.AsStruct()
+
+						// All should have the complete final state
+						assert.Equal(t, "concurrent-db-hostname", nodePtr1.Hostname)
+						assert.Equal(t, "concurrent-db-given", nodePtr1.GivenName)
+						assert.Equal(t, []string{"concurrent-tag"}, nodePtr1.ForcedTags)
+
+						assert.Equal(t, "concurrent-db-hostname", nodePtr2.Hostname)
+						assert.Equal(t, "concurrent-db-given", nodePtr2.GivenName)
+						assert.Equal(t, []string{"concurrent-tag"}, nodePtr2.ForcedTags)
+
+						assert.Equal(t, "concurrent-db-hostname", nodePtr3.Hostname)
+						assert.Equal(t, "concurrent-db-given", nodePtr3.GivenName)
+						assert.Equal(t, []string{"concurrent-tag"}, nodePtr3.ForcedTags)
+
+						// Verify consistency with stored state
+						snapshot := store.data.Load()
+						storedNode := snapshot.nodesByID[1]
+						assert.Equal(t, nodePtr1.Hostname, storedNode.Hostname)
+						assert.Equal(t, nodePtr1.GivenName, storedNode.GivenName)
+						assert.Equal(t, nodePtr1.ForcedTags, storedNode.ForcedTags)
+					},
+				},
+				{
+					name: "verify returned node preserves all fields for DB save",
+					action: func(store *NodeStore) {
+						// Get initial state
+						snapshot := store.data.Load()
+						originalNode := snapshot.nodesByID[2]
+						originalIPv4 := originalNode.IPv4
+						originalIPv6 := originalNode.IPv6
+						originalCreatedAt := originalNode.CreatedAt
+						originalUser := originalNode.User
+
+						// Update only hostname
+						resultNode, ok := store.UpdateNode(2, func(n *types.Node) {
+							n.Hostname = "preserve-test-hostname"
+						})
+
+						assert.True(t, ok, "Update should succeed")
+
+						// Convert to struct for DB save
+						nodeForDB := resultNode.AsStruct()
+
+						// Verify all fields are preserved
+						assert.Equal(t, "preserve-test-hostname", nodeForDB.Hostname)
+						assert.Equal(t, originalIPv4, nodeForDB.IPv4)
+						assert.Equal(t, originalIPv6, nodeForDB.IPv6)
+						assert.Equal(t, originalCreatedAt, nodeForDB.CreatedAt)
+						assert.Equal(t, originalUser.Name, nodeForDB.User.Name)
+						assert.Equal(t, types.NodeID(2), nodeForDB.ID)
+
+						// These fields should be suitable for direct database save
+						assert.NotNil(t, nodeForDB.IPv4)
+						assert.NotNil(t, nodeForDB.IPv6)
+						assert.False(t, nodeForDB.CreatedAt.IsZero())
+					},
+				},
+			},
+		},
 	}
 
 	for _, tt := range tests {
@@ -499,3 +839,302 @@ type testStep struct {
 	name   string
 	action func(store *NodeStore)
 }
+
+// --- Additional NodeStore concurrency, batching, race, resource, timeout, and allocation tests ---
+
+// Helper for concurrent test nodes
+func createConcurrentTestNode(id types.NodeID, hostname string) types.Node {
+	machineKey := key.NewMachine()
+	nodeKey := key.NewNode()
+	return types.Node{
+		ID:         id,
+		Hostname:   hostname,
+		MachineKey: machineKey.Public(),
+		NodeKey:    nodeKey.Public(),
+		UserID:     1,
+		User: types.User{
+			Name: "concurrent-test-user",
+		},
+	}
+}
+
+// --- Concurrency: concurrent PutNode operations ---
+func TestNodeStoreConcurrentPutNode(t *testing.T) {
+	const concurrentOps = 20
+	store := NewNodeStore(nil, allowAllPeersFunc)
+	store.Start()
+	defer store.Stop()
+
+	var wg sync.WaitGroup
+	results := make(chan bool, concurrentOps)
+	for i := 0; i < concurrentOps; i++ {
+		wg.Add(1)
+		go func(nodeID int) {
+			defer wg.Done()
+			node := createConcurrentTestNode(types.NodeID(nodeID), "concurrent-node")
+			resultNode := store.PutNode(node)
+			results <- resultNode.Valid()
+		}(i + 1)
+	}
+	wg.Wait()
+	close(results)
+
+	successCount := 0
+	for success := range results {
+		if success {
+			successCount++
+		}
+	}
+	require.Equal(t, concurrentOps, successCount, "All concurrent PutNode operations should succeed")
+}
+
+// --- Batching: concurrent ops fit in one batch ---
+func TestNodeStoreBatchingEfficiency(t *testing.T) {
+	const batchSize = 10
+	const ops = 15 // more than batchSize
+	store := NewNodeStore(nil, allowAllPeersFunc)
+	store.Start()
+	defer store.Stop()
+
+	var wg sync.WaitGroup
+	results := make(chan bool, ops)
+	for i := 0; i < ops; i++ {
+		wg.Add(1)
+		go func(nodeID int) {
+			defer wg.Done()
+			node := createConcurrentTestNode(types.NodeID(nodeID), "batch-node")
+			resultNode := store.PutNode(node)
+			results <- resultNode.Valid()
+		}(i + 1)
+	}
+	wg.Wait()
+	close(results)
+
+	successCount := 0
+	for success := range results {
+		if success {
+			successCount++
+		}
+	}
+	require.Equal(t, ops, successCount, "All batch PutNode operations should succeed")
+}
+
+// --- Race conditions: many goroutines on same node ---
+func TestNodeStoreRaceConditions(t *testing.T) {
+	store := NewNodeStore(nil, allowAllPeersFunc)
+	store.Start()
+	defer store.Stop()
+
+	nodeID := types.NodeID(1)
+	node := createConcurrentTestNode(nodeID, "race-node")
+	resultNode := store.PutNode(node)
+	require.True(t, resultNode.Valid())
+
+	const numGoroutines = 30
+	const opsPerGoroutine = 10
+	var wg sync.WaitGroup
+	errors := make(chan error, numGoroutines*opsPerGoroutine)
+
+	for i := 0; i < numGoroutines; i++ {
+		wg.Add(1)
+		go func(gid int) {
+			defer wg.Done()
+			for j := 0; j < opsPerGoroutine; j++ {
+				switch j % 3 {
+				case 0:
+					resultNode, _ := store.UpdateNode(nodeID, func(n *types.Node) {
+						n.Hostname = "race-updated"
+					})
+					if !resultNode.Valid() {
+						errors <- fmt.Errorf("UpdateNode failed in goroutine %d, op %d", gid, j)
+					}
+				case 1:
+					retrieved, found := store.GetNode(nodeID)
+					if !found || !retrieved.Valid() {
+						errors <- fmt.Errorf("GetNode failed in goroutine %d, op %d", gid, j)
+					}
+				case 2:
+					newNode := createConcurrentTestNode(nodeID, "race-put")
+					resultNode := store.PutNode(newNode)
+					if !resultNode.Valid() {
+						errors <- fmt.Errorf("PutNode failed in goroutine %d, op %d", gid, j)
+					}
+				}
+			}
+		}(i)
+	}
+	wg.Wait()
+	close(errors)
+
+	errorCount := 0
+	for err := range errors {
+		t.Error(err)
+		errorCount++
+	}
+	if errorCount > 0 {
+		t.Fatalf("Race condition test failed with %d errors", errorCount)
+	}
+}
+
+// --- Resource cleanup: goroutine leak detection ---
+func TestNodeStoreResourceCleanup(t *testing.T) {
+	// initialGoroutines := runtime.NumGoroutine()
+	store := NewNodeStore(nil, allowAllPeersFunc)
+	store.Start()
+	defer store.Stop()
+
+	time.Sleep(50 * time.Millisecond)
+	afterStartGoroutines := runtime.NumGoroutine()
+
+	const ops = 100
+	for i := 0; i < ops; i++ {
+		nodeID := types.NodeID(i + 1)
+		node := createConcurrentTestNode(nodeID, "cleanup-node")
+		resultNode := store.PutNode(node)
+		assert.True(t, resultNode.Valid())
+		store.UpdateNode(nodeID, func(n *types.Node) {
+			n.Hostname = "cleanup-updated"
+		})
+		retrieved, found := store.GetNode(nodeID)
+		assert.True(t, found && retrieved.Valid())
+		if i%10 == 9 {
+			store.DeleteNode(nodeID)
+		}
+	}
+	runtime.GC()
+	time.Sleep(100 * time.Millisecond)
+	finalGoroutines := runtime.NumGoroutine()
+	if finalGoroutines > afterStartGoroutines+2 {
+		t.Errorf("Potential goroutine leak: started with %d, ended with %d", afterStartGoroutines, finalGoroutines)
+	}
+}
+
+// --- Timeout/deadlock: operations complete within reasonable time ---
+func TestNodeStoreOperationTimeout(t *testing.T) {
+	store := NewNodeStore(nil, allowAllPeersFunc)
+	store.Start()
+	defer store.Stop()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
+	defer cancel()
+
+	const ops = 30
+	var wg sync.WaitGroup
+	putResults := make([]error, ops)
+	updateResults := make([]error, ops)
+
+	// Launch all PutNode operations concurrently
+	for i := 1; i <= ops; i++ {
+		nodeID := types.NodeID(i)
+		wg.Add(1)
+		go func(idx int, id types.NodeID) {
+			defer wg.Done()
+			startPut := time.Now()
+			fmt.Printf("[TestNodeStoreOperationTimeout] %s: PutNode(%d) starting\n", startPut.Format("15:04:05.000"), id)
+			node := createConcurrentTestNode(id, "timeout-node")
+			resultNode := store.PutNode(node)
+			endPut := time.Now()
+			fmt.Printf("[TestNodeStoreOperationTimeout] %s: PutNode(%d) finished, valid=%v, duration=%v\n", endPut.Format("15:04:05.000"), id, resultNode.Valid(), endPut.Sub(startPut))
+			if !resultNode.Valid() {
+				putResults[idx-1] = fmt.Errorf("PutNode failed for node %d", id)
+			}
+		}(i, nodeID)
+	}
+	wg.Wait()
+
+	// Launch all UpdateNode operations concurrently
+	wg = sync.WaitGroup{}
+	for i := 1; i <= ops; i++ {
+		nodeID := types.NodeID(i)
+		wg.Add(1)
+		go func(idx int, id types.NodeID) {
+			defer wg.Done()
+			startUpdate := time.Now()
+			fmt.Printf("[TestNodeStoreOperationTimeout] %s: UpdateNode(%d) starting\n", startUpdate.Format("15:04:05.000"), id)
+			resultNode, ok := store.UpdateNode(id, func(n *types.Node) {
+				n.Hostname = "timeout-updated"
+			})
+			endUpdate := time.Now()
+			fmt.Printf("[TestNodeStoreOperationTimeout] %s: UpdateNode(%d) finished, valid=%v, ok=%v, duration=%v\n", endUpdate.Format("15:04:05.000"), id, resultNode.Valid(), ok, endUpdate.Sub(startUpdate))
+			if !ok || !resultNode.Valid() {
+				updateResults[idx-1] = fmt.Errorf("UpdateNode failed for node %d", id)
+			}
+		}(i, nodeID)
+	}
+	done := make(chan struct{})
+	go func() {
+		wg.Wait()
+		close(done)
+	}()
+	select {
+	case <-done:
+		errorCount := 0
+		for _, err := range putResults {
+			if err != nil {
+				t.Error(err)
+				errorCount++
+			}
+		}
+		for _, err := range updateResults {
+			if err != nil {
+				t.Error(err)
+				errorCount++
+			}
+		}
+		if errorCount == 0 {
+			t.Log("All concurrent operations completed successfully within timeout")
+		} else {
+			t.Fatalf("Some concurrent operations failed: %d errors", errorCount)
+		}
+	case <-ctx.Done():
+		fmt.Println("[TestNodeStoreOperationTimeout] Timeout reached, test failed")
+		t.Fatal("Operations timed out - potential deadlock or resource issue")
+	}
+}
+
+// --- Edge case: update non-existent node ---
+func TestNodeStoreUpdateNonExistentNode(t *testing.T) {
+	for i := 0; i < 10; i++ {
+		store := NewNodeStore(nil, allowAllPeersFunc)
+		store.Start()
+		nonExistentID := types.NodeID(999 + i)
+		updateCallCount := 0
+		fmt.Printf("[TestNodeStoreUpdateNonExistentNode] UpdateNode(%d) starting\n", nonExistentID)
+		resultNode, ok := store.UpdateNode(nonExistentID, func(n *types.Node) {
+			updateCallCount++
+			n.Hostname = "should-never-be-called"
+		})
+		fmt.Printf("[TestNodeStoreUpdateNonExistentNode] UpdateNode(%d) finished, valid=%v, ok=%v, updateCallCount=%d\n", nonExistentID, resultNode.Valid(), ok, updateCallCount)
+		assert.False(t, ok, "UpdateNode should return false for non-existent node")
+		assert.False(t, resultNode.Valid(), "UpdateNode should return invalid node for non-existent node")
+		assert.Equal(t, 0, updateCallCount, "UpdateFn should not be called for non-existent node")
+		store.Stop()
+	}
+}
+
+// --- Allocation benchmark ---
+func BenchmarkNodeStoreAllocations(b *testing.B) {
+	store := NewNodeStore(nil, allowAllPeersFunc)
+	store.Start()
+	defer store.Stop()
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		nodeID := types.NodeID(i + 1)
+		node := createConcurrentTestNode(nodeID, "bench-node")
+		store.PutNode(node)
+		store.UpdateNode(nodeID, func(n *types.Node) {
+			n.Hostname = "bench-updated"
+		})
+		store.GetNode(nodeID)
+		if i%10 == 9 {
+			store.DeleteNode(nodeID)
+		}
+	}
+}
+
+func TestNodeStoreAllocationStats(t *testing.T) {
+	res := testing.Benchmark(BenchmarkNodeStoreAllocations)
+	allocs := res.AllocsPerOp()
+	t.Logf("NodeStore allocations per op: %.2f", float64(allocs))
+}