From f141cebe8d52fb9c30af93208f36e2a59e7287e5 Mon Sep 17 00:00:00 2001 From: Nate Brown Date: Thu, 30 Apr 2026 21:30:56 -0500 Subject: [PATCH] Run e2e tests in parallel, include a goroutine leak detector test (#1700) --- e2e/handshake_manager_test.go | 12 +++++++++ e2e/handshakes_test.go | 21 +++++++++++++++ e2e/leak_test.go | 51 +++++++++++++++++++++++++++++++++++ e2e/tunnels_test.go | 6 +++++ go.mod | 1 + 5 files changed, 91 insertions(+) create mode 100644 e2e/leak_test.go diff --git a/e2e/handshake_manager_test.go b/e2e/handshake_manager_test.go index 3fe784c1..1c6ebacc 100644 --- a/e2e/handshake_manager_test.go +++ b/e2e/handshake_manager_test.go @@ -28,6 +28,7 @@ func makeHandshakePacket(from, to netip.AddrPort, subtype header.MessageSubType, } func TestHandshakeRetransmitDuplicate(t *testing.T) { + t.Parallel() // Verify the responder correctly handles receiving the same msg1 multiple times // (retransmission). The duplicate goes through CheckAndComplete -> ErrAlreadySeen // and the cached response is resent. @@ -78,6 +79,7 @@ func TestHandshakeRetransmitDuplicate(t *testing.T) { } func TestHandshakeTruncatedPacketRecovery(t *testing.T) { + t.Parallel() // Verify that a truncated handshake packet is ignored and the real // packet can still complete the handshake. @@ -126,6 +128,7 @@ func TestHandshakeTruncatedPacketRecovery(t *testing.T) { } func TestHandshakeOrphanedMsg2Dropped(t *testing.T) { + t.Parallel() // A msg2 arriving with no matching pending index should be silently dropped // with no response sent and no state changes. @@ -168,6 +171,7 @@ func TestHandshakeOrphanedMsg2Dropped(t *testing.T) { } func TestHandshakeUnknownMessageCounter(t *testing.T) { + t.Parallel() // A handshake packet with an unexpected message counter should be silently // dropped with no side effects and no UDP response. @@ -199,6 +203,7 @@ func TestHandshakeUnknownMessageCounter(t *testing.T) { } func TestHandshakeUnknownSubtype(t *testing.T) { + t.Parallel() // A handshake packet with an unknown subtype should be silently dropped. ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) @@ -224,6 +229,7 @@ func TestHandshakeUnknownSubtype(t *testing.T) { } func TestHandshakeLateResponse(t *testing.T) { + t.Parallel() // After a handshake times out, a late response should be silently ignored // with no new tunnels created. @@ -273,6 +279,7 @@ func TestHandshakeLateResponse(t *testing.T) { } func TestHandshakeSelfConnectionRejected(t *testing.T) { + t.Parallel() // Verify that a node rejects a handshake containing its own VPN IP in the // peer cert. We do this by sending the initiator's own msg1 back to itself. @@ -321,6 +328,7 @@ func TestHandshakeSelfConnectionRejected(t *testing.T) { } func TestHandshakeMessageCounter0Dropped(t *testing.T) { + t.Parallel() // MessageCounter=0 is not a valid handshake message and should be dropped. ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) @@ -341,6 +349,7 @@ func TestHandshakeMessageCounter0Dropped(t *testing.T) { } func TestHandshakeRemoteAllowList(t *testing.T) { + t.Parallel() // Verify that a handshake from a blocked underlay IP is dropped with no // response and no state changes. Then verify the same packet from an // allowed IP succeeds. @@ -399,6 +408,7 @@ func TestHandshakeRemoteAllowList(t *testing.T) { } func TestHandshakeAlreadySeenPreferredRemote(t *testing.T) { + t.Parallel() // When a duplicate msg1 arrives via ErrAlreadySeen, verify the tunnel // remains functional and hostmap index count is stable. @@ -445,6 +455,7 @@ func TestHandshakeAlreadySeenPreferredRemote(t *testing.T) { } func TestHandshakeWrongResponderPacketStore(t *testing.T) { + t.Parallel() // Verify that when the wrong host responds, the cached packets are // transferred to the new handshake, the evil tunnel is closed, evil's // address is blocked, and the correct tunnel is eventually established. @@ -508,6 +519,7 @@ func TestHandshakeWrongResponderPacketStore(t *testing.T) { } func TestHandshakeRelayComplete(t *testing.T) { + t.Parallel() // Verify that a relay handshake completes correctly and relay state is // properly maintained on all three nodes. diff --git a/e2e/handshakes_test.go b/e2e/handshakes_test.go index 93f200ac..43fa72f2 100644 --- a/e2e/handshakes_test.go +++ b/e2e/handshakes_test.go @@ -84,6 +84,7 @@ func BenchmarkHotPathRelay(b *testing.B) { } func TestGoodHandshake(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) myControl, myVpnIpNet, myUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "me", "10.128.0.1/24", nil) theirControl, theirVpnIpNet, theirUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "them", "10.128.0.2/24", nil) @@ -134,6 +135,7 @@ func TestGoodHandshake(t *testing.T) { } func TestGoodHandshakeNoOverlap(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version2, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) myControl, myVpnIpNet, myUdpAddr, _ := newSimpleServer(cert.Version2, ca, caKey, "me", "10.128.0.1/24", nil) theirControl, theirVpnIpNet, theirUdpAddr, _ := newSimpleServer(cert.Version2, ca, caKey, "them", "2001::69/24", nil) //look ma, cross-stack! @@ -169,6 +171,7 @@ func TestGoodHandshakeNoOverlap(t *testing.T) { } func TestWrongResponderHandshake(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) myControl, myVpnIpNet, myUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "me", "10.128.0.100/24", nil) @@ -245,6 +248,7 @@ func TestWrongResponderHandshake(t *testing.T) { } func TestWrongResponderHandshakeStaticHostMap(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) theirControl, theirVpnIpNet, theirUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "them", "10.128.0.99/24", nil) @@ -327,6 +331,7 @@ func TestWrongResponderHandshakeStaticHostMap(t *testing.T) { } func TestStage1Race(t *testing.T) { + t.Parallel() // This tests ensures that two hosts handshaking with each other at the same time will allow traffic to flow // But will eventually collapse down to a single tunnel @@ -407,6 +412,7 @@ func TestStage1Race(t *testing.T) { } func TestUncleanShutdownRaceLoser(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) myControl, myVpnIpNet, myUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "me ", "10.128.0.1/24", nil) theirControl, theirVpnIpNet, theirUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "them", "10.128.0.2/24", nil) @@ -456,6 +462,7 @@ func TestUncleanShutdownRaceLoser(t *testing.T) { } func TestUncleanShutdownRaceWinner(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) myControl, myVpnIpNet, myUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "me ", "10.128.0.1/24", nil) theirControl, theirVpnIpNet, theirUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "them", "10.128.0.2/24", nil) @@ -507,6 +514,7 @@ func TestUncleanShutdownRaceWinner(t *testing.T) { } func TestRelays(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) myControl, myVpnIpNet, _, _ := newSimpleServer(cert.Version1, ca, caKey, "me ", "10.128.0.1/24", m{"relay": m{"use_relays": true}}) relayControl, relayVpnIpNet, relayUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "relay ", "10.128.0.128/24", m{"relay": m{"am_relay": true}}) @@ -536,6 +544,7 @@ func TestRelays(t *testing.T) { } func TestRelaysDontCareAboutIps(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version2, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) myControl, myVpnIpNet, _, _ := newSimpleServer(cert.Version2, ca, caKey, "me ", "10.128.0.1/24", m{"relay": m{"use_relays": true}}) relayControl, relayVpnIpNet, relayUdpAddr, _ := newSimpleServer(cert.Version2, ca, caKey, "relay ", "2001::9999/24", m{"relay": m{"am_relay": true}}) @@ -565,6 +574,7 @@ func TestRelaysDontCareAboutIps(t *testing.T) { } func TestReestablishRelays(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) myControl, myVpnIpNet, _, _ := newSimpleServer(cert.Version1, ca, caKey, "me ", "10.128.0.1/24", m{"relay": m{"use_relays": true}}) relayControl, relayVpnIpNet, relayUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "relay ", "10.128.0.128/24", m{"relay": m{"am_relay": true}}) @@ -696,6 +706,7 @@ func TestReestablishRelays(t *testing.T) { } func TestStage1RaceRelays(t *testing.T) { + t.Parallel() //NOTE: this is a race between me and relay resulting in a full tunnel from me to them via relay ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) myControl, myVpnIpNet, myUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "me ", "10.128.0.1/24", m{"relay": m{"use_relays": true}}) @@ -743,6 +754,7 @@ func TestStage1RaceRelays(t *testing.T) { } func TestStage1RaceRelays2(t *testing.T) { + t.Parallel() //NOTE: this is a race between me and relay resulting in a full tunnel from me to them via relay ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) myControl, myVpnIpNet, myUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "me ", "10.128.0.1/24", m{"relay": m{"use_relays": true}}) @@ -819,6 +831,7 @@ func TestStage1RaceRelays2(t *testing.T) { } func TestRehandshakingRelays(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) myControl, myVpnIpNet, _, _ := newSimpleServer(cert.Version1, ca, caKey, "me ", "10.128.0.1/24", m{"relay": m{"use_relays": true}}) relayControl, relayVpnIpNet, relayUdpAddr, relayConfig := newSimpleServer(cert.Version1, ca, caKey, "relay ", "10.128.0.128/24", m{"relay": m{"am_relay": true}}) @@ -922,6 +935,7 @@ func TestRehandshakingRelays(t *testing.T) { } func TestRehandshakingRelaysPrimary(t *testing.T) { + t.Parallel() // This test is the same as TestRehandshakingRelays but one of the terminal types is a primary swap winner ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) myControl, myVpnIpNet, _, _ := newSimpleServer(cert.Version1, ca, caKey, "me ", "10.128.0.128/24", m{"relay": m{"use_relays": true}}) @@ -1026,6 +1040,7 @@ func TestRehandshakingRelaysPrimary(t *testing.T) { } func TestRehandshaking(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) myControl, myVpnIpNet, myUdpAddr, myConfig := newSimpleServer(cert.Version1, ca, caKey, "me ", "10.128.0.2/24", nil) theirControl, theirVpnIpNet, theirUdpAddr, theirConfig := newSimpleServer(cert.Version1, ca, caKey, "them", "10.128.0.1/24", nil) @@ -1121,6 +1136,7 @@ func TestRehandshaking(t *testing.T) { } func TestRehandshakingLoser(t *testing.T) { + t.Parallel() // The purpose of this test is that the race loser renews their certificate and rehandshakes. The final tunnel // Should be the one with the new certificate ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) @@ -1219,6 +1235,7 @@ func TestRehandshakingLoser(t *testing.T) { } func TestRaceRegression(t *testing.T) { + t.Parallel() // This test forces stage 1, stage 2, stage 1 to be received by me from them // We had a bug where we were not finding the duplicate handshake and responding to the final stage 1 which // caused a cross-linked hostinfo @@ -1279,6 +1296,7 @@ func TestRaceRegression(t *testing.T) { } func TestV2NonPrimaryWithLighthouse(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version2, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) lhControl, lhVpnIpNet, lhUdpAddr, _ := newSimpleServer(cert.Version2, ca, caKey, "lh ", "10.128.0.1/24, ff::1/64", m{"lighthouse": m{"am_lighthouse": true}}) @@ -1319,6 +1337,7 @@ func TestV2NonPrimaryWithLighthouse(t *testing.T) { } func TestV2NonPrimaryWithOffNetLighthouse(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version2, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) lhControl, lhVpnIpNet, lhUdpAddr, _ := newSimpleServer(cert.Version2, ca, caKey, "lh ", "2001::1/64", m{"lighthouse": m{"am_lighthouse": true}}) @@ -1359,6 +1378,7 @@ func TestV2NonPrimaryWithOffNetLighthouse(t *testing.T) { } func TestLighthouseUpdateOnReload(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version2, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) // Create the lighthouse @@ -1434,6 +1454,7 @@ func TestLighthouseUpdateOnReload(t *testing.T) { } func TestGoodHandshakeUnsafeDest(t *testing.T) { + t.Parallel() unsafePrefix := "192.168.6.0/24" ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version2, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) theirControl, theirVpnIpNet, theirUdpAddr, _ := newSimpleServerWithUdpAndUnsafeNetworks(cert.Version2, ca, caKey, "spooky", "10.128.0.2/24", netip.MustParseAddrPort("10.64.0.2:4242"), unsafePrefix, nil) diff --git a/e2e/leak_test.go b/e2e/leak_test.go new file mode 100644 index 00000000..ffb024fe --- /dev/null +++ b/e2e/leak_test.go @@ -0,0 +1,51 @@ +//go:build e2e_testing +// +build e2e_testing + +package e2e + +import ( + "testing" + "time" + + "github.com/slackhq/nebula/cert" + "github.com/slackhq/nebula/cert_test" + "github.com/slackhq/nebula/e2e/router" + "go.uber.org/goleak" +) + +// TestNoGoroutineLeaks brings up two nebula instances, completes a tunnel, +// stops both, and asserts no goroutines leak past the shutdown. goleak's +// retry mechanism gives the wg.Wait()-driven goroutines a moment to drain +// before failing the assertion. +// +// IgnoreCurrent is necessary in the parallelized suite: other tests can +// leave goroutines mid-shutdown when this one runs (Stop is async, the +// wg.Wait() drain is not blocking on test return). We're checking that +// *this* test's setup tears down cleanly, not that the whole suite is +// idle at this moment. Intentionally NOT t.Parallel()'d for the same +// reason — concurrent test goroutines would always show up. +func TestNoGoroutineLeaks(t *testing.T) { + defer goleak.VerifyNone(t, goleak.IgnoreCurrent()) + + ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) + myControl, myVpnIpNet, myUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "me", "10.128.0.1/24", nil) + theirControl, theirVpnIpNet, theirUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "them", "10.128.0.2/24", nil) + + myControl.InjectLightHouseAddr(theirVpnIpNet[0].Addr(), theirUdpAddr) + theirControl.InjectLightHouseAddr(myVpnIpNet[0].Addr(), myUdpAddr) + + myControl.Start() + theirControl.Start() + + r := router.NewR(t, myControl, theirControl) + assertTunnel(t, myVpnIpNet[0].Addr(), theirVpnIpNet[0].Addr(), myControl, theirControl, r) + + myControl.Stop() + theirControl.Stop() + r.RenderFlow() + + // Settle period: Stop() is non-blocking; the wg-driven goroutines need + // a moment to drain. goleak retries internally too, but a short explicit + // settle reduces flakes when the suite is busy. + time.Sleep(50 * time.Millisecond) +} diff --git a/e2e/tunnels_test.go b/e2e/tunnels_test.go index e8e41945..63c655f3 100644 --- a/e2e/tunnels_test.go +++ b/e2e/tunnels_test.go @@ -19,6 +19,7 @@ import ( ) func TestDropInactiveTunnels(t *testing.T) { + t.Parallel() // The goal of this test is to ensure the shortest inactivity timeout will close the tunnel on both sides // under ideal conditions ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) @@ -63,6 +64,7 @@ func TestDropInactiveTunnels(t *testing.T) { } func TestCertUpgrade(t *testing.T) { + t.Parallel() // The goal of this test is to ensure the shortest inactivity timeout will close the tunnel on both sides // under ideal conditions ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) @@ -157,6 +159,7 @@ func TestCertUpgrade(t *testing.T) { } func TestCertDowngrade(t *testing.T) { + t.Parallel() // The goal of this test is to ensure the shortest inactivity timeout will close the tunnel on both sides // under ideal conditions ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) @@ -255,6 +258,7 @@ func TestCertDowngrade(t *testing.T) { } func TestCertMismatchCorrection(t *testing.T) { + t.Parallel() // The goal of this test is to ensure the shortest inactivity timeout will close the tunnel on both sides // under ideal conditions ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) @@ -322,6 +326,7 @@ func TestCertMismatchCorrection(t *testing.T) { } func TestCrossStackRelaysWork(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version2, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) myControl, myVpnIpNet, _, _ := newSimpleServer(cert.Version2, ca, caKey, "me ", "10.128.0.1/24,fc00::1/64", m{"relay": m{"use_relays": true}}) relayControl, relayVpnIpNet, relayUdpAddr, _ := newSimpleServer(cert.Version2, ca, caKey, "relay ", "10.128.0.128/24,fc00::128/64", m{"relay": m{"am_relay": true}}) @@ -369,6 +374,7 @@ func TestCrossStackRelaysWork(t *testing.T) { } func TestCloseTunnelAuthenticated(t *testing.T) { + t.Parallel() ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{}) myControl, myVpnIpNet, myUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "me", "10.128.0.1/24", m{"tunnels": m{"drop_inactive": true, "inactivity_timeout": "5s"}}) theirControl, theirVpnIpNet, theirUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "them", "10.128.0.2/24", m{"tunnels": m{"drop_inactive": true, "inactivity_timeout": "10m"}}) diff --git a/go.mod b/go.mod index 0de2df7d..24d901c5 100644 --- a/go.mod +++ b/go.mod @@ -22,6 +22,7 @@ require ( github.com/stefanberger/go-pkcs11uri v0.0.0-20230803200340-78284954bff6 github.com/stretchr/testify v1.11.1 github.com/vishvananda/netlink v1.3.1 + go.uber.org/goleak v1.3.0 go.yaml.in/yaml/v3 v3.0.4 golang.org/x/crypto v0.50.0 golang.org/x/exp v0.0.0-20230725093048-515e97ebf090