From 6c7ebb08759ddfea0c628bcc7c3069d379edee1b Mon Sep 17 00:00:00 2001 From: Nate Brown Date: Fri, 15 May 2026 15:35:49 -0500 Subject: [PATCH] Reset static host list addresses on change (#1713) --- lighthouse.go | 25 +++++++-- lighthouse_test.go | 126 ++++++++++++++++++++++++++++++++++++++++++++ remote_list.go | 25 +++++++++ remote_list_test.go | 89 +++++++++++++++++++++++++++++++ 4 files changed, 261 insertions(+), 4 deletions(-) diff --git a/lighthouse.go b/lighthouse.go index 1a136a1b..d23e84b8 100644 --- a/lighthouse.go +++ b/lighthouse.go @@ -272,16 +272,18 @@ func (lh *LightHouse) reload(c *config.C, initial bool) error { //NOTE: many things will get much simpler when we combine static_host_map and lighthouse.hosts in config if initial || c.HasChanged("static_host_map") || c.HasChanged("static_map.cadence") || c.HasChanged("static_map.network") || c.HasChanged("static_map.lookup_timeout") { // Clean up. Entries still in the static_host_map will be re-built. - // Entries no longer present must have their (possible) background DNS goroutines stopped. - if existingStaticList := lh.staticList.Load(); existingStaticList != nil { + ourselves := lh.myVpnNetworks[0].Addr() + oldStaticList := lh.staticList.Load() + if oldStaticList != nil { lh.RLock() - for staticVpnAddr := range *existingStaticList { + for staticVpnAddr := range *oldStaticList { if am, ok := lh.addrMap[staticVpnAddr]; ok && am != nil { - am.hr.Cancel() + am.ResetForOwner(ourselves) } } lh.RUnlock() } + // Build a new list based on current config. staticList := make(map[netip.Addr]struct{}) err := lh.loadStaticMap(c, staticList) @@ -289,6 +291,21 @@ func (lh *LightHouse) reload(c *config.C, initial bool) error { return err } + // For entries removed from static_host_map, stop the DNS goroutine and drop the cached addrs. + // All addrs must come from the lighthouses now that it's no longer a static host. + if oldStaticList != nil { + lh.RLock() + for staticVpnAddr := range *oldStaticList { + if _, stillStatic := staticList[staticVpnAddr]; stillStatic { + continue + } + if am, ok := lh.addrMap[staticVpnAddr]; ok && am != nil { + am.ClearHostnameResults() + } + } + lh.RUnlock() + } + lh.staticList.Store(&staticList) if !initial { if c.HasChanged("static_host_map") { diff --git a/lighthouse_test.go b/lighthouse_test.go index c57c44ec..81c883ff 100644 --- a/lighthouse_test.go +++ b/lighthouse_test.go @@ -303,6 +303,132 @@ func TestLighthouse_reload(t *testing.T) { require.NoError(t, err) } +// TestLighthouse_reloadStaticHostMap verifies that reloading static_host_map applies the new +// config rather than appending to it. See issue #718. +func TestLighthouse_reloadStaticHostMap(t *testing.T) { + l := test.NewLogger() + c := config.NewC(l) + c.Settings["lighthouse"] = map[string]any{"am_lighthouse": true} + c.Settings["listen"] = map[string]any{"port": 4242} + c.Settings["static_host_map"] = map[string]any{ + "10.128.0.2": []any{"1.1.1.1:4242"}, + } + + myVpnNet := netip.MustParsePrefix("10.128.0.1/24") + nt := new(bart.Lite) + nt.Insert(myVpnNet) + cs := &CertState{ + myVpnNetworks: []netip.Prefix{myVpnNet}, + myVpnNetworksTable: nt, + } + + lh, err := NewLightHouseFromConfig(t.Context(), l, c, cs, nil, nil) + require.NoError(t, err) + + staticHost := netip.MustParseAddr("10.128.0.2") + otherHost := netip.MustParseAddr("10.128.0.3") + + // Capture the RemoteList pointer up front; an in-flight handshake would hold the same one + // on hostinfo.remotes, so it must reflect every reload below. + pinned := lh.Query(staticHost) + require.NotNil(t, pinned) + assert.Equal(t, []netip.AddrPort{netip.MustParseAddrPort("1.1.1.1:4242")}, pinned.CopyAddrs([]netip.Prefix{})) + + // Replace the remote address. The new address should be the only entry. + nc := map[string]any{ + "static_host_map": map[string]any{ + "10.128.0.2": []any{"2.2.2.2:4242"}, + }, + } + rc, err := yaml.Marshal(nc) + require.NoError(t, err) + require.NoError(t, c.ReloadConfigString(string(rc))) + + rl := lh.Query(staticHost) + require.NotNil(t, rl) + assert.Same(t, pinned, rl, "RemoteList pointer must stay stable so in-flight handshakes pick up the change") + assert.Equal(t, []netip.AddrPort{netip.MustParseAddrPort("2.2.2.2:4242")}, rl.CopyAddrs([]netip.Prefix{})) + + // Reload back to the original IP. Mirrors the round-trip in issue #718 step 6-8 where + // the buggy reload produced [1.1.1.1, 2.2.2.2, 1.1.1.1] instead of [1.1.1.1]. + nc = map[string]any{ + "static_host_map": map[string]any{ + "10.128.0.2": []any{"1.1.1.1:4242"}, + }, + } + rc, err = yaml.Marshal(nc) + require.NoError(t, err) + require.NoError(t, c.ReloadConfigString(string(rc))) + + rl = lh.Query(staticHost) + require.NotNil(t, rl) + assert.Same(t, pinned, rl) + assert.Equal(t, []netip.AddrPort{netip.MustParseAddrPort("1.1.1.1:4242")}, rl.CopyAddrs([]netip.Prefix{})) + + // Reload with the same config. An unchanged entry must not duplicate. + require.NoError(t, c.ReloadConfigString(string(rc))) + + rl = lh.Query(staticHost) + require.NotNil(t, rl) + assert.Same(t, pinned, rl) + assert.Equal(t, []netip.AddrPort{netip.MustParseAddrPort("1.1.1.1:4242")}, rl.CopyAddrs([]netip.Prefix{})) + + // Switch back to 2.2.2.2 so the rest of the test continues against a known address. + nc = map[string]any{ + "static_host_map": map[string]any{ + "10.128.0.2": []any{"2.2.2.2:4242"}, + }, + } + rc, err = yaml.Marshal(nc) + require.NoError(t, err) + require.NoError(t, c.ReloadConfigString(string(rc))) + + // Add a second host alongside the first. Both should be present, neither duplicated. + nc = map[string]any{ + "static_host_map": map[string]any{ + "10.128.0.2": []any{"2.2.2.2:4242"}, + "10.128.0.3": []any{"3.3.3.3:4242"}, + }, + } + rc, err = yaml.Marshal(nc) + require.NoError(t, err) + require.NoError(t, c.ReloadConfigString(string(rc))) + + rl = lh.Query(staticHost) + require.NotNil(t, rl) + assert.Same(t, pinned, rl, "adding a sibling entry must not displace the existing RemoteList") + assert.Equal(t, []netip.AddrPort{netip.MustParseAddrPort("2.2.2.2:4242")}, rl.CopyAddrs([]netip.Prefix{})) + + rl = lh.Query(otherHost) + require.NotNil(t, rl) + assert.Equal(t, []netip.AddrPort{netip.MustParseAddrPort("3.3.3.3:4242")}, rl.CopyAddrs([]netip.Prefix{})) + + // Drop the first host entirely. The vpnAddr is no longer marked static, our owner + // contribution is cleared, but the addrMap entry stays in place so non-static cache + // data (from lighthouse queries) on the same RemoteList isn't lost. In-flight handshakes + // that already had the pointer see an empty address list rather than retrying stale ones. + nc = map[string]any{ + "static_host_map": map[string]any{ + "10.128.0.3": []any{"3.3.3.3:4242"}, + }, + } + rc, err = yaml.Marshal(nc) + require.NoError(t, err) + require.NoError(t, c.ReloadConfigString(string(rc))) + + _, isStatic := lh.GetStaticHostList()[staticHost] + assert.False(t, isStatic) + + rl = lh.Query(staticHost) + require.NotNil(t, rl) + assert.Same(t, pinned, rl) + assert.Empty(t, rl.CopyAddrs([]netip.Prefix{})) + + rl = lh.Query(otherHost) + require.NotNil(t, rl) + assert.Equal(t, []netip.AddrPort{netip.MustParseAddrPort("3.3.3.3:4242")}, rl.CopyAddrs([]netip.Prefix{})) +} + func newLHHostRequest(fromAddr netip.AddrPort, myVpnIp, queryVpnIp netip.Addr, lhh *LightHouseHandler) testLhReply { req := &NebulaMeta{ Type: NebulaMeta_HostQuery, diff --git a/remote_list.go b/remote_list.go index 7b95de87..ef6eb794 100644 --- a/remote_list.go +++ b/remote_list.go @@ -239,6 +239,31 @@ func (r *RemoteList) unlockedSetHostnamesResults(hr *hostnamesResults) { r.hr = hr } +// ResetForOwner zeros the reported address slices for the given owner and marks the addrs list dirty. +// Any pending hostname resolution will be canceled. +func (r *RemoteList) ResetForOwner(ownerVpnAddr netip.Addr) { + r.Lock() + defer r.Unlock() + r.hr.Cancel() + if c, ok := r.cache[ownerVpnAddr]; ok { + if c.v4 != nil { + c.v4.reported = c.v4.reported[:0] + } + if c.v6 != nil { + c.v6.reported = c.v6.reported[:0] + } + } + r.shouldRebuild = true +} + +// ClearHostnameResults cancels the in-flight DNS resolver goroutine (if any) and drops the resolved IP cache. +func (r *RemoteList) ClearHostnameResults() { + r.Lock() + defer r.Unlock() + r.unlockedSetHostnamesResults(nil) + r.shouldRebuild = true +} + // Len locks and reports the size of the deduplicated address list // The deduplication work may need to occur here, so you must pass preferredRanges func (r *RemoteList) Len(preferredRanges []netip.Prefix) int { diff --git a/remote_list_test.go b/remote_list_test.go index 0caf86a4..0b5b7d5d 100644 --- a/remote_list_test.go +++ b/remote_list_test.go @@ -6,8 +6,22 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) +// trackedHostnameResults builds a *hostnamesResults with a known cancel function and a +// pre-populated ips map so tests can assert cancellation and verify previously-resolved +// IPs survive a cancel without spinning up a real DNS resolver. +func trackedHostnameResults(cancelFn func(), addrs ...string) *hostnamesResults { + hr := &hostnamesResults{cancelFn: cancelFn} + ips := map[netip.AddrPort]struct{}{} + for _, a := range addrs { + ips[netip.MustParseAddrPort(a)] = struct{}{} + } + hr.ips.Store(&ips) + return hr +} + func TestRemoteList_Rebuild(t *testing.T) { rl := NewRemoteList([]netip.Addr{netip.MustParseAddr("0.0.0.0")}, nil) rl.unlockedSetV4( @@ -112,6 +126,81 @@ func TestRemoteList_Rebuild(t *testing.T) { assert.Equal(t, "172.31.0.1:10101", rl.addrs[9].String()) } +func TestRemoteList_ResetForOwner(t *testing.T) { + ourselves := netip.MustParseAddr("10.0.0.1") + otherOwner := netip.MustParseAddr("10.0.0.2") + vpnAddr := netip.MustParseAddr("10.0.0.99") + + rl := NewRemoteList([]netip.Addr{vpnAddr}, nil) + rl.unlockedSetV4(ourselves, vpnAddr, + []*V4AddrPort{newIp4AndPortFromString("1.1.1.1:4242")}, + func(netip.Addr, *V4AddrPort) bool { return true }, + ) + rl.unlockedSetV6(ourselves, vpnAddr, + []*V6AddrPort{newIp6AndPortFromString("[1::1]:4242")}, + func(netip.Addr, *V6AddrPort) bool { return true }, + ) + rl.unlockedSetV4(otherOwner, vpnAddr, + []*V4AddrPort{newIp4AndPortFromString("2.2.2.2:4242")}, + func(netip.Addr, *V4AddrPort) bool { return true }, + ) + + canceled := 0 + hr := trackedHostnameResults(func() { canceled++ }, "3.3.3.3:4242") + rl.Lock() + rl.unlockedSetHostnamesResults(hr) + rl.Unlock() + + rl.ResetForOwner(ourselves) + + rl.RLock() + defer rl.RUnlock() + assert.Empty(t, rl.cache[ourselves].v4.reported, "our v4 reported should be cleared") + assert.Empty(t, rl.cache[ourselves].v6.reported, "our v6 reported should be cleared") + assert.Len(t, rl.cache[otherOwner].v4.reported, 1, "other owner's contribution must be preserved") + assert.Equal(t, "2.2.2.2:4242", protoV4AddrPortToNetAddrPort(rl.cache[otherOwner].v4.reported[0]).String()) + assert.Equal(t, 1, canceled, "DNS resolution goroutine should be canceled") + assert.Same(t, hr, rl.hr, "hostnamesResults must be preserved so DNS-resolved IPs keep feeding addrs until replaced") + assert.NotEmpty(t, rl.hr.GetAddrs(), "previously-resolved IPs should still be readable after cancel") + assert.True(t, rl.shouldRebuild, "shouldRebuild must be set so the next Rebuild recomputes addrs") +} + +func TestRemoteList_ResetForOwner_NoEntry(t *testing.T) { + // An owner with no cache entry must not panic; shouldRebuild is still set and any + // existing hostnamesResults is canceled. + rl := NewRemoteList([]netip.Addr{netip.MustParseAddr("10.0.0.99")}, nil) + canceled := 0 + rl.Lock() + rl.unlockedSetHostnamesResults(trackedHostnameResults(func() { canceled++ }, "3.3.3.3:4242")) + rl.Unlock() + + rl.ResetForOwner(netip.MustParseAddr("10.0.0.1")) + + rl.RLock() + defer rl.RUnlock() + assert.Equal(t, 1, canceled) + assert.True(t, rl.shouldRebuild) +} + +func TestRemoteList_ClearHostnameResults(t *testing.T) { + rl := NewRemoteList([]netip.Addr{netip.MustParseAddr("10.0.0.99")}, nil) + + canceled := 0 + hr := trackedHostnameResults(func() { canceled++ }, "3.3.3.3:4242") + rl.Lock() + rl.unlockedSetHostnamesResults(hr) + rl.Unlock() + require.NotEmpty(t, hr.GetAddrs(), "hostnamesResults should have its fastrack IPs populated") + + rl.ClearHostnameResults() + + rl.RLock() + defer rl.RUnlock() + assert.Equal(t, 1, canceled, "DNS resolution goroutine should be canceled") + assert.Nil(t, rl.hr, "hostnamesResults should be dropped") + assert.True(t, rl.shouldRebuild) +} + func BenchmarkFullRebuild(b *testing.B) { rl := NewRemoteList([]netip.Addr{netip.MustParseAddr("0.0.0.0")}, nil) rl.unlockedSetV4(