mirror of
https://github.com/slackhq/nebula.git
synced 2026-06-30 18:40:29 +02:00
Reduce relay log spam (#1733)
This commit is contained in:
@@ -83,6 +83,7 @@ type HandshakeHostInfo struct {
|
||||
initiatingVersionOverride cert.Version // Should we use a non-default cert version for this handshake?
|
||||
counter int64 // How many attempts have we made so far
|
||||
lastRemotes []netip.AddrPort // Remotes that we sent to during the previous attempt
|
||||
lastRelays []netip.Addr // Relays we attempted to use during the previous attempt
|
||||
packetStore []*cachedPacket // A set of packets to be transmitted once the handshake completes
|
||||
|
||||
hostinfo *HostInfo
|
||||
@@ -323,7 +324,7 @@ func (hm *HandshakeManager) handleOutbound(vpnIp netip.Addr, lighthouseTriggered
|
||||
)
|
||||
}
|
||||
|
||||
hm.f.relayManager.StartRelays(hm.f, vpnIp, hostinfo, stage0)
|
||||
hm.f.relayManager.StartRelays(hm.f, vpnIp, hh, stage0)
|
||||
|
||||
// If a lighthouse triggered this attempt then we are still in the timer wheel and do not need to re-add
|
||||
if !lighthouseTriggered {
|
||||
|
||||
+37
-18
@@ -7,6 +7,7 @@ import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/netip"
|
||||
"slices"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/slackhq/nebula/cert"
|
||||
@@ -57,14 +58,25 @@ func (rm *relayManager) GetUseRelays() bool {
|
||||
// For each candidate relay it either kicks off a handshake to the relay, sends a CreateRelayRequest, retransmits
|
||||
// one that may have been lost, or, once the relay is Established, forwards the in-progress
|
||||
// stage 0 handshake packet for vpnIp through it.
|
||||
func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *HostInfo, stage0 []byte) {
|
||||
func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hh *HandshakeHostInfo, stage0 []byte) {
|
||||
hostinfo := hh.hostinfo
|
||||
if !rm.GetUseRelays() || len(hostinfo.remotes.relays) == 0 {
|
||||
hh.lastRelays = nil
|
||||
return
|
||||
}
|
||||
|
||||
hostinfo.logger(rm.l).Info("Attempt to relay through hosts", "relays", hostinfo.remotes.relays)
|
||||
relays := hostinfo.remotes.relays
|
||||
listLevel := slog.LevelDebug
|
||||
prior := hh.lastRelays
|
||||
if !slices.Equal(relays, prior) {
|
||||
listLevel = slog.LevelInfo
|
||||
hh.lastRelays = slices.Clone(relays)
|
||||
}
|
||||
hl := hostinfo.logger(rm.l)
|
||||
hl.Log(context.Background(), listLevel, "Attempt to relay through hosts", "relays", relays)
|
||||
|
||||
// Send a RelayRequest to all known Relay IP's
|
||||
for _, relay := range hostinfo.remotes.relays {
|
||||
for _, relay := range relays {
|
||||
// Don't relay through the host I'm trying to connect to
|
||||
if relay == vpnIp {
|
||||
continue
|
||||
@@ -75,12 +87,19 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
||||
continue
|
||||
}
|
||||
|
||||
// Each relay's per-attempt log fires at Info on the first time we hit it and Debug after that.
|
||||
level := slog.LevelInfo
|
||||
if slices.Contains(prior, relay) {
|
||||
level = slog.LevelDebug
|
||||
}
|
||||
|
||||
relayHostInfo := rm.hostmap.QueryVpnAddr(relay)
|
||||
if relayHostInfo == nil || !relayHostInfo.remote.IsValid() {
|
||||
hostinfo.logger(rm.l).Info("Establish tunnel to relay target", "relay", relay.String())
|
||||
hl.Log(context.Background(), level, "Establish tunnel to relay target", "relay", relay.String())
|
||||
f.Handshake(relay)
|
||||
continue
|
||||
}
|
||||
|
||||
// Check the relay HostInfo to see if we already established a relay through
|
||||
existingRelay, ok := relayHostInfo.relayState.QueryRelayForByIp(vpnIp)
|
||||
if !ok {
|
||||
@@ -88,7 +107,7 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
||||
if relayHostInfo.remote.IsValid() {
|
||||
idx, err := AddRelay(rm.l, relayHostInfo, rm.hostmap, vpnIp, nil, TerminalType, Requested)
|
||||
if err != nil {
|
||||
hostinfo.logger(rm.l).Info("Failed to add relay to hostmap", "relay", relay.String(), "error", err)
|
||||
hl.Info("Failed to add relay to hostmap", "relay", relay.String(), "error", err)
|
||||
}
|
||||
|
||||
m := NebulaControl{
|
||||
@@ -99,12 +118,12 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
||||
switch relayHostInfo.GetCert().Certificate.Version() {
|
||||
case cert.Version1:
|
||||
if !f.myVpnAddrs[0].Is4() {
|
||||
hostinfo.logger(rm.l).Error("can not establish v1 relay with a v6 network because the relay is not running a current nebula version")
|
||||
hl.Error("can not establish v1 relay with a v6 network because the relay is not running a current nebula version")
|
||||
continue
|
||||
}
|
||||
|
||||
if !vpnIp.Is4() {
|
||||
hostinfo.logger(rm.l).Error("can not establish v1 relay with a v6 remote network because the relay is not running a current nebula version")
|
||||
hl.Error("can not establish v1 relay with a v6 remote network because the relay is not running a current nebula version")
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -116,16 +135,16 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
||||
m.RelayFromAddr = netAddrToProtoAddr(f.myVpnAddrs[0])
|
||||
m.RelayToAddr = netAddrToProtoAddr(vpnIp)
|
||||
default:
|
||||
hostinfo.logger(rm.l).Error("Unknown certificate version found while creating relay")
|
||||
hl.Error("Unknown certificate version found while creating relay")
|
||||
continue
|
||||
}
|
||||
|
||||
msg, err := m.Marshal()
|
||||
if err != nil {
|
||||
hostinfo.logger(rm.l).Error("Failed to marshal Control message to create relay", "error", err)
|
||||
hl.Error("Failed to marshal Control message to create relay", "error", err)
|
||||
} else {
|
||||
f.SendMessageToHostInfo(header.Control, 0, relayHostInfo, msg, make([]byte, 12), make([]byte, mtu))
|
||||
rm.l.Info("send CreateRelayRequest",
|
||||
rm.l.Log(context.Background(), level, "send CreateRelayRequest",
|
||||
"relayFrom", f.myVpnAddrs[0],
|
||||
"relayTo", vpnIp,
|
||||
"initiatorRelayIndex", idx,
|
||||
@@ -138,14 +157,14 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
||||
|
||||
switch existingRelay.State {
|
||||
case Established:
|
||||
hostinfo.logger(rm.l).Info("Send handshake via relay", "relay", relay.String())
|
||||
hl.Log(context.Background(), level, "Send handshake via relay", "relay", relay.String())
|
||||
f.SendVia(relayHostInfo, existingRelay, stage0, make([]byte, 12), make([]byte, mtu), false)
|
||||
case Disestablished:
|
||||
// Mark this relay as 'requested'
|
||||
relayHostInfo.relayState.UpdateRelayForByIpState(vpnIp, Requested)
|
||||
fallthrough
|
||||
case Requested:
|
||||
hostinfo.logger(rm.l).Info("Re-send CreateRelay request", "relay", relay.String())
|
||||
hl.Log(context.Background(), level, "Re-send CreateRelay request", "relay", relay.String())
|
||||
// Re-send the CreateRelay request, in case the previous one was lost.
|
||||
m := NebulaControl{
|
||||
Type: NebulaControl_CreateRelayRequest,
|
||||
@@ -155,12 +174,12 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
||||
switch relayHostInfo.GetCert().Certificate.Version() {
|
||||
case cert.Version1:
|
||||
if !f.myVpnAddrs[0].Is4() {
|
||||
hostinfo.logger(rm.l).Error("can not establish v1 relay with a v6 network because the relay is not running a current nebula version")
|
||||
hl.Error("can not establish v1 relay with a v6 network because the relay is not running a current nebula version")
|
||||
continue
|
||||
}
|
||||
|
||||
if !vpnIp.Is4() {
|
||||
hostinfo.logger(rm.l).Error("can not establish v1 relay with a v6 remote network because the relay is not running a current nebula version")
|
||||
hl.Error("can not establish v1 relay with a v6 remote network because the relay is not running a current nebula version")
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -172,16 +191,16 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
||||
m.RelayFromAddr = netAddrToProtoAddr(f.myVpnAddrs[0])
|
||||
m.RelayToAddr = netAddrToProtoAddr(vpnIp)
|
||||
default:
|
||||
hostinfo.logger(rm.l).Error("Unknown certificate version found while creating relay")
|
||||
hl.Error("Unknown certificate version found while creating relay")
|
||||
continue
|
||||
}
|
||||
msg, err := m.Marshal()
|
||||
if err != nil {
|
||||
hostinfo.logger(rm.l).Error("Failed to marshal Control message to create relay", "error", err)
|
||||
hl.Error("Failed to marshal Control message to create relay", "error", err)
|
||||
} else {
|
||||
// This must send over the hostinfo, not over hm.Hosts[ip]
|
||||
f.SendMessageToHostInfo(header.Control, 0, relayHostInfo, msg, make([]byte, 12), make([]byte, mtu))
|
||||
rm.l.Info("send CreateRelayRequest",
|
||||
rm.l.Log(context.Background(), level, "send CreateRelayRequest",
|
||||
"relayFrom", f.myVpnAddrs[0],
|
||||
"relayTo", vpnIp,
|
||||
"initiatorRelayIndex", existingRelay.LocalIndex,
|
||||
@@ -192,7 +211,7 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
||||
// PeerRequested only occurs in Forwarding relays, not Terminal relays, and this is a Terminal relay case.
|
||||
fallthrough
|
||||
default:
|
||||
hostinfo.logger(rm.l).Error("Relay unexpected state",
|
||||
hl.Error("Relay unexpected state",
|
||||
"vpnIp", vpnIp,
|
||||
"state", existingRelay.State,
|
||||
"relay", relay,
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
package nebula
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"log/slog"
|
||||
"net/netip"
|
||||
"testing"
|
||||
|
||||
"github.com/gaissmai/bart"
|
||||
"github.com/slackhq/nebula/test"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// TestStartRelaysLogDedupe verifies that repeated attempts with the same relay set drop the log
|
||||
// chatter to Debug, mirroring how the normal handshake retry loop quiets down once it's already
|
||||
// announced its targets.
|
||||
func TestStartRelaysLogDedupe(t *testing.T) {
|
||||
vpnIp := netip.MustParseAddr("100.64.99.4")
|
||||
otherRelay := netip.MustParseAddr("100.64.99.5")
|
||||
|
||||
newHH := func() *HandshakeHostInfo {
|
||||
// Use the target's own vpnIp as the "relay" so the loop body skips it without
|
||||
// touching any sender-side state. That isolates the test to the level-selection
|
||||
// behavior of the top-level "Attempt to relay through hosts" log.
|
||||
hostinfo := &HostInfo{
|
||||
vpnAddrs: []netip.Addr{vpnIp},
|
||||
localIndexId: 1,
|
||||
remotes: NewRemoteList([]netip.Addr{vpnIp}, nil),
|
||||
}
|
||||
hostinfo.remotes.relays = []netip.Addr{vpnIp}
|
||||
return &HandshakeHostInfo{hostinfo: hostinfo}
|
||||
}
|
||||
|
||||
// Park any extra relay addresses we'll introduce mid-test in myVpnAddrsTable so the loop
|
||||
// body always skips before touching f.Handshake (which would need a real handshakeManager).
|
||||
addrTable := new(bart.Lite)
|
||||
addrTable.Insert(netip.PrefixFrom(otherRelay, otherRelay.BitLen()))
|
||||
f := &Interface{myVpnAddrsTable: addrTable}
|
||||
|
||||
newRM := func(buf *bytes.Buffer) *relayManager {
|
||||
l := test.NewLoggerWithOutputAndLevel(buf, slog.LevelDebug)
|
||||
rm := &relayManager{l: l, hostmap: newHostMap(l)}
|
||||
rm.useRelays.Store(true)
|
||||
return rm
|
||||
}
|
||||
|
||||
const msg = `msg="Attempt to relay through hosts"`
|
||||
|
||||
t.Run("first attempt logs at Info", func(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
rm := newRM(&buf)
|
||||
hh := newHH()
|
||||
rm.StartRelays(f, vpnIp, hh, nil)
|
||||
assert.Equal(t, []netip.Addr{vpnIp}, hh.lastRelays, "lastRelays should record the relay set we just attempted")
|
||||
assert.Contains(t, buf.String(), "level=INFO "+msg, "expected Info level on first attempt")
|
||||
})
|
||||
|
||||
t.Run("repeat attempt with same relays drops to Debug", func(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
rm := newRM(&buf)
|
||||
hh := newHH()
|
||||
rm.StartRelays(f, vpnIp, hh, nil)
|
||||
first := append([]netip.Addr(nil), hh.lastRelays...)
|
||||
buf.Reset()
|
||||
rm.StartRelays(f, vpnIp, hh, nil)
|
||||
assert.Equal(t, first, hh.lastRelays)
|
||||
assert.Contains(t, buf.String(), "level=DEBUG "+msg, "expected Debug level on identical retry")
|
||||
assert.NotContains(t, buf.String(), "level=INFO "+msg, "Info should not fire on identical retry")
|
||||
})
|
||||
|
||||
t.Run("changed relay list bumps back to Info", func(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
rm := newRM(&buf)
|
||||
hh := newHH()
|
||||
rm.StartRelays(f, vpnIp, hh, nil)
|
||||
buf.Reset()
|
||||
|
||||
// The lighthouse handed us a new set this round.
|
||||
hh.hostinfo.remotes.relays = []netip.Addr{vpnIp, otherRelay}
|
||||
|
||||
rm.StartRelays(f, vpnIp, hh, nil)
|
||||
assert.Equal(t, []netip.Addr{vpnIp, otherRelay}, hh.lastRelays)
|
||||
assert.Contains(t, buf.String(), "level=INFO "+msg, "expected Info when the relay list changes")
|
||||
})
|
||||
|
||||
t.Run("disabled relays clears lastRelays and emits no Attempt log", func(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
rm := newRM(&buf)
|
||||
rm.useRelays.Store(false)
|
||||
hh := newHH()
|
||||
hh.lastRelays = []netip.Addr{vpnIp}
|
||||
|
||||
rm.StartRelays(f, vpnIp, hh, nil)
|
||||
assert.Nil(t, hh.lastRelays, "with relays disabled lastRelays should be cleared")
|
||||
assert.NotContains(t, buf.String(), msg, "should not log when we shortcut out")
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user