mirror of
https://github.com/slackhq/nebula.git
synced 2026-06-30 18:40:29 +02:00
Reduce relay log spam (#1733)
This commit is contained in:
@@ -83,6 +83,7 @@ type HandshakeHostInfo struct {
|
|||||||
initiatingVersionOverride cert.Version // Should we use a non-default cert version for this handshake?
|
initiatingVersionOverride cert.Version // Should we use a non-default cert version for this handshake?
|
||||||
counter int64 // How many attempts have we made so far
|
counter int64 // How many attempts have we made so far
|
||||||
lastRemotes []netip.AddrPort // Remotes that we sent to during the previous attempt
|
lastRemotes []netip.AddrPort // Remotes that we sent to during the previous attempt
|
||||||
|
lastRelays []netip.Addr // Relays we attempted to use during the previous attempt
|
||||||
packetStore []*cachedPacket // A set of packets to be transmitted once the handshake completes
|
packetStore []*cachedPacket // A set of packets to be transmitted once the handshake completes
|
||||||
|
|
||||||
hostinfo *HostInfo
|
hostinfo *HostInfo
|
||||||
@@ -323,7 +324,7 @@ func (hm *HandshakeManager) handleOutbound(vpnIp netip.Addr, lighthouseTriggered
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
hm.f.relayManager.StartRelays(hm.f, vpnIp, hostinfo, stage0)
|
hm.f.relayManager.StartRelays(hm.f, vpnIp, hh, stage0)
|
||||||
|
|
||||||
// If a lighthouse triggered this attempt then we are still in the timer wheel and do not need to re-add
|
// If a lighthouse triggered this attempt then we are still in the timer wheel and do not need to re-add
|
||||||
if !lighthouseTriggered {
|
if !lighthouseTriggered {
|
||||||
|
|||||||
+37
-18
@@ -7,6 +7,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
|
"slices"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
|
||||||
"github.com/slackhq/nebula/cert"
|
"github.com/slackhq/nebula/cert"
|
||||||
@@ -57,14 +58,25 @@ func (rm *relayManager) GetUseRelays() bool {
|
|||||||
// For each candidate relay it either kicks off a handshake to the relay, sends a CreateRelayRequest, retransmits
|
// For each candidate relay it either kicks off a handshake to the relay, sends a CreateRelayRequest, retransmits
|
||||||
// one that may have been lost, or, once the relay is Established, forwards the in-progress
|
// one that may have been lost, or, once the relay is Established, forwards the in-progress
|
||||||
// stage 0 handshake packet for vpnIp through it.
|
// stage 0 handshake packet for vpnIp through it.
|
||||||
func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *HostInfo, stage0 []byte) {
|
func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hh *HandshakeHostInfo, stage0 []byte) {
|
||||||
|
hostinfo := hh.hostinfo
|
||||||
if !rm.GetUseRelays() || len(hostinfo.remotes.relays) == 0 {
|
if !rm.GetUseRelays() || len(hostinfo.remotes.relays) == 0 {
|
||||||
|
hh.lastRelays = nil
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
hostinfo.logger(rm.l).Info("Attempt to relay through hosts", "relays", hostinfo.remotes.relays)
|
relays := hostinfo.remotes.relays
|
||||||
|
listLevel := slog.LevelDebug
|
||||||
|
prior := hh.lastRelays
|
||||||
|
if !slices.Equal(relays, prior) {
|
||||||
|
listLevel = slog.LevelInfo
|
||||||
|
hh.lastRelays = slices.Clone(relays)
|
||||||
|
}
|
||||||
|
hl := hostinfo.logger(rm.l)
|
||||||
|
hl.Log(context.Background(), listLevel, "Attempt to relay through hosts", "relays", relays)
|
||||||
|
|
||||||
// Send a RelayRequest to all known Relay IP's
|
// Send a RelayRequest to all known Relay IP's
|
||||||
for _, relay := range hostinfo.remotes.relays {
|
for _, relay := range relays {
|
||||||
// Don't relay through the host I'm trying to connect to
|
// Don't relay through the host I'm trying to connect to
|
||||||
if relay == vpnIp {
|
if relay == vpnIp {
|
||||||
continue
|
continue
|
||||||
@@ -75,12 +87,19 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Each relay's per-attempt log fires at Info on the first time we hit it and Debug after that.
|
||||||
|
level := slog.LevelInfo
|
||||||
|
if slices.Contains(prior, relay) {
|
||||||
|
level = slog.LevelDebug
|
||||||
|
}
|
||||||
|
|
||||||
relayHostInfo := rm.hostmap.QueryVpnAddr(relay)
|
relayHostInfo := rm.hostmap.QueryVpnAddr(relay)
|
||||||
if relayHostInfo == nil || !relayHostInfo.remote.IsValid() {
|
if relayHostInfo == nil || !relayHostInfo.remote.IsValid() {
|
||||||
hostinfo.logger(rm.l).Info("Establish tunnel to relay target", "relay", relay.String())
|
hl.Log(context.Background(), level, "Establish tunnel to relay target", "relay", relay.String())
|
||||||
f.Handshake(relay)
|
f.Handshake(relay)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check the relay HostInfo to see if we already established a relay through
|
// Check the relay HostInfo to see if we already established a relay through
|
||||||
existingRelay, ok := relayHostInfo.relayState.QueryRelayForByIp(vpnIp)
|
existingRelay, ok := relayHostInfo.relayState.QueryRelayForByIp(vpnIp)
|
||||||
if !ok {
|
if !ok {
|
||||||
@@ -88,7 +107,7 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
|||||||
if relayHostInfo.remote.IsValid() {
|
if relayHostInfo.remote.IsValid() {
|
||||||
idx, err := AddRelay(rm.l, relayHostInfo, rm.hostmap, vpnIp, nil, TerminalType, Requested)
|
idx, err := AddRelay(rm.l, relayHostInfo, rm.hostmap, vpnIp, nil, TerminalType, Requested)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
hostinfo.logger(rm.l).Info("Failed to add relay to hostmap", "relay", relay.String(), "error", err)
|
hl.Info("Failed to add relay to hostmap", "relay", relay.String(), "error", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
m := NebulaControl{
|
m := NebulaControl{
|
||||||
@@ -99,12 +118,12 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
|||||||
switch relayHostInfo.GetCert().Certificate.Version() {
|
switch relayHostInfo.GetCert().Certificate.Version() {
|
||||||
case cert.Version1:
|
case cert.Version1:
|
||||||
if !f.myVpnAddrs[0].Is4() {
|
if !f.myVpnAddrs[0].Is4() {
|
||||||
hostinfo.logger(rm.l).Error("can not establish v1 relay with a v6 network because the relay is not running a current nebula version")
|
hl.Error("can not establish v1 relay with a v6 network because the relay is not running a current nebula version")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if !vpnIp.Is4() {
|
if !vpnIp.Is4() {
|
||||||
hostinfo.logger(rm.l).Error("can not establish v1 relay with a v6 remote network because the relay is not running a current nebula version")
|
hl.Error("can not establish v1 relay with a v6 remote network because the relay is not running a current nebula version")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -116,16 +135,16 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
|||||||
m.RelayFromAddr = netAddrToProtoAddr(f.myVpnAddrs[0])
|
m.RelayFromAddr = netAddrToProtoAddr(f.myVpnAddrs[0])
|
||||||
m.RelayToAddr = netAddrToProtoAddr(vpnIp)
|
m.RelayToAddr = netAddrToProtoAddr(vpnIp)
|
||||||
default:
|
default:
|
||||||
hostinfo.logger(rm.l).Error("Unknown certificate version found while creating relay")
|
hl.Error("Unknown certificate version found while creating relay")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
msg, err := m.Marshal()
|
msg, err := m.Marshal()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
hostinfo.logger(rm.l).Error("Failed to marshal Control message to create relay", "error", err)
|
hl.Error("Failed to marshal Control message to create relay", "error", err)
|
||||||
} else {
|
} else {
|
||||||
f.SendMessageToHostInfo(header.Control, 0, relayHostInfo, msg, make([]byte, 12), make([]byte, mtu))
|
f.SendMessageToHostInfo(header.Control, 0, relayHostInfo, msg, make([]byte, 12), make([]byte, mtu))
|
||||||
rm.l.Info("send CreateRelayRequest",
|
rm.l.Log(context.Background(), level, "send CreateRelayRequest",
|
||||||
"relayFrom", f.myVpnAddrs[0],
|
"relayFrom", f.myVpnAddrs[0],
|
||||||
"relayTo", vpnIp,
|
"relayTo", vpnIp,
|
||||||
"initiatorRelayIndex", idx,
|
"initiatorRelayIndex", idx,
|
||||||
@@ -138,14 +157,14 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
|||||||
|
|
||||||
switch existingRelay.State {
|
switch existingRelay.State {
|
||||||
case Established:
|
case Established:
|
||||||
hostinfo.logger(rm.l).Info("Send handshake via relay", "relay", relay.String())
|
hl.Log(context.Background(), level, "Send handshake via relay", "relay", relay.String())
|
||||||
f.SendVia(relayHostInfo, existingRelay, stage0, make([]byte, 12), make([]byte, mtu), false)
|
f.SendVia(relayHostInfo, existingRelay, stage0, make([]byte, 12), make([]byte, mtu), false)
|
||||||
case Disestablished:
|
case Disestablished:
|
||||||
// Mark this relay as 'requested'
|
// Mark this relay as 'requested'
|
||||||
relayHostInfo.relayState.UpdateRelayForByIpState(vpnIp, Requested)
|
relayHostInfo.relayState.UpdateRelayForByIpState(vpnIp, Requested)
|
||||||
fallthrough
|
fallthrough
|
||||||
case Requested:
|
case Requested:
|
||||||
hostinfo.logger(rm.l).Info("Re-send CreateRelay request", "relay", relay.String())
|
hl.Log(context.Background(), level, "Re-send CreateRelay request", "relay", relay.String())
|
||||||
// Re-send the CreateRelay request, in case the previous one was lost.
|
// Re-send the CreateRelay request, in case the previous one was lost.
|
||||||
m := NebulaControl{
|
m := NebulaControl{
|
||||||
Type: NebulaControl_CreateRelayRequest,
|
Type: NebulaControl_CreateRelayRequest,
|
||||||
@@ -155,12 +174,12 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
|||||||
switch relayHostInfo.GetCert().Certificate.Version() {
|
switch relayHostInfo.GetCert().Certificate.Version() {
|
||||||
case cert.Version1:
|
case cert.Version1:
|
||||||
if !f.myVpnAddrs[0].Is4() {
|
if !f.myVpnAddrs[0].Is4() {
|
||||||
hostinfo.logger(rm.l).Error("can not establish v1 relay with a v6 network because the relay is not running a current nebula version")
|
hl.Error("can not establish v1 relay with a v6 network because the relay is not running a current nebula version")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if !vpnIp.Is4() {
|
if !vpnIp.Is4() {
|
||||||
hostinfo.logger(rm.l).Error("can not establish v1 relay with a v6 remote network because the relay is not running a current nebula version")
|
hl.Error("can not establish v1 relay with a v6 remote network because the relay is not running a current nebula version")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -172,16 +191,16 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
|||||||
m.RelayFromAddr = netAddrToProtoAddr(f.myVpnAddrs[0])
|
m.RelayFromAddr = netAddrToProtoAddr(f.myVpnAddrs[0])
|
||||||
m.RelayToAddr = netAddrToProtoAddr(vpnIp)
|
m.RelayToAddr = netAddrToProtoAddr(vpnIp)
|
||||||
default:
|
default:
|
||||||
hostinfo.logger(rm.l).Error("Unknown certificate version found while creating relay")
|
hl.Error("Unknown certificate version found while creating relay")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
msg, err := m.Marshal()
|
msg, err := m.Marshal()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
hostinfo.logger(rm.l).Error("Failed to marshal Control message to create relay", "error", err)
|
hl.Error("Failed to marshal Control message to create relay", "error", err)
|
||||||
} else {
|
} else {
|
||||||
// This must send over the hostinfo, not over hm.Hosts[ip]
|
// This must send over the hostinfo, not over hm.Hosts[ip]
|
||||||
f.SendMessageToHostInfo(header.Control, 0, relayHostInfo, msg, make([]byte, 12), make([]byte, mtu))
|
f.SendMessageToHostInfo(header.Control, 0, relayHostInfo, msg, make([]byte, 12), make([]byte, mtu))
|
||||||
rm.l.Info("send CreateRelayRequest",
|
rm.l.Log(context.Background(), level, "send CreateRelayRequest",
|
||||||
"relayFrom", f.myVpnAddrs[0],
|
"relayFrom", f.myVpnAddrs[0],
|
||||||
"relayTo", vpnIp,
|
"relayTo", vpnIp,
|
||||||
"initiatorRelayIndex", existingRelay.LocalIndex,
|
"initiatorRelayIndex", existingRelay.LocalIndex,
|
||||||
@@ -192,7 +211,7 @@ func (rm *relayManager) StartRelays(f *Interface, vpnIp netip.Addr, hostinfo *Ho
|
|||||||
// PeerRequested only occurs in Forwarding relays, not Terminal relays, and this is a Terminal relay case.
|
// PeerRequested only occurs in Forwarding relays, not Terminal relays, and this is a Terminal relay case.
|
||||||
fallthrough
|
fallthrough
|
||||||
default:
|
default:
|
||||||
hostinfo.logger(rm.l).Error("Relay unexpected state",
|
hl.Error("Relay unexpected state",
|
||||||
"vpnIp", vpnIp,
|
"vpnIp", vpnIp,
|
||||||
"state", existingRelay.State,
|
"state", existingRelay.State,
|
||||||
"relay", relay,
|
"relay", relay,
|
||||||
|
|||||||
@@ -0,0 +1,97 @@
|
|||||||
|
package nebula
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"log/slog"
|
||||||
|
"net/netip"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/gaissmai/bart"
|
||||||
|
"github.com/slackhq/nebula/test"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestStartRelaysLogDedupe verifies that repeated attempts with the same relay set drop the log
|
||||||
|
// chatter to Debug, mirroring how the normal handshake retry loop quiets down once it's already
|
||||||
|
// announced its targets.
|
||||||
|
func TestStartRelaysLogDedupe(t *testing.T) {
|
||||||
|
vpnIp := netip.MustParseAddr("100.64.99.4")
|
||||||
|
otherRelay := netip.MustParseAddr("100.64.99.5")
|
||||||
|
|
||||||
|
newHH := func() *HandshakeHostInfo {
|
||||||
|
// Use the target's own vpnIp as the "relay" so the loop body skips it without
|
||||||
|
// touching any sender-side state. That isolates the test to the level-selection
|
||||||
|
// behavior of the top-level "Attempt to relay through hosts" log.
|
||||||
|
hostinfo := &HostInfo{
|
||||||
|
vpnAddrs: []netip.Addr{vpnIp},
|
||||||
|
localIndexId: 1,
|
||||||
|
remotes: NewRemoteList([]netip.Addr{vpnIp}, nil),
|
||||||
|
}
|
||||||
|
hostinfo.remotes.relays = []netip.Addr{vpnIp}
|
||||||
|
return &HandshakeHostInfo{hostinfo: hostinfo}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Park any extra relay addresses we'll introduce mid-test in myVpnAddrsTable so the loop
|
||||||
|
// body always skips before touching f.Handshake (which would need a real handshakeManager).
|
||||||
|
addrTable := new(bart.Lite)
|
||||||
|
addrTable.Insert(netip.PrefixFrom(otherRelay, otherRelay.BitLen()))
|
||||||
|
f := &Interface{myVpnAddrsTable: addrTable}
|
||||||
|
|
||||||
|
newRM := func(buf *bytes.Buffer) *relayManager {
|
||||||
|
l := test.NewLoggerWithOutputAndLevel(buf, slog.LevelDebug)
|
||||||
|
rm := &relayManager{l: l, hostmap: newHostMap(l)}
|
||||||
|
rm.useRelays.Store(true)
|
||||||
|
return rm
|
||||||
|
}
|
||||||
|
|
||||||
|
const msg = `msg="Attempt to relay through hosts"`
|
||||||
|
|
||||||
|
t.Run("first attempt logs at Info", func(t *testing.T) {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
rm := newRM(&buf)
|
||||||
|
hh := newHH()
|
||||||
|
rm.StartRelays(f, vpnIp, hh, nil)
|
||||||
|
assert.Equal(t, []netip.Addr{vpnIp}, hh.lastRelays, "lastRelays should record the relay set we just attempted")
|
||||||
|
assert.Contains(t, buf.String(), "level=INFO "+msg, "expected Info level on first attempt")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("repeat attempt with same relays drops to Debug", func(t *testing.T) {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
rm := newRM(&buf)
|
||||||
|
hh := newHH()
|
||||||
|
rm.StartRelays(f, vpnIp, hh, nil)
|
||||||
|
first := append([]netip.Addr(nil), hh.lastRelays...)
|
||||||
|
buf.Reset()
|
||||||
|
rm.StartRelays(f, vpnIp, hh, nil)
|
||||||
|
assert.Equal(t, first, hh.lastRelays)
|
||||||
|
assert.Contains(t, buf.String(), "level=DEBUG "+msg, "expected Debug level on identical retry")
|
||||||
|
assert.NotContains(t, buf.String(), "level=INFO "+msg, "Info should not fire on identical retry")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("changed relay list bumps back to Info", func(t *testing.T) {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
rm := newRM(&buf)
|
||||||
|
hh := newHH()
|
||||||
|
rm.StartRelays(f, vpnIp, hh, nil)
|
||||||
|
buf.Reset()
|
||||||
|
|
||||||
|
// The lighthouse handed us a new set this round.
|
||||||
|
hh.hostinfo.remotes.relays = []netip.Addr{vpnIp, otherRelay}
|
||||||
|
|
||||||
|
rm.StartRelays(f, vpnIp, hh, nil)
|
||||||
|
assert.Equal(t, []netip.Addr{vpnIp, otherRelay}, hh.lastRelays)
|
||||||
|
assert.Contains(t, buf.String(), "level=INFO "+msg, "expected Info when the relay list changes")
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("disabled relays clears lastRelays and emits no Attempt log", func(t *testing.T) {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
rm := newRM(&buf)
|
||||||
|
rm.useRelays.Store(false)
|
||||||
|
hh := newHH()
|
||||||
|
hh.lastRelays = []netip.Addr{vpnIp}
|
||||||
|
|
||||||
|
rm.StartRelays(f, vpnIp, hh, nil)
|
||||||
|
assert.Nil(t, hh.lastRelays, "with relays disabled lastRelays should be cleared")
|
||||||
|
assert.NotContains(t, buf.String(), msg, "should not log when we shortcut out")
|
||||||
|
})
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user