Clean up a hostinfo to reduce memory usage (#955)

This commit is contained in:
Nate Brown
2023-11-02 16:53:59 -05:00
committed by GitHub
parent 276978377a
commit a44e1b8b05
10 changed files with 240 additions and 265 deletions

View File

@@ -46,8 +46,8 @@ type HandshakeManager struct {
// Mutex for interacting with the vpnIps and indexes maps
sync.RWMutex
vpnIps map[iputil.VpnIp]*HostInfo
indexes map[uint32]*HostInfo
vpnIps map[iputil.VpnIp]*HandshakeHostInfo
indexes map[uint32]*HandshakeHostInfo
mainHostMap *HostMap
lightHouse *LightHouse
@@ -64,10 +64,47 @@ type HandshakeManager struct {
trigger chan iputil.VpnIp
}
type HandshakeHostInfo struct {
sync.Mutex
startTime time.Time // Time that we first started trying with this handshake
ready bool // Is the handshake ready
counter int // How many attempts have we made so far
lastRemotes []*udp.Addr // Remotes that we sent to during the previous attempt
packetStore []*cachedPacket // A set of packets to be transmitted once the handshake completes
hostinfo *HostInfo
}
func (hh *HandshakeHostInfo) cachePacket(l *logrus.Logger, t header.MessageType, st header.MessageSubType, packet []byte, f packetCallback, m *cachedPacketMetrics) {
if len(hh.packetStore) < 100 {
tempPacket := make([]byte, len(packet))
copy(tempPacket, packet)
hh.packetStore = append(hh.packetStore, &cachedPacket{t, st, f, tempPacket})
if l.Level >= logrus.DebugLevel {
hh.hostinfo.logger(l).
WithField("length", len(hh.packetStore)).
WithField("stored", true).
Debugf("Packet store")
}
} else {
m.dropped.Inc(1)
if l.Level >= logrus.DebugLevel {
hh.hostinfo.logger(l).
WithField("length", len(hh.packetStore)).
WithField("stored", false).
Debugf("Packet store")
}
}
}
func NewHandshakeManager(l *logrus.Logger, mainHostMap *HostMap, lightHouse *LightHouse, outside udp.Conn, config HandshakeConfig) *HandshakeManager {
return &HandshakeManager{
vpnIps: map[iputil.VpnIp]*HostInfo{},
indexes: map[uint32]*HostInfo{},
vpnIps: map[iputil.VpnIp]*HandshakeHostInfo{},
indexes: map[uint32]*HandshakeHostInfo{},
mainHostMap: mainHostMap,
lightHouse: lightHouse,
outside: outside,
@@ -97,6 +134,31 @@ func (c *HandshakeManager) Run(ctx context.Context) {
}
}
func (hm *HandshakeManager) HandleIncoming(addr *udp.Addr, via *ViaSender, packet []byte, h *header.H) {
// First remote allow list check before we know the vpnIp
if addr != nil {
if !hm.lightHouse.GetRemoteAllowList().AllowUnknownVpnIp(addr.IP) {
hm.l.WithField("udpAddr", addr).Debug("lighthouse.remote_allow_list denied incoming handshake")
return
}
}
switch h.Subtype {
case header.HandshakeIXPSK0:
switch h.MessageCounter {
case 1:
ixHandshakeStage1(hm.f, addr, via, packet, h)
case 2:
newHostinfo := hm.queryIndex(h.RemoteIndex)
tearDown := ixHandshakeStage2(hm.f, addr, via, newHostinfo, packet, h)
if tearDown && newHostinfo != nil {
hm.DeleteHostInfo(newHostinfo.hostinfo)
}
}
}
}
func (c *HandshakeManager) NextOutboundHandshakeTimerTick(now time.Time) {
c.OutboundHandshakeTimer.Advance(now)
for {
@@ -108,41 +170,35 @@ func (c *HandshakeManager) NextOutboundHandshakeTimerTick(now time.Time) {
}
}
func (c *HandshakeManager) handleOutbound(vpnIp iputil.VpnIp, lighthouseTriggered bool) {
hostinfo := c.QueryVpnIp(vpnIp)
if hostinfo == nil {
return
}
hostinfo.Lock()
defer hostinfo.Unlock()
// We may have raced to completion but now that we have a lock we should ensure we have not yet completed.
if hostinfo.HandshakeComplete {
// Ensure we don't exist in the pending hostmap anymore since we have completed
c.DeleteHostInfo(hostinfo)
func (hm *HandshakeManager) handleOutbound(vpnIp iputil.VpnIp, lighthouseTriggered bool) {
hh := hm.queryVpnIp(vpnIp)
if hh == nil {
return
}
hh.Lock()
defer hh.Unlock()
hostinfo := hh.hostinfo
// If we are out of time, clean up
if hostinfo.HandshakeCounter >= c.config.retries {
hostinfo.logger(c.l).WithField("udpAddrs", hostinfo.remotes.CopyAddrs(c.mainHostMap.preferredRanges)).
WithField("initiatorIndex", hostinfo.localIndexId).
WithField("remoteIndex", hostinfo.remoteIndexId).
if hh.counter >= hm.config.retries {
hh.hostinfo.logger(hm.l).WithField("udpAddrs", hh.hostinfo.remotes.CopyAddrs(hm.mainHostMap.preferredRanges)).
WithField("initiatorIndex", hh.hostinfo.localIndexId).
WithField("remoteIndex", hh.hostinfo.remoteIndexId).
WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
WithField("durationNs", time.Since(hostinfo.handshakeStart).Nanoseconds()).
WithField("durationNs", time.Since(hh.startTime).Nanoseconds()).
Info("Handshake timed out")
c.metricTimedOut.Inc(1)
c.DeleteHostInfo(hostinfo)
hm.metricTimedOut.Inc(1)
hm.DeleteHostInfo(hostinfo)
return
}
// Increment the counter to increase our delay, linear backoff
hostinfo.HandshakeCounter++
hh.counter++
// Check if we have a handshake packet to transmit yet
if !hostinfo.HandshakeReady {
if !ixHandshakeStage0(c.f, hostinfo) {
c.OutboundHandshakeTimer.Add(vpnIp, c.config.tryInterval*time.Duration(hostinfo.HandshakeCounter))
if !hh.ready {
if !ixHandshakeStage0(hm.f, hh) {
hm.OutboundHandshakeTimer.Add(vpnIp, hm.config.tryInterval*time.Duration(hh.counter))
return
}
}
@@ -152,11 +208,11 @@ func (c *HandshakeManager) handleOutbound(vpnIp iputil.VpnIp, lighthouseTriggere
// NB ^ This comment doesn't jive. It's how the thing gets initialized.
// It's the common path. Should it update every time, in case a future LH query/queries give us more info?
if hostinfo.remotes == nil {
hostinfo.remotes = c.lightHouse.QueryCache(vpnIp)
hostinfo.remotes = hm.lightHouse.QueryCache(vpnIp)
}
remotes := hostinfo.remotes.CopyAddrs(c.mainHostMap.preferredRanges)
remotesHaveChanged := !udp.AddrSlice(remotes).Equal(hostinfo.HandshakeLastRemotes)
remotes := hostinfo.remotes.CopyAddrs(hm.mainHostMap.preferredRanges)
remotesHaveChanged := !udp.AddrSlice(remotes).Equal(hh.lastRemotes)
// We only care about a lighthouse trigger if we have new remotes to send to.
// This is a very specific optimization for a fast lighthouse reply.
@@ -165,25 +221,25 @@ func (c *HandshakeManager) handleOutbound(vpnIp iputil.VpnIp, lighthouseTriggere
return
}
hostinfo.HandshakeLastRemotes = remotes
hh.lastRemotes = remotes
// TODO: this will generate a load of queries for hosts with only 1 ip
// (such as ones registered to the lighthouse with only a private IP)
// So we only do it one time after attempting 5 handshakes already.
if len(remotes) <= 1 && hostinfo.HandshakeCounter == 5 {
if len(remotes) <= 1 && hh.counter == 5 {
// If we only have 1 remote it is highly likely our query raced with the other host registered within the lighthouse
// Our vpnIp here has a tunnel with a lighthouse but has yet to send a host update packet there so we only know about
// the learned public ip for them. Query again to short circuit the promotion counter
c.lightHouse.QueryServer(vpnIp, c.f)
hm.lightHouse.QueryServer(vpnIp, hm.f)
}
// Send the handshake to all known ips, stage 2 takes care of assigning the hostinfo.remote based on the first to reply
var sentTo []*udp.Addr
hostinfo.remotes.ForEach(c.mainHostMap.preferredRanges, func(addr *udp.Addr, _ bool) {
c.messageMetrics.Tx(header.Handshake, header.MessageSubType(hostinfo.HandshakePacket[0][1]), 1)
err := c.outside.WriteTo(hostinfo.HandshakePacket[0], addr)
hostinfo.remotes.ForEach(hm.mainHostMap.preferredRanges, func(addr *udp.Addr, _ bool) {
hm.messageMetrics.Tx(header.Handshake, header.MessageSubType(hostinfo.HandshakePacket[0][1]), 1)
err := hm.outside.WriteTo(hostinfo.HandshakePacket[0], addr)
if err != nil {
hostinfo.logger(c.l).WithField("udpAddr", addr).
hostinfo.logger(hm.l).WithField("udpAddr", addr).
WithField("initiatorIndex", hostinfo.localIndexId).
WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
WithError(err).Error("Failed to send handshake message")
@@ -196,63 +252,63 @@ func (c *HandshakeManager) handleOutbound(vpnIp iputil.VpnIp, lighthouseTriggere
// Don't be too noisy or confusing if we fail to send a handshake - if we don't get through we'll eventually log a timeout,
// so only log when the list of remotes has changed
if remotesHaveChanged {
hostinfo.logger(c.l).WithField("udpAddrs", sentTo).
hostinfo.logger(hm.l).WithField("udpAddrs", sentTo).
WithField("initiatorIndex", hostinfo.localIndexId).
WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
Info("Handshake message sent")
} else if c.l.IsLevelEnabled(logrus.DebugLevel) {
hostinfo.logger(c.l).WithField("udpAddrs", sentTo).
} else if hm.l.IsLevelEnabled(logrus.DebugLevel) {
hostinfo.logger(hm.l).WithField("udpAddrs", sentTo).
WithField("initiatorIndex", hostinfo.localIndexId).
WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
Debug("Handshake message sent")
}
if c.config.useRelays && len(hostinfo.remotes.relays) > 0 {
hostinfo.logger(c.l).WithField("relays", hostinfo.remotes.relays).Info("Attempt to relay through hosts")
if hm.config.useRelays && len(hostinfo.remotes.relays) > 0 {
hostinfo.logger(hm.l).WithField("relays", hostinfo.remotes.relays).Info("Attempt to relay through hosts")
// Send a RelayRequest to all known Relay IP's
for _, relay := range hostinfo.remotes.relays {
// Don't relay to myself, and don't relay through the host I'm trying to connect to
if *relay == vpnIp || *relay == c.lightHouse.myVpnIp {
if *relay == vpnIp || *relay == hm.lightHouse.myVpnIp {
continue
}
relayHostInfo := c.mainHostMap.QueryVpnIp(*relay)
relayHostInfo := hm.mainHostMap.QueryVpnIp(*relay)
if relayHostInfo == nil || relayHostInfo.remote == nil {
hostinfo.logger(c.l).WithField("relay", relay.String()).Info("Establish tunnel to relay target")
c.f.Handshake(*relay)
hostinfo.logger(hm.l).WithField("relay", relay.String()).Info("Establish tunnel to relay target")
hm.f.Handshake(*relay)
continue
}
// Check the relay HostInfo to see if we already established a relay through it
if existingRelay, ok := relayHostInfo.relayState.QueryRelayForByIp(vpnIp); ok {
switch existingRelay.State {
case Established:
hostinfo.logger(c.l).WithField("relay", relay.String()).Info("Send handshake via relay")
c.f.SendVia(relayHostInfo, existingRelay, hostinfo.HandshakePacket[0], make([]byte, 12), make([]byte, mtu), false)
hostinfo.logger(hm.l).WithField("relay", relay.String()).Info("Send handshake via relay")
hm.f.SendVia(relayHostInfo, existingRelay, hostinfo.HandshakePacket[0], make([]byte, 12), make([]byte, mtu), false)
case Requested:
hostinfo.logger(c.l).WithField("relay", relay.String()).Info("Re-send CreateRelay request")
hostinfo.logger(hm.l).WithField("relay", relay.String()).Info("Re-send CreateRelay request")
// Re-send the CreateRelay request, in case the previous one was lost.
m := NebulaControl{
Type: NebulaControl_CreateRelayRequest,
InitiatorRelayIndex: existingRelay.LocalIndex,
RelayFromIp: uint32(c.lightHouse.myVpnIp),
RelayFromIp: uint32(hm.lightHouse.myVpnIp),
RelayToIp: uint32(vpnIp),
}
msg, err := m.Marshal()
if err != nil {
hostinfo.logger(c.l).
hostinfo.logger(hm.l).
WithError(err).
Error("Failed to marshal Control message to create relay")
} else {
// This must send over the hostinfo, not over hm.Hosts[ip]
c.f.SendMessageToHostInfo(header.Control, 0, relayHostInfo, msg, make([]byte, 12), make([]byte, mtu))
c.l.WithFields(logrus.Fields{
"relayFrom": c.lightHouse.myVpnIp,
hm.f.SendMessageToHostInfo(header.Control, 0, relayHostInfo, msg, make([]byte, 12), make([]byte, mtu))
hm.l.WithFields(logrus.Fields{
"relayFrom": hm.lightHouse.myVpnIp,
"relayTo": vpnIp,
"initiatorRelayIndex": existingRelay.LocalIndex,
"relay": *relay}).
Info("send CreateRelayRequest")
}
default:
hostinfo.logger(c.l).
hostinfo.logger(hm.l).
WithField("vpnIp", vpnIp).
WithField("state", existingRelay.State).
WithField("relay", relayHostInfo.vpnIp).
@@ -261,26 +317,26 @@ func (c *HandshakeManager) handleOutbound(vpnIp iputil.VpnIp, lighthouseTriggere
} else {
// No relays exist or requested yet.
if relayHostInfo.remote != nil {
idx, err := AddRelay(c.l, relayHostInfo, c.mainHostMap, vpnIp, nil, TerminalType, Requested)
idx, err := AddRelay(hm.l, relayHostInfo, hm.mainHostMap, vpnIp, nil, TerminalType, Requested)
if err != nil {
hostinfo.logger(c.l).WithField("relay", relay.String()).WithError(err).Info("Failed to add relay to hostmap")
hostinfo.logger(hm.l).WithField("relay", relay.String()).WithError(err).Info("Failed to add relay to hostmap")
}
m := NebulaControl{
Type: NebulaControl_CreateRelayRequest,
InitiatorRelayIndex: idx,
RelayFromIp: uint32(c.lightHouse.myVpnIp),
RelayFromIp: uint32(hm.lightHouse.myVpnIp),
RelayToIp: uint32(vpnIp),
}
msg, err := m.Marshal()
if err != nil {
hostinfo.logger(c.l).
hostinfo.logger(hm.l).
WithError(err).
Error("Failed to marshal Control message to create relay")
} else {
c.f.SendMessageToHostInfo(header.Control, 0, relayHostInfo, msg, make([]byte, 12), make([]byte, mtu))
c.l.WithFields(logrus.Fields{
"relayFrom": c.lightHouse.myVpnIp,
hm.f.SendMessageToHostInfo(header.Control, 0, relayHostInfo, msg, make([]byte, 12), make([]byte, mtu))
hm.l.WithFields(logrus.Fields{
"relayFrom": hm.lightHouse.myVpnIp,
"relayTo": vpnIp,
"initiatorRelayIndex": idx,
"relay": *relay}).
@@ -293,13 +349,13 @@ func (c *HandshakeManager) handleOutbound(vpnIp iputil.VpnIp, lighthouseTriggere
// If a lighthouse triggered this attempt then we are still in the timer wheel and do not need to re-add
if !lighthouseTriggered {
c.OutboundHandshakeTimer.Add(vpnIp, c.config.tryInterval*time.Duration(hostinfo.HandshakeCounter))
hm.OutboundHandshakeTimer.Add(vpnIp, hm.config.tryInterval*time.Duration(hh.counter))
}
}
// GetOrHandshake will try to find a hostinfo with a fully formed tunnel or start a new handshake if one is not present
// The 2nd argument will be true if the hostinfo is ready to transmit traffic
func (hm *HandshakeManager) GetOrHandshake(vpnIp iputil.VpnIp, cacheCb func(*HostInfo)) (*HostInfo, bool) {
func (hm *HandshakeManager) GetOrHandshake(vpnIp iputil.VpnIp, cacheCb func(*HandshakeHostInfo)) (*HostInfo, bool) {
// Check the main hostmap and maintain a read lock if our host is not there
hm.mainHostMap.RLock()
if h, ok := hm.mainHostMap.Hosts[vpnIp]; ok {
@@ -316,16 +372,16 @@ func (hm *HandshakeManager) GetOrHandshake(vpnIp iputil.VpnIp, cacheCb func(*Hos
}
// StartHandshake will ensure a handshake is currently being attempted for the provided vpn ip
func (hm *HandshakeManager) StartHandshake(vpnIp iputil.VpnIp, cacheCb func(*HostInfo)) *HostInfo {
func (hm *HandshakeManager) StartHandshake(vpnIp iputil.VpnIp, cacheCb func(*HandshakeHostInfo)) *HostInfo {
hm.Lock()
if hostinfo, ok := hm.vpnIps[vpnIp]; ok {
if hh, ok := hm.vpnIps[vpnIp]; ok {
// We are already trying to handshake with this vpn ip
if cacheCb != nil {
cacheCb(hostinfo)
cacheCb(hh)
}
hm.Unlock()
return hostinfo
return hh.hostinfo
}
hostinfo := &HostInfo{
@@ -338,12 +394,16 @@ func (hm *HandshakeManager) StartHandshake(vpnIp iputil.VpnIp, cacheCb func(*Hos
},
}
hm.vpnIps[vpnIp] = hostinfo
hh := &HandshakeHostInfo{
hostinfo: hostinfo,
startTime: time.Now(),
}
hm.vpnIps[vpnIp] = hh
hm.metricInitiated.Inc(1)
hm.OutboundHandshakeTimer.Add(vpnIp, hm.config.tryInterval)
if cacheCb != nil {
cacheCb(hostinfo)
cacheCb(hh)
}
// If this is a static host, we don't need to wait for the HostQueryReply
@@ -416,8 +476,8 @@ func (c *HandshakeManager) CheckAndComplete(hostinfo *HostInfo, handshakePacket
return existingIndex, ErrLocalIndexCollision
}
existingIndex, found = c.indexes[hostinfo.localIndexId]
if found && existingIndex != hostinfo {
existingPendingIndex, found := c.indexes[hostinfo.localIndexId]
if found && existingPendingIndex.hostinfo != hostinfo {
// We have a collision, but for a different hostinfo
return existingIndex, ErrLocalIndexCollision
}
@@ -461,7 +521,7 @@ func (hm *HandshakeManager) Complete(hostinfo *HostInfo, f *Interface) {
// allocateIndex generates a unique localIndexId for this HostInfo
// and adds it to the pendingHostMap. Will error if we are unable to generate
// a unique localIndexId
func (hm *HandshakeManager) allocateIndex(h *HostInfo) error {
func (hm *HandshakeManager) allocateIndex(hh *HandshakeHostInfo) error {
hm.mainHostMap.RLock()
defer hm.mainHostMap.RUnlock()
hm.Lock()
@@ -477,8 +537,8 @@ func (hm *HandshakeManager) allocateIndex(h *HostInfo) error {
_, inMain := hm.mainHostMap.Indexes[index]
if !inMain && !inPending {
h.localIndexId = index
hm.indexes[index] = h
hh.hostinfo.localIndexId = index
hm.indexes[index] = hh
return nil
}
}
@@ -495,12 +555,12 @@ func (c *HandshakeManager) DeleteHostInfo(hostinfo *HostInfo) {
func (c *HandshakeManager) unlockedDeleteHostInfo(hostinfo *HostInfo) {
delete(c.vpnIps, hostinfo.vpnIp)
if len(c.vpnIps) == 0 {
c.vpnIps = map[iputil.VpnIp]*HostInfo{}
c.vpnIps = map[iputil.VpnIp]*HandshakeHostInfo{}
}
delete(c.indexes, hostinfo.localIndexId)
if len(c.vpnIps) == 0 {
c.indexes = map[uint32]*HostInfo{}
c.indexes = map[uint32]*HandshakeHostInfo{}
}
if c.l.Level >= logrus.DebugLevel {
@@ -510,16 +570,33 @@ func (c *HandshakeManager) unlockedDeleteHostInfo(hostinfo *HostInfo) {
}
}
func (c *HandshakeManager) QueryVpnIp(vpnIp iputil.VpnIp) *HostInfo {
c.RLock()
defer c.RUnlock()
return c.vpnIps[vpnIp]
func (hm *HandshakeManager) QueryVpnIp(vpnIp iputil.VpnIp) *HostInfo {
hh := hm.queryVpnIp(vpnIp)
if hh != nil {
return hh.hostinfo
}
return nil
}
func (c *HandshakeManager) QueryIndex(index uint32) *HostInfo {
c.RLock()
defer c.RUnlock()
return c.indexes[index]
func (hm *HandshakeManager) queryVpnIp(vpnIp iputil.VpnIp) *HandshakeHostInfo {
hm.RLock()
defer hm.RUnlock()
return hm.vpnIps[vpnIp]
}
func (hm *HandshakeManager) QueryIndex(index uint32) *HostInfo {
hh := hm.queryIndex(index)
if hh != nil {
return hh.hostinfo
}
return nil
}
func (hm *HandshakeManager) queryIndex(index uint32) *HandshakeHostInfo {
hm.RLock()
defer hm.RUnlock()
return hm.indexes[index]
}
func (c *HandshakeManager) GetPreferredRanges() []*net.IPNet {
@@ -531,7 +608,7 @@ func (c *HandshakeManager) ForEachVpnIp(f controlEach) {
defer c.RUnlock()
for _, v := range c.vpnIps {
f(v)
f(v.hostinfo)
}
}
@@ -540,7 +617,7 @@ func (c *HandshakeManager) ForEachIndex(f controlEach) {
defer c.RUnlock()
for _, v := range c.indexes {
f(v)
f(v.hostinfo)
}
}