Limit how often a busy tunnel can requery the lighthouse (#940)

Co-authored-by: Wade Simmons <wadey@slack-corp.com>
This commit is contained in:
Nate Brown 2023-08-08 13:26:41 -05:00 committed by GitHub
parent 5671c6607c
commit 223cc6e660
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 62 additions and 4 deletions

View File

@ -5,6 +5,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"math"
"os" "os"
"os/signal" "os/signal"
"path/filepath" "path/filepath"
@ -236,6 +237,15 @@ func (c *C) GetInt(k string, d int) int {
return v return v
} }
// GetUint32 will get the uint32 for k or return the default d if not found or invalid
func (c *C) GetUint32(k string, d uint32) uint32 {
r := c.GetInt(k, int(d))
if uint64(r) > uint64(math.MaxUint32) {
return d
}
return uint32(r)
}
// GetBool will get the bool for k or return the default d if not found or invalid // GetBool will get the bool for k or return the default d if not found or invalid
func (c *C) GetBool(k string, d bool) bool { func (c *C) GetBool(k string, d bool) bool {
r := strings.ToLower(c.GetString(k, fmt.Sprintf("%v", d))) r := strings.ToLower(c.GetString(k, fmt.Sprintf("%v", d)))

View File

@ -17,8 +17,9 @@ import (
) )
// const ProbeLen = 100 // const ProbeLen = 100
const PromoteEvery = 1000 const defaultPromoteEvery = 1000 // Count of packets sent before we try moving a tunnel to a preferred underlay ip address
const ReQueryEvery = 5000 const defaultReQueryEvery = 5000 // Count of packets sent before re-querying a hostinfo to the lighthouse
const defaultReQueryWait = time.Minute // Minimum amount of seconds to wait before re-querying a hostinfo the lighthouse. Evaluated every ReQueryEvery
const MaxRemotes = 10 const MaxRemotes = 10
// MaxHostInfosPerVpnIp is the max number of hostinfos we will track for a given vpn ip // MaxHostInfosPerVpnIp is the max number of hostinfos we will track for a given vpn ip
@ -215,6 +216,10 @@ type HostInfo struct {
remoteCidr *cidr.Tree4 remoteCidr *cidr.Tree4
relayState RelayState relayState RelayState
// nextLHQuery is the earliest we can ask the lighthouse for new information.
// This is used to limit lighthouse re-queries in chatty clients
nextLHQuery atomic.Int64
// lastRebindCount is the other side of Interface.rebindCount, if these values don't match then we need to ask LH // lastRebindCount is the other side of Interface.rebindCount, if these values don't match then we need to ask LH
// for a punch from the remote end of this tunnel. The goal being to prime their conntrack for our traffic just like // for a punch from the remote end of this tunnel. The goal being to prime their conntrack for our traffic just like
// with a handshake // with a handshake
@ -535,7 +540,7 @@ func (hm *HostMap) ForEachIndex(f controlEach) {
// NOTE: It is an error to call this if you are a lighthouse since they should not roam clients! // NOTE: It is an error to call this if you are a lighthouse since they should not roam clients!
func (i *HostInfo) TryPromoteBest(preferredRanges []*net.IPNet, ifce *Interface) { func (i *HostInfo) TryPromoteBest(preferredRanges []*net.IPNet, ifce *Interface) {
c := i.promoteCounter.Add(1) c := i.promoteCounter.Add(1)
if c%PromoteEvery == 0 { if c%ifce.tryPromoteEvery.Load() == 0 {
// The lock here is currently protecting i.remote access // The lock here is currently protecting i.remote access
i.RLock() i.RLock()
remote := i.remote remote := i.remote
@ -563,7 +568,13 @@ func (i *HostInfo) TryPromoteBest(preferredRanges []*net.IPNet, ifce *Interface)
} }
// Re query our lighthouses for new remotes occasionally // Re query our lighthouses for new remotes occasionally
if c%ReQueryEvery == 0 && ifce.lightHouse != nil { if c%ifce.reQueryEvery.Load() == 0 && ifce.lightHouse != nil {
now := time.Now().UnixNano()
if now < i.nextLHQuery.Load() {
return
}
i.nextLHQuery.Store(now + ifce.reQueryWait.Load())
ifce.lightHouse.QueryServer(i.vpnIp, ifce) ifce.lightHouse.QueryServer(i.vpnIp, ifce)
} }
} }

View File

@ -46,6 +46,10 @@ type InterfaceConfig struct {
relayManager *relayManager relayManager *relayManager
punchy *Punchy punchy *Punchy
tryPromoteEvery uint32
reQueryEvery uint32
reQueryWait time.Duration
ConntrackCacheTimeout time.Duration ConntrackCacheTimeout time.Duration
l *logrus.Logger l *logrus.Logger
} }
@ -72,6 +76,10 @@ type Interface struct {
closed atomic.Bool closed atomic.Bool
relayManager *relayManager relayManager *relayManager
tryPromoteEvery atomic.Uint32
reQueryEvery atomic.Uint32
reQueryWait atomic.Int64
sendRecvErrorConfig sendRecvErrorConfig sendRecvErrorConfig sendRecvErrorConfig
// rebindCount is used to decide if an active tunnel should trigger a punch notification through a lighthouse // rebindCount is used to decide if an active tunnel should trigger a punch notification through a lighthouse
@ -186,6 +194,10 @@ func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
l: c.l, l: c.l,
} }
ifce.tryPromoteEvery.Store(c.tryPromoteEvery)
ifce.reQueryEvery.Store(c.reQueryEvery)
ifce.reQueryWait.Store(int64(c.reQueryWait))
ifce.certState.Store(c.certState) ifce.certState.Store(c.certState)
ifce.connectionManager = newConnectionManager(ctx, c.l, ifce, c.checkInterval, c.pendingDeletionInterval, c.punchy) ifce.connectionManager = newConnectionManager(ctx, c.l, ifce, c.checkInterval, c.pendingDeletionInterval, c.punchy)
@ -287,6 +299,7 @@ func (f *Interface) RegisterConfigChangeCallbacks(c *config.C) {
c.RegisterReloadCallback(f.reloadCertKey) c.RegisterReloadCallback(f.reloadCertKey)
c.RegisterReloadCallback(f.reloadFirewall) c.RegisterReloadCallback(f.reloadFirewall)
c.RegisterReloadCallback(f.reloadSendRecvError) c.RegisterReloadCallback(f.reloadSendRecvError)
c.RegisterReloadCallback(f.reloadMisc)
for _, udpConn := range f.writers { for _, udpConn := range f.writers {
c.RegisterReloadCallback(udpConn.ReloadConfig) c.RegisterReloadCallback(udpConn.ReloadConfig)
} }
@ -389,6 +402,26 @@ func (f *Interface) reloadSendRecvError(c *config.C) {
} }
} }
func (f *Interface) reloadMisc(c *config.C) {
if c.HasChanged("counters.try_promote") {
n := c.GetUint32("counters.try_promote", defaultPromoteEvery)
f.tryPromoteEvery.Store(n)
f.l.Info("counters.try_promote has changed")
}
if c.HasChanged("counters.requery_every_packets") {
n := c.GetUint32("counters.requery_every_packets", defaultReQueryEvery)
f.reQueryEvery.Store(n)
f.l.Info("counters.requery_every_packets has changed")
}
if c.HasChanged("timers.requery_wait_duration") {
n := c.GetDuration("timers.requery_wait_duration", defaultReQueryWait)
f.reQueryWait.Store(int64(n))
f.l.Info("timers.requery_wait_duration has changed")
}
}
func (f *Interface) emitStats(ctx context.Context, i time.Duration) { func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
ticker := time.NewTicker(i) ticker := time.NewTicker(i)
defer ticker.Stop() defer ticker.Stop()

View File

@ -261,6 +261,7 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
checkInterval := c.GetInt("timers.connection_alive_interval", 5) checkInterval := c.GetInt("timers.connection_alive_interval", 5)
pendingDeletionInterval := c.GetInt("timers.pending_deletion_interval", 10) pendingDeletionInterval := c.GetInt("timers.pending_deletion_interval", 10)
ifConfig := &InterfaceConfig{ ifConfig := &InterfaceConfig{
HostMap: hostMap, HostMap: hostMap,
Inside: tun, Inside: tun,
@ -273,6 +274,9 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
lightHouse: lightHouse, lightHouse: lightHouse,
checkInterval: time.Second * time.Duration(checkInterval), checkInterval: time.Second * time.Duration(checkInterval),
pendingDeletionInterval: time.Second * time.Duration(pendingDeletionInterval), pendingDeletionInterval: time.Second * time.Duration(pendingDeletionInterval),
tryPromoteEvery: c.GetUint32("counters.try_promote", defaultPromoteEvery),
reQueryEvery: c.GetUint32("counters.requery_every_packets", defaultReQueryEvery),
reQueryWait: c.GetDuration("timers.requery_wait_duration", defaultReQueryWait),
DropLocalBroadcast: c.GetBool("tun.drop_local_broadcast", false), DropLocalBroadcast: c.GetBool("tun.drop_local_broadcast", false),
DropMulticast: c.GetBool("tun.drop_multicast", false), DropMulticast: c.GetBool("tun.drop_multicast", false),
routines: routines, routines: routines,