Compare commits

..

6 Commits

Author SHA1 Message Date
Wade Simmons
03ab9a1208 update synctrace 2025-06-12 15:22:54 -04:00
Wade Simmons
4258c1388c Merge remote-tracking branch 'origin/master' into synctrace 2025-06-12 13:50:39 -04:00
Wade Simmons
2a2b6424ed add new locks added to master 2025-04-02 11:57:02 -04:00
Wade Simmons
f896e2a863 Merge remote-tracking branch 'origin/master' into synctrace 2025-04-02 11:00:53 -04:00
Wade Simmons
4db6049684 update 2025-04-02 10:53:51 -04:00
Wade Simmons
8f1dc12618 synctrace WIP 2024-05-29 12:54:38 -04:00
41 changed files with 449 additions and 989 deletions

View File

@@ -17,5 +17,5 @@ contact_links:
about: 'The documentation is the best place to start if you are new to Nebula.' about: 'The documentation is the best place to start if you are new to Nebula.'
- name: 💁 Support/Chat - name: 💁 Support/Chat
url: https://join.slack.com/t/nebulaoss/shared_invite/zt-39pk4xopc-CUKlGcb5Z39dQ0cK1v7ehA url: https://join.slack.com/t/nebulaoss/shared_invite/zt-2xqe6e7vn-k_KGi8s13nsr7cvHVvHvuQ
about: 'For faster support, join us on Slack for assistance!' about: 'For faster support, join us on Slack for assistance!'

View File

@@ -52,12 +52,4 @@ jobs:
working-directory: ./.github/workflows/smoke working-directory: ./.github/workflows/smoke
run: NAME="smoke-p256" ./smoke.sh run: NAME="smoke-p256" ./smoke.sh
- name: setup docker image for fips140
working-directory: ./.github/workflows/smoke
run: NAME="smoke-fips140" CURVE=P256 GOFIPS140=v1.0.0 LDFLAGS=-checklinkname=0 ./build.sh
- name: run smoke-fips140
working-directory: ./.github/workflows/smoke
run: NAME="smoke-fips140" ./smoke.sh
timeout-minutes: 10 timeout-minutes: 10

View File

@@ -32,9 +32,9 @@ jobs:
run: make vet run: make vet
- name: golangci-lint - name: golangci-lint
uses: golangci/golangci-lint-action@v8 uses: golangci/golangci-lint-action@v7
with: with:
version: v2.1 version: v2.0
- name: Test - name: Test
run: make test run: make test
@@ -115,9 +115,9 @@ jobs:
run: make vet run: make vet
- name: golangci-lint - name: golangci-lint
uses: golangci/golangci-lint-action@v8 uses: golangci/golangci-lint-action@v7
with: with:
version: v2.1 version: v2.0
- name: Test - name: Test
run: make test run: make test

View File

@@ -61,7 +61,7 @@ ALL = $(ALL_LINUX) \
windows-arm64 windows-arm64
e2e: e2e:
$(TEST_ENV) go test -tags=e2e_testing -count=1 $(TEST_FLAGS) ./e2e $(TEST_ENV) go test -tags=synctrace,e2e_testing -count=1 $(TEST_FLAGS) ./e2e
e2ev: TEST_FLAGS += -v e2ev: TEST_FLAGS += -v
e2ev: e2e e2ev: e2e
@@ -121,12 +121,12 @@ bin-pkcs11: CGO_ENABLED = 1
bin-pkcs11: bin bin-pkcs11: bin
bin: bin:
$(GOENV) go build $(BUILD_ARGS) -ldflags "$(LDFLAGS)" -o ./nebula${NEBULA_CMD_SUFFIX} ${NEBULA_CMD_PATH} go build $(BUILD_ARGS) -ldflags "$(LDFLAGS)" -o ./nebula${NEBULA_CMD_SUFFIX} ${NEBULA_CMD_PATH}
$(GOENV) go build $(BUILD_ARGS) -ldflags "$(LDFLAGS)" -o ./nebula-cert${NEBULA_CMD_SUFFIX} ./cmd/nebula-cert go build $(BUILD_ARGS) -ldflags "$(LDFLAGS)" -o ./nebula-cert${NEBULA_CMD_SUFFIX} ./cmd/nebula-cert
install: install:
$(GOENV) go install $(BUILD_ARGS) -ldflags "$(LDFLAGS)" ${NEBULA_CMD_PATH} go install $(BUILD_ARGS) -ldflags "$(LDFLAGS)" ${NEBULA_CMD_PATH}
$(GOENV) go install $(BUILD_ARGS) -ldflags "$(LDFLAGS)" ./cmd/nebula-cert go install $(BUILD_ARGS) -ldflags "$(LDFLAGS)" ./cmd/nebula-cert
build/linux-arm-%: GOENV += GOARM=$(word 3, $(subst -, ,$*)) build/linux-arm-%: GOENV += GOARM=$(word 3, $(subst -, ,$*))
build/linux-mips-%: GOENV += GOMIPS=$(word 3, $(subst -, ,$*)) build/linux-mips-%: GOENV += GOMIPS=$(word 3, $(subst -, ,$*))
@@ -215,14 +215,7 @@ ifeq ($(words $(MAKECMDGOALS)),1)
@$(MAKE) service ${.DEFAULT_GOAL} --no-print-directory @$(MAKE) service ${.DEFAULT_GOAL} --no-print-directory
endif endif
fips140: bin-docker: BUILD_ARGS = -tags=synctrace
@echo > $(NULL_FILE)
$(eval GOENV += GOFIPS140=v1.0.0)
$(eval LDFLAGS += -checklinkname=0)
ifeq ($(words $(MAKECMDGOALS)),1)
@$(MAKE) fips140 ${.DEFAULT_GOAL} --no-print-directory
endif
bin-docker: bin build/linux-amd64/nebula build/linux-amd64/nebula-cert bin-docker: bin build/linux-amd64/nebula build/linux-amd64/nebula-cert
smoke-docker: bin-docker smoke-docker: bin-docker
@@ -244,5 +237,5 @@ smoke-vagrant/%: bin-docker build/%/nebula
cd .github/workflows/smoke/ && ./smoke-vagrant.sh $* cd .github/workflows/smoke/ && ./smoke-vagrant.sh $*
.FORCE: .FORCE:
.PHONY: bench bench-cpu bench-cpu-long bin build-test-mobile e2e e2ev e2evv e2evvv e2evvvv fips140 proto release service smoke-docker smoke-docker-race test test-cov-html smoke-vagrant/% .PHONY: bench bench-cpu bench-cpu-long bin build-test-mobile e2e e2ev e2evv e2evvv e2evvvv proto release service smoke-docker smoke-docker-race test test-cov-html smoke-vagrant/%
.DEFAULT_GOAL := bin .DEFAULT_GOAL := bin

View File

@@ -12,7 +12,7 @@ Further documentation can be found [here](https://nebula.defined.net/docs/).
You can read more about Nebula [here](https://medium.com/p/884110a5579). You can read more about Nebula [here](https://medium.com/p/884110a5579).
You can also join the NebulaOSS Slack group [here](https://join.slack.com/t/nebulaoss/shared_invite/zt-39pk4xopc-CUKlGcb5Z39dQ0cK1v7ehA). You can also join the NebulaOSS Slack group [here](https://join.slack.com/t/nebulaoss/shared_invite/zt-2xqe6e7vn-k_KGi8s13nsr7cvHVvHvuQ).
## Supported Platforms ## Supported Platforms
@@ -143,24 +143,17 @@ To build nebula for a specific platform (ex, Windows):
See the [Makefile](Makefile) for more details on build targets See the [Makefile](Makefile) for more details on build targets
## Curve P256, BoringCrypto and FIPS 140-3 mode ## Curve P256 and BoringCrypto
The default curve used for cryptographic handshakes and signatures is Curve25519. This is the recommended setting for most users. If your deployment has certain compliance requirements, you have the option of creating your CA using `nebula-cert ca -curve P256` to use NIST Curve P256. The CA will then sign certificates using ECDSA P256, and any hosts using these certificates will use P256 for ECDH handshakes. The default curve used for cryptographic handshakes and signatures is Curve25519. This is the recommended setting for most users. If your deployment has certain compliance requirements, you have the option of creating your CA using `nebula-cert ca -curve P256` to use NIST Curve P256. The CA will then sign certificates using ECDSA P256, and any hosts using these certificates will use P256 for ECDH handshakes.
Nebula can be built using the [BoringCrypto GOEXPERIMENT](https://github.com/golang/go/blob/go1.20/src/crypto/internal/boring/README.md) by running either of the following make targets: In addition, Nebula can be built using the [BoringCrypto GOEXPERIMENT](https://github.com/golang/go/blob/go1.20/src/crypto/internal/boring/README.md) by running either of the following make targets:
```sh ```sh
make bin-boringcrypto make bin-boringcrypto
make release-boringcrypto make release-boringcrypto
``` ```
Nebula can also be built using the [FIPS 140-3](https://go.dev/doc/security/fips140) mode of Go by running either of the following make targets:
```sh
make fips140
make fips140 release
```
This is not the recommended default deployment, but may be useful based on your compliance requirements. This is not the recommended default deployment, but may be useful based on your compliance requirements.
## Credits ## Credits

View File

@@ -11,12 +11,12 @@ import (
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
"sync"
"syscall" "syscall"
"time" "time"
"dario.cat/mergo" "dario.cat/mergo"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/wadey/synctrace"
"gopkg.in/yaml.v3" "gopkg.in/yaml.v3"
) )
@@ -27,13 +27,14 @@ type C struct {
oldSettings map[string]any oldSettings map[string]any
callbacks []func(*C) callbacks []func(*C)
l *logrus.Logger l *logrus.Logger
reloadLock sync.Mutex reloadLock synctrace.Mutex
} }
func NewC(l *logrus.Logger) *C { func NewC(l *logrus.Logger) *C {
return &C{ return &C{
Settings: make(map[string]any), Settings: make(map[string]any),
l: l, l: l,
reloadLock: synctrace.NewMutex("config-reload"),
} }
} }

View File

@@ -4,17 +4,14 @@ import (
"bytes" "bytes"
"context" "context"
"encoding/binary" "encoding/binary"
"fmt"
"net/netip" "net/netip"
"sync"
"sync/atomic"
"time" "time"
"github.com/rcrowley/go-metrics" "github.com/rcrowley/go-metrics"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/slackhq/nebula/cert" "github.com/slackhq/nebula/cert"
"github.com/slackhq/nebula/config"
"github.com/slackhq/nebula/header" "github.com/slackhq/nebula/header"
"github.com/wadey/synctrace"
) )
type trafficDecision int type trafficDecision int
@@ -30,124 +27,130 @@ const (
) )
type connectionManager struct { type connectionManager struct {
in map[uint32]struct{}
inLock synctrace.RWMutex
out map[uint32]struct{}
outLock synctrace.RWMutex
// relayUsed holds which relay localIndexs are in use // relayUsed holds which relay localIndexs are in use
relayUsed map[uint32]struct{} relayUsed map[uint32]struct{}
relayUsedLock *sync.RWMutex relayUsedLock synctrace.RWMutex
hostMap *HostMap hostMap *HostMap
trafficTimer *LockingTimerWheel[uint32] trafficTimer *LockingTimerWheel[uint32]
intf *Interface intf *Interface
punchy *Punchy pendingDeletion map[uint32]struct{}
punchy *Punchy
// Configuration settings
checkInterval time.Duration checkInterval time.Duration
pendingDeletionInterval time.Duration pendingDeletionInterval time.Duration
inactivityTimeout atomic.Int64 metricsTxPunchy metrics.Counter
dropInactive atomic.Bool
metricsTxPunchy metrics.Counter
l *logrus.Logger l *logrus.Logger
} }
func newConnectionManagerFromConfig(l *logrus.Logger, c *config.C, hm *HostMap, p *Punchy) *connectionManager { func newConnectionManager(ctx context.Context, l *logrus.Logger, intf *Interface, checkInterval, pendingDeletionInterval time.Duration, punchy *Punchy) *connectionManager {
cm := &connectionManager{ var max time.Duration
hostMap: hm, if checkInterval < pendingDeletionInterval {
l: l, max = pendingDeletionInterval
punchy: p, } else {
relayUsed: make(map[uint32]struct{}), max = checkInterval
relayUsedLock: &sync.RWMutex{},
metricsTxPunchy: metrics.GetOrRegisterCounter("messages.tx.punchy", nil),
} }
cm.reload(c, true) nc := &connectionManager{
c.RegisterReloadCallback(func(c *config.C) { hostMap: intf.hostMap,
cm.reload(c, false) in: make(map[uint32]struct{}),
}) inLock: synctrace.NewRWMutex("connection-manager-in"),
out: make(map[uint32]struct{}),
return cm outLock: synctrace.NewRWMutex("connection-manager-out"),
} relayUsed: make(map[uint32]struct{}),
relayUsedLock: synctrace.NewRWMutex("connection-manager-relay-used"),
func (cm *connectionManager) reload(c *config.C, initial bool) { trafficTimer: NewLockingTimerWheel[uint32]("traffic-timer", time.Millisecond*500, max),
if initial { intf: intf,
cm.checkInterval = time.Duration(c.GetInt("timers.connection_alive_interval", 5)) * time.Second pendingDeletion: make(map[uint32]struct{}),
cm.pendingDeletionInterval = time.Duration(c.GetInt("timers.pending_deletion_interval", 10)) * time.Second checkInterval: checkInterval,
pendingDeletionInterval: pendingDeletionInterval,
// We want at least a minimum resolution of 500ms per tick so that we can hit these intervals punchy: punchy,
// pretty close to their configured duration. metricsTxPunchy: metrics.GetOrRegisterCounter("messages.tx.punchy", nil),
// The inactivity duration is checked each time a hostinfo ticks through so we don't need the wheel to contain it. l: l,
minDuration := min(time.Millisecond*500, cm.checkInterval, cm.pendingDeletionInterval)
maxDuration := max(cm.checkInterval, cm.pendingDeletionInterval)
cm.trafficTimer = NewLockingTimerWheel[uint32](minDuration, maxDuration)
} }
if initial || c.HasChanged("tunnels.inactivity_timeout") { nc.Start(ctx)
old := cm.getInactivityTimeout() return nc
cm.inactivityTimeout.Store((int64)(c.GetDuration("tunnels.inactivity_timeout", 10*time.Minute)))
if !initial {
cm.l.WithField("oldDuration", old).
WithField("newDuration", cm.getInactivityTimeout()).
Info("Inactivity timeout has changed")
}
}
if initial || c.HasChanged("tunnels.drop_inactive") {
old := cm.dropInactive.Load()
cm.dropInactive.Store(c.GetBool("tunnels.drop_inactive", false))
if !initial {
cm.l.WithField("oldBool", old).
WithField("newBool", cm.dropInactive.Load()).
Info("Drop inactive setting has changed")
}
}
} }
func (cm *connectionManager) getInactivityTimeout() time.Duration { func (n *connectionManager) In(localIndex uint32) {
return (time.Duration)(cm.inactivityTimeout.Load()) n.inLock.RLock()
}
func (cm *connectionManager) In(h *HostInfo) {
h.in.Store(true)
}
func (cm *connectionManager) Out(h *HostInfo) {
h.out.Store(true)
}
func (cm *connectionManager) RelayUsed(localIndex uint32) {
cm.relayUsedLock.RLock()
// If this already exists, return // If this already exists, return
if _, ok := cm.relayUsed[localIndex]; ok { if _, ok := n.in[localIndex]; ok {
cm.relayUsedLock.RUnlock() n.inLock.RUnlock()
return return
} }
cm.relayUsedLock.RUnlock() n.inLock.RUnlock()
cm.relayUsedLock.Lock() n.inLock.Lock()
cm.relayUsed[localIndex] = struct{}{} n.in[localIndex] = struct{}{}
cm.relayUsedLock.Unlock() n.inLock.Unlock()
}
func (n *connectionManager) Out(localIndex uint32) {
n.outLock.RLock()
// If this already exists, return
if _, ok := n.out[localIndex]; ok {
n.outLock.RUnlock()
return
}
n.outLock.RUnlock()
n.outLock.Lock()
n.out[localIndex] = struct{}{}
n.outLock.Unlock()
}
func (n *connectionManager) RelayUsed(localIndex uint32) {
n.relayUsedLock.RLock()
// If this already exists, return
if _, ok := n.relayUsed[localIndex]; ok {
n.relayUsedLock.RUnlock()
return
}
n.relayUsedLock.RUnlock()
n.relayUsedLock.Lock()
n.relayUsed[localIndex] = struct{}{}
n.relayUsedLock.Unlock()
} }
// getAndResetTrafficCheck returns if there was any inbound or outbound traffic within the last tick and // getAndResetTrafficCheck returns if there was any inbound or outbound traffic within the last tick and
// resets the state for this local index // resets the state for this local index
func (cm *connectionManager) getAndResetTrafficCheck(h *HostInfo, now time.Time) (bool, bool) { func (n *connectionManager) getAndResetTrafficCheck(localIndex uint32) (bool, bool) {
in := h.in.Swap(false) n.inLock.Lock()
out := h.out.Swap(false) n.outLock.Lock()
if in || out { _, in := n.in[localIndex]
h.lastUsed = now _, out := n.out[localIndex]
} delete(n.in, localIndex)
delete(n.out, localIndex)
n.inLock.Unlock()
n.outLock.Unlock()
return in, out return in, out
} }
// AddTrafficWatch must be called for every new HostInfo. func (n *connectionManager) AddTrafficWatch(localIndex uint32) {
// We will continue to monitor the HostInfo until the tunnel is dropped. // Use a write lock directly because it should be incredibly rare that we are ever already tracking this index
func (cm *connectionManager) AddTrafficWatch(h *HostInfo) { n.outLock.Lock()
if h.out.Swap(true) == false { if _, ok := n.out[localIndex]; ok {
cm.trafficTimer.Add(h.localIndexId, cm.checkInterval) n.outLock.Unlock()
return
} }
n.out[localIndex] = struct{}{}
n.trafficTimer.Add(localIndex, n.checkInterval)
n.outLock.Unlock()
} }
func (cm *connectionManager) Start(ctx context.Context) { func (n *connectionManager) Start(ctx context.Context) {
clockSource := time.NewTicker(cm.trafficTimer.t.tickDuration) go n.Run(ctx)
}
func (n *connectionManager) Run(ctx context.Context) {
//TODO: this tick should be based on the min wheel tick? Check firewall
clockSource := time.NewTicker(500 * time.Millisecond)
defer clockSource.Stop() defer clockSource.Stop()
p := []byte("") p := []byte("")
@@ -160,61 +163,61 @@ func (cm *connectionManager) Start(ctx context.Context) {
return return
case now := <-clockSource.C: case now := <-clockSource.C:
cm.trafficTimer.Advance(now) n.trafficTimer.Advance(now)
for { for {
localIndex, has := cm.trafficTimer.Purge() localIndex, has := n.trafficTimer.Purge()
if !has { if !has {
break break
} }
cm.doTrafficCheck(localIndex, p, nb, out, now) n.doTrafficCheck(localIndex, p, nb, out, now)
} }
} }
} }
} }
func (cm *connectionManager) doTrafficCheck(localIndex uint32, p, nb, out []byte, now time.Time) { func (n *connectionManager) doTrafficCheck(localIndex uint32, p, nb, out []byte, now time.Time) {
decision, hostinfo, primary := cm.makeTrafficDecision(localIndex, now) decision, hostinfo, primary := n.makeTrafficDecision(localIndex, now)
switch decision { switch decision {
case deleteTunnel: case deleteTunnel:
if cm.hostMap.DeleteHostInfo(hostinfo) { if n.hostMap.DeleteHostInfo(hostinfo) {
// Only clearing the lighthouse cache if this is the last hostinfo for this vpn ip in the hostmap // Only clearing the lighthouse cache if this is the last hostinfo for this vpn ip in the hostmap
cm.intf.lightHouse.DeleteVpnAddrs(hostinfo.vpnAddrs) n.intf.lightHouse.DeleteVpnAddrs(hostinfo.vpnAddrs)
} }
case closeTunnel: case closeTunnel:
cm.intf.sendCloseTunnel(hostinfo) n.intf.sendCloseTunnel(hostinfo)
cm.intf.closeTunnel(hostinfo) n.intf.closeTunnel(hostinfo)
case swapPrimary: case swapPrimary:
cm.swapPrimary(hostinfo, primary) n.swapPrimary(hostinfo, primary)
case migrateRelays: case migrateRelays:
cm.migrateRelayUsed(hostinfo, primary) n.migrateRelayUsed(hostinfo, primary)
case tryRehandshake: case tryRehandshake:
cm.tryRehandshake(hostinfo) n.tryRehandshake(hostinfo)
case sendTestPacket: case sendTestPacket:
cm.intf.SendMessageToHostInfo(header.Test, header.TestRequest, hostinfo, p, nb, out) n.intf.SendMessageToHostInfo(header.Test, header.TestRequest, hostinfo, p, nb, out)
} }
cm.resetRelayTrafficCheck(hostinfo) n.resetRelayTrafficCheck(hostinfo)
} }
func (cm *connectionManager) resetRelayTrafficCheck(hostinfo *HostInfo) { func (n *connectionManager) resetRelayTrafficCheck(hostinfo *HostInfo) {
if hostinfo != nil { if hostinfo != nil {
cm.relayUsedLock.Lock() n.relayUsedLock.Lock()
defer cm.relayUsedLock.Unlock() defer n.relayUsedLock.Unlock()
// No need to migrate any relays, delete usage info now. // No need to migrate any relays, delete usage info now.
for _, idx := range hostinfo.relayState.CopyRelayForIdxs() { for _, idx := range hostinfo.relayState.CopyRelayForIdxs() {
delete(cm.relayUsed, idx) delete(n.relayUsed, idx)
} }
} }
} }
func (cm *connectionManager) migrateRelayUsed(oldhostinfo, newhostinfo *HostInfo) { func (n *connectionManager) migrateRelayUsed(oldhostinfo, newhostinfo *HostInfo) {
relayFor := oldhostinfo.relayState.CopyAllRelayFor() relayFor := oldhostinfo.relayState.CopyAllRelayFor()
for _, r := range relayFor { for _, r := range relayFor {
@@ -224,51 +227,46 @@ func (cm *connectionManager) migrateRelayUsed(oldhostinfo, newhostinfo *HostInfo
var relayFrom netip.Addr var relayFrom netip.Addr
var relayTo netip.Addr var relayTo netip.Addr
switch { switch {
case ok: case ok && existing.State == Established:
switch existing.State { // This relay already exists in newhostinfo, then do nothing.
case Established, PeerRequested, Disestablished: continue
// This relay already exists in newhostinfo, then do nothing. case ok && existing.State == Requested:
continue // The relay exists in a Requested state; re-send the request
case Requested: index = existing.LocalIndex
// The relay exists in a Requested state; re-send the request switch r.Type {
index = existing.LocalIndex case TerminalType:
switch r.Type { relayFrom = n.intf.myVpnAddrs[0]
case TerminalType: relayTo = existing.PeerAddr
relayFrom = cm.intf.myVpnAddrs[0] case ForwardingType:
relayTo = existing.PeerAddr relayFrom = existing.PeerAddr
case ForwardingType: relayTo = newhostinfo.vpnAddrs[0]
relayFrom = existing.PeerAddr default:
relayTo = newhostinfo.vpnAddrs[0] // should never happen
default:
// should never happen
panic(fmt.Sprintf("Migrating unknown relay type: %v", r.Type))
}
} }
case !ok: case !ok:
cm.relayUsedLock.RLock() n.relayUsedLock.RLock()
if _, relayUsed := cm.relayUsed[r.LocalIndex]; !relayUsed { if _, relayUsed := n.relayUsed[r.LocalIndex]; !relayUsed {
// The relay hasn't been used; don't migrate it. // The relay hasn't been used; don't migrate it.
cm.relayUsedLock.RUnlock() n.relayUsedLock.RUnlock()
continue continue
} }
cm.relayUsedLock.RUnlock() n.relayUsedLock.RUnlock()
// The relay doesn't exist at all; create some relay state and send the request. // The relay doesn't exist at all; create some relay state and send the request.
var err error var err error
index, err = AddRelay(cm.l, newhostinfo, cm.hostMap, r.PeerAddr, nil, r.Type, Requested) index, err = AddRelay(n.l, newhostinfo, n.hostMap, r.PeerAddr, nil, r.Type, Requested)
if err != nil { if err != nil {
cm.l.WithError(err).Error("failed to migrate relay to new hostinfo") n.l.WithError(err).Error("failed to migrate relay to new hostinfo")
continue continue
} }
switch r.Type { switch r.Type {
case TerminalType: case TerminalType:
relayFrom = cm.intf.myVpnAddrs[0] relayFrom = n.intf.myVpnAddrs[0]
relayTo = r.PeerAddr relayTo = r.PeerAddr
case ForwardingType: case ForwardingType:
relayFrom = r.PeerAddr relayFrom = r.PeerAddr
relayTo = newhostinfo.vpnAddrs[0] relayTo = newhostinfo.vpnAddrs[0]
default: default:
// should never happen // should never happen
panic(fmt.Sprintf("Migrating unknown relay type: %v", r.Type))
} }
} }
@@ -281,12 +279,12 @@ func (cm *connectionManager) migrateRelayUsed(oldhostinfo, newhostinfo *HostInfo
switch newhostinfo.GetCert().Certificate.Version() { switch newhostinfo.GetCert().Certificate.Version() {
case cert.Version1: case cert.Version1:
if !relayFrom.Is4() { if !relayFrom.Is4() {
cm.l.Error("can not migrate v1 relay with a v6 network because the relay is not running a current nebula version") n.l.Error("can not migrate v1 relay with a v6 network because the relay is not running a current nebula version")
continue continue
} }
if !relayTo.Is4() { if !relayTo.Is4() {
cm.l.Error("can not migrate v1 relay with a v6 remote network because the relay is not running a current nebula version") n.l.Error("can not migrate v1 relay with a v6 remote network because the relay is not running a current nebula version")
continue continue
} }
@@ -298,16 +296,16 @@ func (cm *connectionManager) migrateRelayUsed(oldhostinfo, newhostinfo *HostInfo
req.RelayFromAddr = netAddrToProtoAddr(relayFrom) req.RelayFromAddr = netAddrToProtoAddr(relayFrom)
req.RelayToAddr = netAddrToProtoAddr(relayTo) req.RelayToAddr = netAddrToProtoAddr(relayTo)
default: default:
newhostinfo.logger(cm.l).Error("Unknown certificate version found while attempting to migrate relay") newhostinfo.logger(n.l).Error("Unknown certificate version found while attempting to migrate relay")
continue continue
} }
msg, err := req.Marshal() msg, err := req.Marshal()
if err != nil { if err != nil {
cm.l.WithError(err).Error("failed to marshal Control message to migrate relay") n.l.WithError(err).Error("failed to marshal Control message to migrate relay")
} else { } else {
cm.intf.SendMessageToHostInfo(header.Control, 0, newhostinfo, msg, make([]byte, 12), make([]byte, mtu)) n.intf.SendMessageToHostInfo(header.Control, 0, newhostinfo, msg, make([]byte, 12), make([]byte, mtu))
cm.l.WithFields(logrus.Fields{ n.l.WithFields(logrus.Fields{
"relayFrom": req.RelayFromAddr, "relayFrom": req.RelayFromAddr,
"relayTo": req.RelayToAddr, "relayTo": req.RelayToAddr,
"initiatorRelayIndex": req.InitiatorRelayIndex, "initiatorRelayIndex": req.InitiatorRelayIndex,
@@ -318,45 +316,46 @@ func (cm *connectionManager) migrateRelayUsed(oldhostinfo, newhostinfo *HostInfo
} }
} }
func (cm *connectionManager) makeTrafficDecision(localIndex uint32, now time.Time) (trafficDecision, *HostInfo, *HostInfo) { func (n *connectionManager) makeTrafficDecision(localIndex uint32, now time.Time) (trafficDecision, *HostInfo, *HostInfo) {
// Read lock the main hostmap to order decisions based on tunnels being the primary tunnel n.hostMap.RLock()
cm.hostMap.RLock() defer n.hostMap.RUnlock()
defer cm.hostMap.RUnlock()
hostinfo := cm.hostMap.Indexes[localIndex] hostinfo := n.hostMap.Indexes[localIndex]
if hostinfo == nil { if hostinfo == nil {
cm.l.WithField("localIndex", localIndex).Debugln("Not found in hostmap") n.l.WithField("localIndex", localIndex).Debugf("Not found in hostmap")
delete(n.pendingDeletion, localIndex)
return doNothing, nil, nil return doNothing, nil, nil
} }
if cm.isInvalidCertificate(now, hostinfo) { if n.isInvalidCertificate(now, hostinfo) {
delete(n.pendingDeletion, hostinfo.localIndexId)
return closeTunnel, hostinfo, nil return closeTunnel, hostinfo, nil
} }
primary := cm.hostMap.Hosts[hostinfo.vpnAddrs[0]] primary := n.hostMap.Hosts[hostinfo.vpnAddrs[0]]
mainHostInfo := true mainHostInfo := true
if primary != nil && primary != hostinfo { if primary != nil && primary != hostinfo {
mainHostInfo = false mainHostInfo = false
} }
// Check for traffic on this hostinfo // Check for traffic on this hostinfo
inTraffic, outTraffic := cm.getAndResetTrafficCheck(hostinfo, now) inTraffic, outTraffic := n.getAndResetTrafficCheck(localIndex)
// A hostinfo is determined alive if there is incoming traffic // A hostinfo is determined alive if there is incoming traffic
if inTraffic { if inTraffic {
decision := doNothing decision := doNothing
if cm.l.Level >= logrus.DebugLevel { if n.l.Level >= logrus.DebugLevel {
hostinfo.logger(cm.l). hostinfo.logger(n.l).
WithField("tunnelCheck", m{"state": "alive", "method": "passive"}). WithField("tunnelCheck", m{"state": "alive", "method": "passive"}).
Debug("Tunnel status") Debug("Tunnel status")
} }
hostinfo.pendingDeletion.Store(false) delete(n.pendingDeletion, hostinfo.localIndexId)
if mainHostInfo { if mainHostInfo {
decision = tryRehandshake decision = tryRehandshake
} else { } else {
if cm.shouldSwapPrimary(hostinfo, primary) { if n.shouldSwapPrimary(hostinfo, primary) {
decision = swapPrimary decision = swapPrimary
} else { } else {
// migrate the relays to the primary, if in use. // migrate the relays to the primary, if in use.
@@ -364,55 +363,46 @@ func (cm *connectionManager) makeTrafficDecision(localIndex uint32, now time.Tim
} }
} }
cm.trafficTimer.Add(hostinfo.localIndexId, cm.checkInterval) n.trafficTimer.Add(hostinfo.localIndexId, n.checkInterval)
if !outTraffic { if !outTraffic {
// Send a punch packet to keep the NAT state alive // Send a punch packet to keep the NAT state alive
cm.sendPunch(hostinfo) n.sendPunch(hostinfo)
} }
return decision, hostinfo, primary return decision, hostinfo, primary
} }
if hostinfo.pendingDeletion.Load() { if _, ok := n.pendingDeletion[hostinfo.localIndexId]; ok {
// We have already sent a test packet and nothing was returned, this hostinfo is dead // We have already sent a test packet and nothing was returned, this hostinfo is dead
hostinfo.logger(cm.l). hostinfo.logger(n.l).
WithField("tunnelCheck", m{"state": "dead", "method": "active"}). WithField("tunnelCheck", m{"state": "dead", "method": "active"}).
Info("Tunnel status") Info("Tunnel status")
delete(n.pendingDeletion, hostinfo.localIndexId)
return deleteTunnel, hostinfo, nil return deleteTunnel, hostinfo, nil
} }
decision := doNothing decision := doNothing
if hostinfo != nil && hostinfo.ConnectionState != nil && mainHostInfo { if hostinfo != nil && hostinfo.ConnectionState != nil && mainHostInfo {
if !outTraffic { if !outTraffic {
inactiveFor, isInactive := cm.isInactive(hostinfo, now)
if isInactive {
// Tunnel is inactive, tear it down
hostinfo.logger(cm.l).
WithField("inactiveDuration", inactiveFor).
WithField("primary", mainHostInfo).
Info("Dropping tunnel due to inactivity")
return closeTunnel, hostinfo, primary
}
// If we aren't sending or receiving traffic then its an unused tunnel and we don't to test the tunnel. // If we aren't sending or receiving traffic then its an unused tunnel and we don't to test the tunnel.
// Just maintain NAT state if configured to do so. // Just maintain NAT state if configured to do so.
cm.sendPunch(hostinfo) n.sendPunch(hostinfo)
cm.trafficTimer.Add(hostinfo.localIndexId, cm.checkInterval) n.trafficTimer.Add(hostinfo.localIndexId, n.checkInterval)
return doNothing, nil, nil return doNothing, nil, nil
} }
if cm.punchy.GetTargetEverything() { if n.punchy.GetTargetEverything() {
// This is similar to the old punchy behavior with a slight optimization. // This is similar to the old punchy behavior with a slight optimization.
// We aren't receiving traffic but we are sending it, punch on all known // We aren't receiving traffic but we are sending it, punch on all known
// ips in case we need to re-prime NAT state // ips in case we need to re-prime NAT state
cm.sendPunch(hostinfo) n.sendPunch(hostinfo)
} }
if cm.l.Level >= logrus.DebugLevel { if n.l.Level >= logrus.DebugLevel {
hostinfo.logger(cm.l). hostinfo.logger(n.l).
WithField("tunnelCheck", m{"state": "testing", "method": "active"}). WithField("tunnelCheck", m{"state": "testing", "method": "active"}).
Debug("Tunnel status") Debug("Tunnel status")
} }
@@ -421,33 +411,17 @@ func (cm *connectionManager) makeTrafficDecision(localIndex uint32, now time.Tim
decision = sendTestPacket decision = sendTestPacket
} else { } else {
if cm.l.Level >= logrus.DebugLevel { if n.l.Level >= logrus.DebugLevel {
hostinfo.logger(cm.l).Debugf("Hostinfo sadness") hostinfo.logger(n.l).Debugf("Hostinfo sadness")
} }
} }
hostinfo.pendingDeletion.Store(true) n.pendingDeletion[hostinfo.localIndexId] = struct{}{}
cm.trafficTimer.Add(hostinfo.localIndexId, cm.pendingDeletionInterval) n.trafficTimer.Add(hostinfo.localIndexId, n.pendingDeletionInterval)
return decision, hostinfo, nil return decision, hostinfo, nil
} }
func (cm *connectionManager) isInactive(hostinfo *HostInfo, now time.Time) (time.Duration, bool) { func (n *connectionManager) shouldSwapPrimary(current, primary *HostInfo) bool {
if cm.dropInactive.Load() == false {
// We aren't configured to drop inactive tunnels
return 0, false
}
inactiveDuration := now.Sub(hostinfo.lastUsed)
if inactiveDuration < cm.getInactivityTimeout() {
// It's not considered inactive
return inactiveDuration, false
}
// The tunnel is inactive
return inactiveDuration, true
}
func (cm *connectionManager) shouldSwapPrimary(current, primary *HostInfo) bool {
// The primary tunnel is the most recent handshake to complete locally and should work entirely fine. // The primary tunnel is the most recent handshake to complete locally and should work entirely fine.
// If we are here then we have multiple tunnels for a host pair and neither side believes the same tunnel is primary. // If we are here then we have multiple tunnels for a host pair and neither side believes the same tunnel is primary.
// Let's sort this out. // Let's sort this out.
@@ -455,80 +429,73 @@ func (cm *connectionManager) shouldSwapPrimary(current, primary *HostInfo) bool
// Only one side should swap because if both swap then we may never resolve to a single tunnel. // Only one side should swap because if both swap then we may never resolve to a single tunnel.
// vpn addr is static across all tunnels for this host pair so lets // vpn addr is static across all tunnels for this host pair so lets
// use that to determine if we should consider swapping. // use that to determine if we should consider swapping.
if current.vpnAddrs[0].Compare(cm.intf.myVpnAddrs[0]) < 0 { if current.vpnAddrs[0].Compare(n.intf.myVpnAddrs[0]) < 0 {
// Their primary vpn addr is less than mine. Do not swap. // Their primary vpn addr is less than mine. Do not swap.
return false return false
} }
crt := cm.intf.pki.getCertState().getCertificate(current.ConnectionState.myCert.Version()) crt := n.intf.pki.getCertState().getCertificate(current.ConnectionState.myCert.Version())
// If this tunnel is using the latest certificate then we should swap it to primary for a bit and see if things // If this tunnel is using the latest certificate then we should swap it to primary for a bit and see if things
// settle down. // settle down.
return bytes.Equal(current.ConnectionState.myCert.Signature(), crt.Signature()) return bytes.Equal(current.ConnectionState.myCert.Signature(), crt.Signature())
} }
func (cm *connectionManager) swapPrimary(current, primary *HostInfo) { func (n *connectionManager) swapPrimary(current, primary *HostInfo) {
cm.hostMap.Lock() n.hostMap.Lock()
// Make sure the primary is still the same after the write lock. This avoids a race with a rehandshake. // Make sure the primary is still the same after the write lock. This avoids a race with a rehandshake.
if cm.hostMap.Hosts[current.vpnAddrs[0]] == primary { if n.hostMap.Hosts[current.vpnAddrs[0]] == primary {
cm.hostMap.unlockedMakePrimary(current) n.hostMap.unlockedMakePrimary(current)
} }
cm.hostMap.Unlock() n.hostMap.Unlock()
} }
// isInvalidCertificate will check if we should destroy a tunnel if pki.disconnect_invalid is true and // isInvalidCertificate will check if we should destroy a tunnel if pki.disconnect_invalid is true and
// the certificate is no longer valid. Block listed certificates will skip the pki.disconnect_invalid // the certificate is no longer valid. Block listed certificates will skip the pki.disconnect_invalid
// check and return true. // check and return true.
func (cm *connectionManager) isInvalidCertificate(now time.Time, hostinfo *HostInfo) bool { func (n *connectionManager) isInvalidCertificate(now time.Time, hostinfo *HostInfo) bool {
remoteCert := hostinfo.GetCert() remoteCert := hostinfo.GetCert()
if remoteCert == nil { if remoteCert == nil {
return false return false
} }
caPool := cm.intf.pki.GetCAPool() caPool := n.intf.pki.GetCAPool()
err := caPool.VerifyCachedCertificate(now, remoteCert) err := caPool.VerifyCachedCertificate(now, remoteCert)
if err == nil { if err == nil {
return false return false
} }
if !cm.intf.disconnectInvalid.Load() && err != cert.ErrBlockListed { if !n.intf.disconnectInvalid.Load() && err != cert.ErrBlockListed {
// Block listed certificates should always be disconnected // Block listed certificates should always be disconnected
return false return false
} }
hostinfo.logger(cm.l).WithError(err). hostinfo.logger(n.l).WithError(err).
WithField("fingerprint", remoteCert.Fingerprint). WithField("fingerprint", remoteCert.Fingerprint).
Info("Remote certificate is no longer valid, tearing down the tunnel") Info("Remote certificate is no longer valid, tearing down the tunnel")
return true return true
} }
func (cm *connectionManager) sendPunch(hostinfo *HostInfo) { func (n *connectionManager) sendPunch(hostinfo *HostInfo) {
if !cm.punchy.GetPunch() { if !n.punchy.GetPunch() {
// Punching is disabled // Punching is disabled
return return
} }
if cm.intf.lightHouse.IsAnyLighthouseAddr(hostinfo.vpnAddrs) { if n.punchy.GetTargetEverything() {
// Do not punch to lighthouses, we assume our lighthouse update interval is good enough. hostinfo.remotes.ForEach(n.hostMap.GetPreferredRanges(), func(addr netip.AddrPort, preferred bool) {
// In the event the update interval is not sufficient to maintain NAT state then a publicly available lighthouse n.metricsTxPunchy.Inc(1)
// would lose the ability to notify us and punchy.respond would become unreliable. n.intf.outside.WriteTo([]byte{1}, addr)
return
}
if cm.punchy.GetTargetEverything() {
hostinfo.remotes.ForEach(cm.hostMap.GetPreferredRanges(), func(addr netip.AddrPort, preferred bool) {
cm.metricsTxPunchy.Inc(1)
cm.intf.outside.WriteTo([]byte{1}, addr)
}) })
} else if hostinfo.remote.IsValid() { } else if hostinfo.remote.IsValid() {
cm.metricsTxPunchy.Inc(1) n.metricsTxPunchy.Inc(1)
cm.intf.outside.WriteTo([]byte{1}, hostinfo.remote) n.intf.outside.WriteTo([]byte{1}, hostinfo.remote)
} }
} }
func (cm *connectionManager) tryRehandshake(hostinfo *HostInfo) { func (n *connectionManager) tryRehandshake(hostinfo *HostInfo) {
cs := cm.intf.pki.getCertState() cs := n.intf.pki.getCertState()
curCrt := hostinfo.ConnectionState.myCert curCrt := hostinfo.ConnectionState.myCert
myCrt := cs.getCertificate(curCrt.Version()) myCrt := cs.getCertificate(curCrt.Version())
if curCrt.Version() >= cs.initiatingVersion && bytes.Equal(curCrt.Signature(), myCrt.Signature()) == true { if curCrt.Version() >= cs.initiatingVersion && bytes.Equal(curCrt.Signature(), myCrt.Signature()) == true {
@@ -536,9 +503,9 @@ func (cm *connectionManager) tryRehandshake(hostinfo *HostInfo) {
return return
} }
cm.l.WithField("vpnAddrs", hostinfo.vpnAddrs). n.l.WithField("vpnAddrs", hostinfo.vpnAddrs).
WithField("reason", "local certificate is not current"). WithField("reason", "local certificate is not current").
Info("Re-handshaking with remote") Info("Re-handshaking with remote")
cm.intf.handshakeManager.StartHandshake(hostinfo.vpnAddrs[0], nil) n.intf.handshakeManager.StartHandshake(hostinfo.vpnAddrs[0], nil)
} }

View File

@@ -1,6 +1,7 @@
package nebula package nebula
import ( import (
"context"
"crypto/ed25519" "crypto/ed25519"
"crypto/rand" "crypto/rand"
"net/netip" "net/netip"
@@ -63,10 +64,10 @@ func Test_NewConnectionManagerTest(t *testing.T) {
ifce.pki.cs.Store(cs) ifce.pki.cs.Store(cs)
// Create manager // Create manager
conf := config.NewC(l) ctx, cancel := context.WithCancel(context.Background())
punchy := NewPunchyFromConfig(l, conf) defer cancel()
nc := newConnectionManagerFromConfig(l, conf, hostMap, punchy) punchy := NewPunchyFromConfig(l, config.NewC(l))
nc.intf = ifce nc := newConnectionManager(ctx, l, ifce, 5, 10, punchy)
p := []byte("") p := []byte("")
nb := make([]byte, 12, 12) nb := make([]byte, 12, 12)
out := make([]byte, mtu) out := make([]byte, mtu)
@@ -84,33 +85,32 @@ func Test_NewConnectionManagerTest(t *testing.T) {
nc.hostMap.unlockedAddHostInfo(hostinfo, ifce) nc.hostMap.unlockedAddHostInfo(hostinfo, ifce)
// We saw traffic out to vpnIp // We saw traffic out to vpnIp
nc.Out(hostinfo) nc.Out(hostinfo.localIndexId)
nc.In(hostinfo) nc.In(hostinfo.localIndexId)
assert.False(t, hostinfo.pendingDeletion.Load()) assert.NotContains(t, nc.pendingDeletion, hostinfo.localIndexId)
assert.Contains(t, nc.hostMap.Hosts, hostinfo.vpnAddrs[0]) assert.Contains(t, nc.hostMap.Hosts, hostinfo.vpnAddrs[0])
assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId) assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId)
assert.True(t, hostinfo.out.Load()) assert.Contains(t, nc.out, hostinfo.localIndexId)
assert.True(t, hostinfo.in.Load())
// Do a traffic check tick, should not be pending deletion but should not have any in/out packets recorded // Do a traffic check tick, should not be pending deletion but should not have any in/out packets recorded
nc.doTrafficCheck(hostinfo.localIndexId, p, nb, out, time.Now()) nc.doTrafficCheck(hostinfo.localIndexId, p, nb, out, time.Now())
assert.False(t, hostinfo.pendingDeletion.Load()) assert.NotContains(t, nc.pendingDeletion, hostinfo.localIndexId)
assert.False(t, hostinfo.out.Load()) assert.NotContains(t, nc.out, hostinfo.localIndexId)
assert.False(t, hostinfo.in.Load()) assert.NotContains(t, nc.in, hostinfo.localIndexId)
// Do another traffic check tick, this host should be pending deletion now // Do another traffic check tick, this host should be pending deletion now
nc.Out(hostinfo) nc.Out(hostinfo.localIndexId)
assert.True(t, hostinfo.out.Load())
nc.doTrafficCheck(hostinfo.localIndexId, p, nb, out, time.Now()) nc.doTrafficCheck(hostinfo.localIndexId, p, nb, out, time.Now())
assert.True(t, hostinfo.pendingDeletion.Load()) assert.Contains(t, nc.pendingDeletion, hostinfo.localIndexId)
assert.False(t, hostinfo.out.Load()) assert.NotContains(t, nc.out, hostinfo.localIndexId)
assert.False(t, hostinfo.in.Load()) assert.NotContains(t, nc.in, hostinfo.localIndexId)
assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId) assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId)
assert.Contains(t, nc.hostMap.Hosts, hostinfo.vpnAddrs[0]) assert.Contains(t, nc.hostMap.Hosts, hostinfo.vpnAddrs[0])
// Do a final traffic check tick, the host should now be removed // Do a final traffic check tick, the host should now be removed
nc.doTrafficCheck(hostinfo.localIndexId, p, nb, out, time.Now()) nc.doTrafficCheck(hostinfo.localIndexId, p, nb, out, time.Now())
assert.NotContains(t, nc.hostMap.Hosts, hostinfo.vpnAddrs) assert.NotContains(t, nc.pendingDeletion, hostinfo.localIndexId)
assert.NotContains(t, nc.hostMap.Hosts, hostinfo.vpnAddrs[0])
assert.NotContains(t, nc.hostMap.Indexes, hostinfo.localIndexId) assert.NotContains(t, nc.hostMap.Indexes, hostinfo.localIndexId)
} }
@@ -146,10 +146,10 @@ func Test_NewConnectionManagerTest2(t *testing.T) {
ifce.pki.cs.Store(cs) ifce.pki.cs.Store(cs)
// Create manager // Create manager
conf := config.NewC(l) ctx, cancel := context.WithCancel(context.Background())
punchy := NewPunchyFromConfig(l, conf) defer cancel()
nc := newConnectionManagerFromConfig(l, conf, hostMap, punchy) punchy := NewPunchyFromConfig(l, config.NewC(l))
nc.intf = ifce nc := newConnectionManager(ctx, l, ifce, 5, 10, punchy)
p := []byte("") p := []byte("")
nb := make([]byte, 12, 12) nb := make([]byte, 12, 12)
out := make([]byte, mtu) out := make([]byte, mtu)
@@ -167,129 +167,33 @@ func Test_NewConnectionManagerTest2(t *testing.T) {
nc.hostMap.unlockedAddHostInfo(hostinfo, ifce) nc.hostMap.unlockedAddHostInfo(hostinfo, ifce)
// We saw traffic out to vpnIp // We saw traffic out to vpnIp
nc.Out(hostinfo) nc.Out(hostinfo.localIndexId)
nc.In(hostinfo) nc.In(hostinfo.localIndexId)
assert.True(t, hostinfo.in.Load()) assert.NotContains(t, nc.pendingDeletion, hostinfo.vpnAddrs[0])
assert.True(t, hostinfo.out.Load())
assert.False(t, hostinfo.pendingDeletion.Load())
assert.Contains(t, nc.hostMap.Hosts, hostinfo.vpnAddrs[0]) assert.Contains(t, nc.hostMap.Hosts, hostinfo.vpnAddrs[0])
assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId) assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId)
// Do a traffic check tick, should not be pending deletion but should not have any in/out packets recorded // Do a traffic check tick, should not be pending deletion but should not have any in/out packets recorded
nc.doTrafficCheck(hostinfo.localIndexId, p, nb, out, time.Now()) nc.doTrafficCheck(hostinfo.localIndexId, p, nb, out, time.Now())
assert.False(t, hostinfo.pendingDeletion.Load()) assert.NotContains(t, nc.pendingDeletion, hostinfo.localIndexId)
assert.False(t, hostinfo.out.Load()) assert.NotContains(t, nc.out, hostinfo.localIndexId)
assert.False(t, hostinfo.in.Load()) assert.NotContains(t, nc.in, hostinfo.localIndexId)
// Do another traffic check tick, this host should be pending deletion now // Do another traffic check tick, this host should be pending deletion now
nc.Out(hostinfo) nc.Out(hostinfo.localIndexId)
nc.doTrafficCheck(hostinfo.localIndexId, p, nb, out, time.Now()) nc.doTrafficCheck(hostinfo.localIndexId, p, nb, out, time.Now())
assert.True(t, hostinfo.pendingDeletion.Load()) assert.Contains(t, nc.pendingDeletion, hostinfo.localIndexId)
assert.False(t, hostinfo.out.Load()) assert.NotContains(t, nc.out, hostinfo.localIndexId)
assert.False(t, hostinfo.in.Load()) assert.NotContains(t, nc.in, hostinfo.localIndexId)
assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId) assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId)
assert.Contains(t, nc.hostMap.Hosts, hostinfo.vpnAddrs[0]) assert.Contains(t, nc.hostMap.Hosts, hostinfo.vpnAddrs[0])
// We saw traffic, should no longer be pending deletion // We saw traffic, should no longer be pending deletion
nc.In(hostinfo) nc.In(hostinfo.localIndexId)
nc.doTrafficCheck(hostinfo.localIndexId, p, nb, out, time.Now()) nc.doTrafficCheck(hostinfo.localIndexId, p, nb, out, time.Now())
assert.False(t, hostinfo.pendingDeletion.Load()) assert.NotContains(t, nc.pendingDeletion, hostinfo.localIndexId)
assert.False(t, hostinfo.out.Load()) assert.NotContains(t, nc.out, hostinfo.localIndexId)
assert.False(t, hostinfo.in.Load()) assert.NotContains(t, nc.in, hostinfo.localIndexId)
assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId)
assert.Contains(t, nc.hostMap.Hosts, hostinfo.vpnAddrs[0])
}
func Test_NewConnectionManager_DisconnectInactive(t *testing.T) {
l := test.NewLogger()
localrange := netip.MustParsePrefix("10.1.1.1/24")
vpnAddrs := []netip.Addr{netip.MustParseAddr("172.1.1.2")}
preferredRanges := []netip.Prefix{localrange}
// Very incomplete mock objects
hostMap := newHostMap(l)
hostMap.preferredRanges.Store(&preferredRanges)
cs := &CertState{
initiatingVersion: cert.Version1,
privateKey: []byte{},
v1Cert: &dummyCert{version: cert.Version1},
v1HandshakeBytes: []byte{},
}
lh := newTestLighthouse()
ifce := &Interface{
hostMap: hostMap,
inside: &test.NoopTun{},
outside: &udp.NoopConn{},
firewall: &Firewall{},
lightHouse: lh,
pki: &PKI{},
handshakeManager: NewHandshakeManager(l, hostMap, lh, &udp.NoopConn{}, defaultHandshakeConfig),
l: l,
}
ifce.pki.cs.Store(cs)
// Create manager
conf := config.NewC(l)
conf.Settings["tunnels"] = map[string]any{
"drop_inactive": true,
}
punchy := NewPunchyFromConfig(l, conf)
nc := newConnectionManagerFromConfig(l, conf, hostMap, punchy)
assert.True(t, nc.dropInactive.Load())
nc.intf = ifce
// Add an ip we have established a connection w/ to hostmap
hostinfo := &HostInfo{
vpnAddrs: vpnAddrs,
localIndexId: 1099,
remoteIndexId: 9901,
}
hostinfo.ConnectionState = &ConnectionState{
myCert: &dummyCert{version: cert.Version1},
H: &noise.HandshakeState{},
}
nc.hostMap.unlockedAddHostInfo(hostinfo, ifce)
// Do a traffic check tick, in and out should be cleared but should not be pending deletion
nc.Out(hostinfo)
nc.In(hostinfo)
assert.True(t, hostinfo.out.Load())
assert.True(t, hostinfo.in.Load())
now := time.Now()
decision, _, _ := nc.makeTrafficDecision(hostinfo.localIndexId, now)
assert.Equal(t, tryRehandshake, decision)
assert.Equal(t, now, hostinfo.lastUsed)
assert.False(t, hostinfo.pendingDeletion.Load())
assert.False(t, hostinfo.out.Load())
assert.False(t, hostinfo.in.Load())
decision, _, _ = nc.makeTrafficDecision(hostinfo.localIndexId, now.Add(time.Second*5))
assert.Equal(t, doNothing, decision)
assert.Equal(t, now, hostinfo.lastUsed)
assert.False(t, hostinfo.pendingDeletion.Load())
assert.False(t, hostinfo.out.Load())
assert.False(t, hostinfo.in.Load())
// Do another traffic check tick, should still not be pending deletion
decision, _, _ = nc.makeTrafficDecision(hostinfo.localIndexId, now.Add(time.Second*10))
assert.Equal(t, doNothing, decision)
assert.Equal(t, now, hostinfo.lastUsed)
assert.False(t, hostinfo.pendingDeletion.Load())
assert.False(t, hostinfo.out.Load())
assert.False(t, hostinfo.in.Load())
assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId)
assert.Contains(t, nc.hostMap.Hosts, hostinfo.vpnAddrs[0])
// Finally advance beyond the inactivity timeout
decision, _, _ = nc.makeTrafficDecision(hostinfo.localIndexId, now.Add(time.Minute*10))
assert.Equal(t, closeTunnel, decision)
assert.Equal(t, now, hostinfo.lastUsed)
assert.False(t, hostinfo.pendingDeletion.Load())
assert.False(t, hostinfo.out.Load())
assert.False(t, hostinfo.in.Load())
assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId) assert.Contains(t, nc.hostMap.Indexes, hostinfo.localIndexId)
assert.Contains(t, nc.hostMap.Hosts, hostinfo.vpnAddrs[0]) assert.Contains(t, nc.hostMap.Hosts, hostinfo.vpnAddrs[0])
} }
@@ -360,10 +264,10 @@ func Test_NewConnectionManagerTest_DisconnectInvalid(t *testing.T) {
ifce.disconnectInvalid.Store(true) ifce.disconnectInvalid.Store(true)
// Create manager // Create manager
conf := config.NewC(l) ctx, cancel := context.WithCancel(context.Background())
punchy := NewPunchyFromConfig(l, conf) defer cancel()
nc := newConnectionManagerFromConfig(l, conf, hostMap, punchy) punchy := NewPunchyFromConfig(l, config.NewC(l))
nc.intf = ifce nc := newConnectionManager(ctx, l, ifce, 5, 10, punchy)
ifce.connectionManager = nc ifce.connectionManager = nc
hostinfo := &HostInfo{ hostinfo := &HostInfo{

View File

@@ -4,13 +4,13 @@ import (
"crypto/rand" "crypto/rand"
"encoding/json" "encoding/json"
"fmt" "fmt"
"sync"
"sync/atomic" "sync/atomic"
"github.com/flynn/noise" "github.com/flynn/noise"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/slackhq/nebula/cert" "github.com/slackhq/nebula/cert"
"github.com/slackhq/nebula/noiseutil" "github.com/slackhq/nebula/noiseutil"
"github.com/wadey/synctrace"
) )
const ReplayWindow = 1024 const ReplayWindow = 1024
@@ -24,7 +24,7 @@ type ConnectionState struct {
initiator bool initiator bool
messageCounter atomic.Uint64 messageCounter atomic.Uint64
window *Bits window *Bits
writeLock sync.Mutex writeLock synctrace.Mutex
} }
func NewConnectionState(l *logrus.Logger, cs *CertState, crt cert.Certificate, initiator bool, pattern noise.HandshakePattern) (*ConnectionState, error) { func NewConnectionState(l *logrus.Logger, cs *CertState, crt cert.Certificate, initiator bool, pattern noise.HandshakePattern) (*ConnectionState, error) {
@@ -76,6 +76,7 @@ func NewConnectionState(l *logrus.Logger, cs *CertState, crt cert.Certificate, i
initiator: initiator, initiator: initiator,
window: b, window: b,
myCert: crt, myCert: crt,
writeLock: synctrace.NewMutex("connection-state"),
} }
// always start the counter from 2, as packet 1 and packet 2 are handshake packets. // always start the counter from 2, as packet 1 and packet 2 are handshake packets.
ci.messageCounter.Add(2) ci.messageCounter.Add(2)

View File

@@ -26,15 +26,14 @@ type controlHostLister interface {
} }
type Control struct { type Control struct {
f *Interface f *Interface
l *logrus.Logger l *logrus.Logger
ctx context.Context ctx context.Context
cancel context.CancelFunc cancel context.CancelFunc
sshStart func() sshStart func()
statsStart func() statsStart func()
dnsStart func() dnsStart func()
lighthouseStart func() lighthouseStart func()
connectionManagerStart func(context.Context)
} }
type ControlHostInfo struct { type ControlHostInfo struct {
@@ -64,9 +63,6 @@ func (c *Control) Start() {
if c.dnsStart != nil { if c.dnsStart != nil {
go c.dnsStart() go c.dnsStart()
} }
if c.connectionManagerStart != nil {
go c.connectionManagerStart(c.ctx)
}
if c.lighthouseStart != nil { if c.lighthouseStart != nil {
c.lighthouseStart() c.lighthouseStart()
} }

View File

@@ -53,7 +53,7 @@ func TestControl_GetHostInfoByVpnIp(t *testing.T) {
localIndexId: 201, localIndexId: 201,
vpnAddrs: []netip.Addr{vpnIp}, vpnAddrs: []netip.Addr{vpnIp},
relayState: RelayState{ relayState: RelayState{
relays: nil, relays: map[netip.Addr]struct{}{},
relayForByAddr: map[netip.Addr]*Relay{}, relayForByAddr: map[netip.Addr]*Relay{},
relayForByIdx: map[uint32]*Relay{}, relayForByIdx: map[uint32]*Relay{},
}, },
@@ -72,7 +72,7 @@ func TestControl_GetHostInfoByVpnIp(t *testing.T) {
localIndexId: 201, localIndexId: 201,
vpnAddrs: []netip.Addr{vpnIp2}, vpnAddrs: []netip.Addr{vpnIp2},
relayState: RelayState{ relayState: RelayState{
relays: nil, relays: map[netip.Addr]struct{}{},
relayForByAddr: map[netip.Addr]*Relay{}, relayForByAddr: map[netip.Addr]*Relay{},
relayForByIdx: map[uint32]*Relay{}, relayForByIdx: map[uint32]*Relay{},
}, },

View File

@@ -6,12 +6,12 @@ import (
"net/netip" "net/netip"
"strconv" "strconv"
"strings" "strings"
"sync"
"github.com/gaissmai/bart" "github.com/gaissmai/bart"
"github.com/miekg/dns" "github.com/miekg/dns"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/slackhq/nebula/config" "github.com/slackhq/nebula/config"
"github.com/wadey/synctrace"
) )
// This whole thing should be rewritten to use context // This whole thing should be rewritten to use context
@@ -21,7 +21,7 @@ var dnsServer *dns.Server
var dnsAddr string var dnsAddr string
type dnsRecords struct { type dnsRecords struct {
sync.RWMutex synctrace.RWMutex
l *logrus.Logger l *logrus.Logger
dnsMap4 map[string]netip.Addr dnsMap4 map[string]netip.Addr
dnsMap6 map[string]netip.Addr dnsMap6 map[string]netip.Addr
@@ -31,6 +31,7 @@ type dnsRecords struct {
func newDnsRecords(l *logrus.Logger, cs *CertState, hostMap *HostMap) *dnsRecords { func newDnsRecords(l *logrus.Logger, cs *CertState, hostMap *HostMap) *dnsRecords {
return &dnsRecords{ return &dnsRecords{
RWMutex: synctrace.NewRWMutex("dns-records"),
l: l, l: l,
dnsMap4: make(map[string]netip.Addr), dnsMap4: make(map[string]netip.Addr),
dnsMap6: make(map[string]netip.Addr), dnsMap6: make(map[string]netip.Addr),

View File

@@ -506,7 +506,7 @@ func TestReestablishRelays(t *testing.T) {
curIndexes := len(myControl.GetHostmap().Indexes) curIndexes := len(myControl.GetHostmap().Indexes)
for curIndexes >= start { for curIndexes >= start {
curIndexes = len(myControl.GetHostmap().Indexes) curIndexes = len(myControl.GetHostmap().Indexes)
r.Logf("Wait for the dead index to go away:start=%v indexes, current=%v indexes", start, curIndexes) r.Logf("Wait for the dead index to go away:start=%v indexes, currnet=%v indexes", start, curIndexes)
myControl.InjectTunUDPPacket(theirVpnIpNet[0].Addr(), 80, myVpnIpNet[0].Addr(), 80, []byte("Hi from me should fail")) myControl.InjectTunUDPPacket(theirVpnIpNet[0].Addr(), 80, myVpnIpNet[0].Addr(), 80, []byte("Hi from me should fail"))
r.RouteForAllExitFunc(func(p *udp.Packet, c *nebula.Control) router.ExitType { r.RouteForAllExitFunc(func(p *udp.Packet, c *nebula.Control) router.ExitType {
@@ -1052,9 +1052,6 @@ func TestRehandshakingLoser(t *testing.T) {
t.Log("Stand up a tunnel between me and them") t.Log("Stand up a tunnel between me and them")
assertTunnel(t, myVpnIpNet[0].Addr(), theirVpnIpNet[0].Addr(), myControl, theirControl, r) assertTunnel(t, myVpnIpNet[0].Addr(), theirVpnIpNet[0].Addr(), myControl, theirControl, r)
myControl.GetHostInfoByVpnAddr(theirVpnIpNet[0].Addr(), false)
theirControl.GetHostInfoByVpnAddr(myVpnIpNet[0].Addr(), false)
r.RenderHostmaps("Starting hostmaps", myControl, theirControl) r.RenderHostmaps("Starting hostmaps", myControl, theirControl)
r.Log("Renew their certificate and spin until mine sees it") r.Log("Renew their certificate and spin until mine sees it")

View File

@@ -700,7 +700,6 @@ func (r *R) FlushAll() {
r.Unlock() r.Unlock()
panic("Can't FlushAll for host: " + p.To.String()) panic("Can't FlushAll for host: " + p.To.String())
} }
receiver.InjectUDPPacket(p)
r.Unlock() r.Unlock()
} }
} }

View File

@@ -1,57 +0,0 @@
//go:build e2e_testing
// +build e2e_testing
package e2e
import (
"testing"
"time"
"github.com/slackhq/nebula/cert"
"github.com/slackhq/nebula/cert_test"
"github.com/slackhq/nebula/e2e/router"
)
func TestDropInactiveTunnels(t *testing.T) {
// The goal of this test is to ensure the shortest inactivity timeout will close the tunnel on both sides
// under ideal conditions
ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})
myControl, myVpnIpNet, myUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "me", "10.128.0.1/24", m{"tunnels": m{"drop_inactive": true, "inactivity_timeout": "5s"}})
theirControl, theirVpnIpNet, theirUdpAddr, _ := newSimpleServer(cert.Version1, ca, caKey, "them", "10.128.0.2/24", m{"tunnels": m{"drop_inactive": true, "inactivity_timeout": "10m"}})
// Share our underlay information
myControl.InjectLightHouseAddr(theirVpnIpNet[0].Addr(), theirUdpAddr)
theirControl.InjectLightHouseAddr(myVpnIpNet[0].Addr(), myUdpAddr)
// Start the servers
myControl.Start()
theirControl.Start()
r := router.NewR(t, myControl, theirControl)
r.Log("Assert the tunnel between me and them works")
assertTunnel(t, myVpnIpNet[0].Addr(), theirVpnIpNet[0].Addr(), myControl, theirControl, r)
r.Log("Go inactive and wait for the tunnels to get dropped")
waitStart := time.Now()
for {
myIndexes := len(myControl.GetHostmap().Indexes)
theirIndexes := len(theirControl.GetHostmap().Indexes)
if myIndexes == 0 && theirIndexes == 0 {
break
}
since := time.Since(waitStart)
r.Logf("my tunnels: %v; their tunnels: %v; duration: %v", myIndexes, theirIndexes, since)
if since > time.Second*30 {
t.Fatal("Tunnel should have been declared inactive after 5 seconds and before 30 seconds")
}
time.Sleep(1 * time.Second)
r.FlushAll()
}
r.Logf("Inactive tunnels were dropped within %v", time.Since(waitStart))
myControl.Stop()
theirControl.Stop()
}

View File

@@ -338,18 +338,6 @@ logging:
# after receiving the response for lighthouse queries # after receiving the response for lighthouse queries
#trigger_buffer: 64 #trigger_buffer: 64
# Tunnel manager settings
#tunnels:
# drop_inactive controls whether inactive tunnels are maintained or dropped after the inactive_timeout period has
# elapsed.
# In general, it is a good idea to enable this setting. It will be enabled by default in a future release.
# This setting is reloadable
#drop_inactive: false
# inactivity_timeout controls how long a tunnel MUST NOT see any inbound or outbound traffic before being considered
# inactive and eligible to be dropped.
# This setting is reloadable
#inactivity_timeout: 10m
# Nebula security group configuration # Nebula security group configuration
firewall: firewall:

View File

@@ -10,7 +10,6 @@ import (
"reflect" "reflect"
"strconv" "strconv"
"strings" "strings"
"sync"
"time" "time"
"github.com/gaissmai/bart" "github.com/gaissmai/bart"
@@ -19,6 +18,7 @@ import (
"github.com/slackhq/nebula/cert" "github.com/slackhq/nebula/cert"
"github.com/slackhq/nebula/config" "github.com/slackhq/nebula/config"
"github.com/slackhq/nebula/firewall" "github.com/slackhq/nebula/firewall"
"github.com/wadey/synctrace"
) )
type FirewallInterface interface { type FirewallInterface interface {
@@ -76,7 +76,7 @@ type firewallMetrics struct {
} }
type FirewallConntrack struct { type FirewallConntrack struct {
sync.Mutex synctrace.Mutex
Conns map[firewall.Packet]*conn Conns map[firewall.Packet]*conn
TimerWheel *TimerWheel[firewall.Packet] TimerWheel *TimerWheel[firewall.Packet]
@@ -164,6 +164,7 @@ func NewFirewall(l *logrus.Logger, tcpTimeout, UDPTimeout, defaultTimeout time.D
return &Firewall{ return &Firewall{
Conntrack: &FirewallConntrack{ Conntrack: &FirewallConntrack{
Mutex: synctrace.NewMutex("firewall-conntrack"),
Conns: make(map[firewall.Packet]*conn), Conns: make(map[firewall.Packet]*conn),
TimerWheel: NewTimerWheel[firewall.Packet](tmin, tmax), TimerWheel: NewTimerWheel[firewall.Packet](tmin, tmax),
}, },

13
go.mod
View File

@@ -1,11 +1,11 @@
module github.com/slackhq/nebula module github.com/slackhq/nebula
go 1.24.0 go 1.23.0
toolchain go1.24.1 toolchain go1.24.1
require ( require (
dario.cat/mergo v1.0.2 dario.cat/mergo v1.0.1
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be
github.com/armon/go-radix v1.0.0 github.com/armon/go-radix v1.0.0
github.com/cyberdelia/go-metrics-graphite v0.0.0-20161219230853-39f87cc3b432 github.com/cyberdelia/go-metrics-graphite v0.0.0-20161219230853-39f87cc3b432
@@ -23,7 +23,8 @@ require (
github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e
github.com/stefanberger/go-pkcs11uri v0.0.0-20230803200340-78284954bff6 github.com/stefanberger/go-pkcs11uri v0.0.0-20230803200340-78284954bff6
github.com/stretchr/testify v1.10.0 github.com/stretchr/testify v1.10.0
github.com/vishvananda/netlink v1.3.1 github.com/vishvananda/netlink v1.3.0
github.com/wadey/synctrace v0.0.0-20250612192159-94547ef50dfe
golang.org/x/crypto v0.37.0 golang.org/x/crypto v0.37.0
golang.org/x/exp v0.0.0-20230725093048-515e97ebf090 golang.org/x/exp v0.0.0-20230725093048-515e97ebf090
golang.org/x/net v0.39.0 golang.org/x/net v0.39.0
@@ -42,13 +43,17 @@ require (
github.com/beorn7/perks v1.0.1 // indirect github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emirpasic/gods v1.18.1 // indirect
github.com/google/btree v1.1.2 // indirect github.com/google/btree v1.1.2 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/heimdalr/dag v1.4.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.62.0 // indirect github.com/prometheus/common v0.62.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect github.com/prometheus/procfs v0.15.1 // indirect
github.com/vishvananda/netns v0.0.5 // indirect github.com/timandy/routine v1.1.5 // indirect
github.com/vishvananda/netns v0.0.4 // indirect
golang.org/x/mod v0.23.0 // indirect golang.org/x/mod v0.23.0 // indirect
golang.org/x/time v0.5.0 // indirect golang.org/x/time v0.5.0 // indirect
golang.org/x/tools v0.30.0 // indirect golang.org/x/tools v0.30.0 // indirect

24
go.sum
View File

@@ -1,6 +1,6 @@
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8= dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
dario.cat/mergo v1.0.2/go.mod h1:E/hbnu0NxMFBjpMIE34DRGLWqDy0g5FuKDhCb31ngxA= dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
@@ -22,6 +22,8 @@ github.com/cyberdelia/go-metrics-graphite v0.0.0-20161219230853-39f87cc3b432/go.
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
github.com/flynn/noise v1.1.0 h1:KjPQoQCEFdZDiP03phOvGi11+SVVhBG2wOWAorLsstg= github.com/flynn/noise v1.1.0 h1:KjPQoQCEFdZDiP03phOvGi11+SVVhBG2wOWAorLsstg=
github.com/flynn/noise v1.1.0/go.mod h1:xbMo+0i6+IGbYdJhF31t2eR1BIU0CYc12+BNAKwUTag= github.com/flynn/noise v1.1.0/go.mod h1:xbMo+0i6+IGbYdJhF31t2eR1BIU0CYc12+BNAKwUTag=
github.com/gaissmai/bart v0.20.4 h1:Ik47r1fy3jRVU+1eYzKSW3ho2UgBVTVnUS8O993584U= github.com/gaissmai/bart v0.20.4 h1:Ik47r1fy3jRVU+1eYzKSW3ho2UgBVTVnUS8O993584U=
@@ -33,6 +35,8 @@ github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/go-test/deep v1.1.0 h1:WOcxcdHcvdgThNXjw0t76K42FXTU7HpNQWHpA2HHNlg=
github.com/go-test/deep v1.1.0/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
@@ -58,6 +62,10 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gopacket v1.1.19 h1:ves8RnFZPGiFnTS0uPQStjwru6uO6h+nlr9j6fL7kF8= github.com/google/gopacket v1.1.19 h1:ves8RnFZPGiFnTS0uPQStjwru6uO6h+nlr9j6fL7kF8=
github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo= github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/heimdalr/dag v1.4.0 h1:zG3JA4RDVLc55k3AXAgfwa+EgBNZ0TkfOO3C29Ucpmg=
github.com/heimdalr/dag v1.4.0/go.mod h1:OCh6ghKmU0hPjtwMqWBoNxPmtRioKd1xSu7Zs4sbIqM=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
@@ -145,10 +153,14 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0= github.com/timandy/routine v1.1.5 h1:LSpm7Iijwb9imIPlucl4krpr2EeCeAUvifiQ9Uf5X+M=
github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4= github.com/timandy/routine v1.1.5/go.mod h1:kXslgIosdY8LW0byTyPnenDgn4/azt2euufAq9rK51w=
github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY= github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk=
github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs=
github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8=
github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
github.com/wadey/synctrace v0.0.0-20250612192159-94547ef50dfe h1:dc8Q42VsX+ABr0drJw27f3smvGfcz7eB8rJx+IkVMAo=
github.com/wadey/synctrace v0.0.0-20250612192159-94547ef50dfe/go.mod h1:F2VCml4UxGPgAAqqm9T0ZfnVRWITrQS1EMZM+KCAm/Q=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=

View File

@@ -9,6 +9,7 @@ import (
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/slackhq/nebula/cert" "github.com/slackhq/nebula/cert"
"github.com/slackhq/nebula/header" "github.com/slackhq/nebula/header"
"github.com/wadey/synctrace"
) )
// NOISE IX Handshakes // NOISE IX Handshakes
@@ -249,7 +250,8 @@ func ixHandshakeStage1(f *Interface, addr netip.AddrPort, via *ViaSender, packet
HandshakePacket: make(map[uint8][]byte, 0), HandshakePacket: make(map[uint8][]byte, 0),
lastHandshakeTime: hs.Details.Time, lastHandshakeTime: hs.Details.Time,
relayState: RelayState{ relayState: RelayState{
relays: nil, RWMutex: synctrace.NewRWMutex("relay-state"),
relays: map[netip.Addr]struct{}{},
relayForByAddr: map[netip.Addr]*Relay{}, relayForByAddr: map[netip.Addr]*Relay{},
relayForByIdx: map[uint32]*Relay{}, relayForByIdx: map[uint32]*Relay{},
}, },
@@ -457,7 +459,7 @@ func ixHandshakeStage1(f *Interface, addr netip.AddrPort, via *ViaSender, packet
Info("Handshake message sent") Info("Handshake message sent")
} }
f.connectionManager.AddTrafficWatch(hostinfo) f.connectionManager.AddTrafficWatch(hostinfo.localIndexId)
hostinfo.remotes.ResetBlockedRemotes() hostinfo.remotes.ResetBlockedRemotes()
@@ -652,7 +654,7 @@ func ixHandshakeStage2(f *Interface, addr netip.AddrPort, via *ViaSender, hh *Ha
// Complete our handshake and update metrics, this will replace any existing tunnels for the vpnAddrs here // Complete our handshake and update metrics, this will replace any existing tunnels for the vpnAddrs here
f.handshakeManager.Complete(hostinfo, f) f.handshakeManager.Complete(hostinfo, f)
f.connectionManager.AddTrafficWatch(hostinfo) f.connectionManager.AddTrafficWatch(hostinfo.localIndexId)
if f.l.Level >= logrus.DebugLevel { if f.l.Level >= logrus.DebugLevel {
hostinfo.logger(f.l).Debugf("Sending %d stored packets", len(hh.packetStore)) hostinfo.logger(f.l).Debugf("Sending %d stored packets", len(hh.packetStore))

View File

@@ -8,7 +8,6 @@ import (
"errors" "errors"
"net/netip" "net/netip"
"slices" "slices"
"sync"
"time" "time"
"github.com/rcrowley/go-metrics" "github.com/rcrowley/go-metrics"
@@ -16,6 +15,7 @@ import (
"github.com/slackhq/nebula/cert" "github.com/slackhq/nebula/cert"
"github.com/slackhq/nebula/header" "github.com/slackhq/nebula/header"
"github.com/slackhq/nebula/udp" "github.com/slackhq/nebula/udp"
"github.com/wadey/synctrace"
) )
const ( const (
@@ -45,7 +45,7 @@ type HandshakeConfig struct {
type HandshakeManager struct { type HandshakeManager struct {
// Mutex for interacting with the vpnIps and indexes maps // Mutex for interacting with the vpnIps and indexes maps
sync.RWMutex synctrace.RWMutex
vpnIps map[netip.Addr]*HandshakeHostInfo vpnIps map[netip.Addr]*HandshakeHostInfo
indexes map[uint32]*HandshakeHostInfo indexes map[uint32]*HandshakeHostInfo
@@ -66,7 +66,7 @@ type HandshakeManager struct {
} }
type HandshakeHostInfo struct { type HandshakeHostInfo struct {
sync.Mutex synctrace.Mutex
startTime time.Time // Time that we first started trying with this handshake startTime time.Time // Time that we first started trying with this handshake
ready bool // Is the handshake ready ready bool // Is the handshake ready
@@ -104,6 +104,7 @@ func (hh *HandshakeHostInfo) cachePacket(l *logrus.Logger, t header.MessageType,
func NewHandshakeManager(l *logrus.Logger, mainHostMap *HostMap, lightHouse *LightHouse, outside udp.Conn, config HandshakeConfig) *HandshakeManager { func NewHandshakeManager(l *logrus.Logger, mainHostMap *HostMap, lightHouse *LightHouse, outside udp.Conn, config HandshakeConfig) *HandshakeManager {
return &HandshakeManager{ return &HandshakeManager{
RWMutex: synctrace.NewRWMutex("handshake-manager"),
vpnIps: map[netip.Addr]*HandshakeHostInfo{}, vpnIps: map[netip.Addr]*HandshakeHostInfo{},
indexes: map[uint32]*HandshakeHostInfo{}, indexes: map[uint32]*HandshakeHostInfo{},
mainHostMap: mainHostMap, mainHostMap: mainHostMap,
@@ -111,7 +112,7 @@ func NewHandshakeManager(l *logrus.Logger, mainHostMap *HostMap, lightHouse *Lig
outside: outside, outside: outside,
config: config, config: config,
trigger: make(chan netip.Addr, config.triggerBuffer), trigger: make(chan netip.Addr, config.triggerBuffer),
OutboundHandshakeTimer: NewLockingTimerWheel[netip.Addr](config.tryInterval, hsTimeout(config.retries, config.tryInterval)), OutboundHandshakeTimer: NewLockingTimerWheel[netip.Addr]("outbound-handshake-timer", config.tryInterval, hsTimeout(config.retries, config.tryInterval)),
messageMetrics: config.messageMetrics, messageMetrics: config.messageMetrics,
metricInitiated: metrics.GetOrRegisterCounter("handshake_manager.initiated", nil), metricInitiated: metrics.GetOrRegisterCounter("handshake_manager.initiated", nil),
metricTimedOut: metrics.GetOrRegisterCounter("handshake_manager.timed_out", nil), metricTimedOut: metrics.GetOrRegisterCounter("handshake_manager.timed_out", nil),
@@ -450,13 +451,15 @@ func (hm *HandshakeManager) StartHandshake(vpnAddr netip.Addr, cacheCb func(*Han
vpnAddrs: []netip.Addr{vpnAddr}, vpnAddrs: []netip.Addr{vpnAddr},
HandshakePacket: make(map[uint8][]byte, 0), HandshakePacket: make(map[uint8][]byte, 0),
relayState: RelayState{ relayState: RelayState{
relays: nil, RWMutex: synctrace.NewRWMutex("relay-state"),
relays: map[netip.Addr]struct{}{},
relayForByAddr: map[netip.Addr]*Relay{}, relayForByAddr: map[netip.Addr]*Relay{},
relayForByIdx: map[uint32]*Relay{}, relayForByIdx: map[uint32]*Relay{},
}, },
} }
hh := &HandshakeHostInfo{ hh := &HandshakeHostInfo{
Mutex: synctrace.NewMutex("handshake-hostinfo"),
hostinfo: hostinfo, hostinfo: hostinfo,
startTime: time.Now(), startTime: time.Now(),
} }

View File

@@ -4,8 +4,6 @@ import (
"errors" "errors"
"net" "net"
"net/netip" "net/netip"
"slices"
"sync"
"sync/atomic" "sync/atomic"
"time" "time"
@@ -15,6 +13,7 @@ import (
"github.com/slackhq/nebula/cert" "github.com/slackhq/nebula/cert"
"github.com/slackhq/nebula/config" "github.com/slackhq/nebula/config"
"github.com/slackhq/nebula/header" "github.com/slackhq/nebula/header"
"github.com/wadey/synctrace"
) )
// const ProbeLen = 100 // const ProbeLen = 100
@@ -54,22 +53,22 @@ type Relay struct {
} }
type HostMap struct { type HostMap struct {
sync.RWMutex //Because we concurrently read and write to our maps synctrace.RWMutex //Because we concurrently read and write to our maps
Indexes map[uint32]*HostInfo Indexes map[uint32]*HostInfo
Relays map[uint32]*HostInfo // Maps a Relay IDX to a Relay HostInfo object Relays map[uint32]*HostInfo // Maps a Relay IDX to a Relay HostInfo object
RemoteIndexes map[uint32]*HostInfo RemoteIndexes map[uint32]*HostInfo
Hosts map[netip.Addr]*HostInfo Hosts map[netip.Addr]*HostInfo
preferredRanges atomic.Pointer[[]netip.Prefix] preferredRanges atomic.Pointer[[]netip.Prefix]
l *logrus.Logger l *logrus.Logger
} }
// For synchronization, treat the pointed-to Relay struct as immutable. To edit the Relay // For synchronization, treat the pointed-to Relay struct as immutable. To edit the Relay
// struct, make a copy of an existing value, edit the fileds in the copy, and // struct, make a copy of an existing value, edit the fileds in the copy, and
// then store a pointer to the new copy in both realyForBy* maps. // then store a pointer to the new copy in both realyForBy* maps.
type RelayState struct { type RelayState struct {
sync.RWMutex synctrace.RWMutex
relays []netip.Addr // Ordered set of VpnAddrs of Hosts to use as relays to access this peer relays map[netip.Addr]struct{} // Set of vpnAddr's of Hosts to use as relays to access this peer
// For data race avoidance, the contents of a *Relay are treated immutably. To update a *Relay, copy the existing data, // For data race avoidance, the contents of a *Relay are treated immutably. To update a *Relay, copy the existing data,
// modify what needs to be updated, and store the new modified copy in the relayForByIp and relayForByIdx maps (with // modify what needs to be updated, and store the new modified copy in the relayForByIp and relayForByIdx maps (with
// the RelayState Lock held) // the RelayState Lock held)
@@ -80,12 +79,7 @@ type RelayState struct {
func (rs *RelayState) DeleteRelay(ip netip.Addr) { func (rs *RelayState) DeleteRelay(ip netip.Addr) {
rs.Lock() rs.Lock()
defer rs.Unlock() defer rs.Unlock()
for idx, val := range rs.relays { delete(rs.relays, ip)
if val == ip {
rs.relays = append(rs.relays[:idx], rs.relays[idx+1:]...)
return
}
}
} }
func (rs *RelayState) UpdateRelayForByIpState(vpnIp netip.Addr, state int) { func (rs *RelayState) UpdateRelayForByIpState(vpnIp netip.Addr, state int) {
@@ -130,16 +124,16 @@ func (rs *RelayState) GetRelayForByAddr(addr netip.Addr) (*Relay, bool) {
func (rs *RelayState) InsertRelayTo(ip netip.Addr) { func (rs *RelayState) InsertRelayTo(ip netip.Addr) {
rs.Lock() rs.Lock()
defer rs.Unlock() defer rs.Unlock()
if !slices.Contains(rs.relays, ip) { rs.relays[ip] = struct{}{}
rs.relays = append(rs.relays, ip)
}
} }
func (rs *RelayState) CopyRelayIps() []netip.Addr { func (rs *RelayState) CopyRelayIps() []netip.Addr {
ret := make([]netip.Addr, len(rs.relays))
rs.RLock() rs.RLock()
defer rs.RUnlock() defer rs.RUnlock()
copy(ret, rs.relays) ret := make([]netip.Addr, 0, len(rs.relays))
for ip := range rs.relays {
ret = append(ret, ip)
}
return ret return ret
} }
@@ -256,14 +250,6 @@ type HostInfo struct {
// Used to track other hostinfos for this vpn ip since only 1 can be primary // Used to track other hostinfos for this vpn ip since only 1 can be primary
// Synchronised via hostmap lock and not the hostinfo lock. // Synchronised via hostmap lock and not the hostinfo lock.
next, prev *HostInfo next, prev *HostInfo
//TODO: in, out, and others might benefit from being an atomic.Int32. We could collapse connectionManager pendingDeletion, relayUsed, and in/out into this 1 thing
in, out, pendingDeletion atomic.Bool
// lastUsed tracks the last time ConnectionManager checked the tunnel and it was in use.
// This value will be behind against actual tunnel utilization in the hot path.
// This should only be used by the ConnectionManagers ticker routine.
lastUsed time.Time
} }
type ViaSender struct { type ViaSender struct {
@@ -302,6 +288,7 @@ func NewHostMapFromConfig(l *logrus.Logger, c *config.C) *HostMap {
func newHostMap(l *logrus.Logger) *HostMap { func newHostMap(l *logrus.Logger) *HostMap {
return &HostMap{ return &HostMap{
RWMutex: synctrace.NewRWMutex("hostmap"),
Indexes: map[uint32]*HostInfo{}, Indexes: map[uint32]*HostInfo{},
Relays: map[uint32]*HostInfo{}, Relays: map[uint32]*HostInfo{},
RemoteIndexes: map[uint32]*HostInfo{}, RemoteIndexes: map[uint32]*HostInfo{},

View File

@@ -7,7 +7,6 @@ import (
"github.com/slackhq/nebula/config" "github.com/slackhq/nebula/config"
"github.com/slackhq/nebula/test" "github.com/slackhq/nebula/test"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
) )
func TestHostMap_MakePrimary(t *testing.T) { func TestHostMap_MakePrimary(t *testing.T) {
@@ -216,31 +215,3 @@ func TestHostMap_reload(t *testing.T) {
c.ReloadConfigString("preferred_ranges: [1.1.1.1/32]") c.ReloadConfigString("preferred_ranges: [1.1.1.1/32]")
assert.Equal(t, []string{"1.1.1.1/32"}, toS(hm.GetPreferredRanges())) assert.Equal(t, []string{"1.1.1.1/32"}, toS(hm.GetPreferredRanges()))
} }
func TestHostMap_RelayState(t *testing.T) {
h1 := &HostInfo{vpnAddrs: []netip.Addr{netip.MustParseAddr("0.0.0.1")}, localIndexId: 1}
a1 := netip.MustParseAddr("::1")
a2 := netip.MustParseAddr("2001::1")
h1.relayState.InsertRelayTo(a1)
assert.Equal(t, []netip.Addr{a1}, h1.relayState.relays)
h1.relayState.InsertRelayTo(a2)
assert.Equal(t, []netip.Addr{a1, a2}, h1.relayState.relays)
// Ensure that the first relay added is the first one returned in the copy
currentRelays := h1.relayState.CopyRelayIps()
require.Len(t, currentRelays, 2)
assert.Equal(t, a1, currentRelays[0])
// Deleting the last one in the list works ok
h1.relayState.DeleteRelay(a2)
assert.Equal(t, []netip.Addr{a1}, h1.relayState.relays)
// Deleting an element not in the list works ok
h1.relayState.DeleteRelay(a2)
assert.Equal(t, []netip.Addr{a1}, h1.relayState.relays)
// Deleting the only element in the list works ok
h1.relayState.DeleteRelay(a1)
assert.Equal(t, []netip.Addr{}, h1.relayState.relays)
}

View File

@@ -288,7 +288,7 @@ func (f *Interface) SendVia(via *HostInfo,
c := via.ConnectionState.messageCounter.Add(1) c := via.ConnectionState.messageCounter.Add(1)
out = header.Encode(out, header.Version, header.Message, header.MessageRelay, relay.RemoteIndex, c) out = header.Encode(out, header.Version, header.Message, header.MessageRelay, relay.RemoteIndex, c)
f.connectionManager.Out(via) f.connectionManager.Out(via.localIndexId)
// Authenticate the header and payload, but do not encrypt for this message type. // Authenticate the header and payload, but do not encrypt for this message type.
// The payload consists of the inner, unencrypted Nebula header, as well as the end-to-end encrypted payload. // The payload consists of the inner, unencrypted Nebula header, as well as the end-to-end encrypted payload.
@@ -356,7 +356,7 @@ func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType
//l.WithField("trace", string(debug.Stack())).Error("out Header ", &Header{Version, t, st, 0, hostinfo.remoteIndexId, c}, p) //l.WithField("trace", string(debug.Stack())).Error("out Header ", &Header{Version, t, st, 0, hostinfo.remoteIndexId, c}, p)
out = header.Encode(out, header.Version, t, st, hostinfo.remoteIndexId, c) out = header.Encode(out, header.Version, t, st, hostinfo.remoteIndexId, c)
f.connectionManager.Out(hostinfo) f.connectionManager.Out(hostinfo.localIndexId)
// Query our LH if we haven't since the last time we've been rebound, this will cause the remote to punch against // Query our LH if we haven't since the last time we've been rebound, this will cause the remote to punch against
// all our addrs and enable a faster roaming. // all our addrs and enable a faster roaming.

View File

@@ -2,7 +2,6 @@ package nebula
import ( import (
"context" "context"
"crypto/fips140"
"errors" "errors"
"fmt" "fmt"
"io" "io"
@@ -25,23 +24,23 @@ import (
const mtu = 9001 const mtu = 9001
type InterfaceConfig struct { type InterfaceConfig struct {
HostMap *HostMap HostMap *HostMap
Outside udp.Conn Outside udp.Conn
Inside overlay.Device Inside overlay.Device
pki *PKI pki *PKI
Cipher string Firewall *Firewall
Firewall *Firewall ServeDns bool
ServeDns bool HandshakeManager *HandshakeManager
HandshakeManager *HandshakeManager lightHouse *LightHouse
lightHouse *LightHouse checkInterval time.Duration
connectionManager *connectionManager pendingDeletionInterval time.Duration
DropLocalBroadcast bool DropLocalBroadcast bool
DropMulticast bool DropMulticast bool
routines int routines int
MessageMetrics *MessageMetrics MessageMetrics *MessageMetrics
version string version string
relayManager *relayManager relayManager *relayManager
punchy *Punchy punchy *Punchy
tryPromoteEvery uint32 tryPromoteEvery uint32
reQueryEvery uint32 reQueryEvery uint32
@@ -158,9 +157,6 @@ func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
if c.Firewall == nil { if c.Firewall == nil {
return nil, errors.New("no firewall rules") return nil, errors.New("no firewall rules")
} }
if c.connectionManager == nil {
return nil, errors.New("no connection manager")
}
cs := c.pki.getCertState() cs := c.pki.getCertState()
ifce := &Interface{ ifce := &Interface{
@@ -185,7 +181,7 @@ func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
myVpnAddrsTable: cs.myVpnAddrsTable, myVpnAddrsTable: cs.myVpnAddrsTable,
myBroadcastAddrsTable: cs.myVpnBroadcastAddrsTable, myBroadcastAddrsTable: cs.myVpnBroadcastAddrsTable,
relayManager: c.relayManager, relayManager: c.relayManager,
connectionManager: c.connectionManager,
conntrackCacheTimeout: c.ConntrackCacheTimeout, conntrackCacheTimeout: c.ConntrackCacheTimeout,
metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)), metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)),
@@ -202,7 +198,7 @@ func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
ifce.reQueryEvery.Store(c.reQueryEvery) ifce.reQueryEvery.Store(c.reQueryEvery)
ifce.reQueryWait.Store(int64(c.reQueryWait)) ifce.reQueryWait.Store(int64(c.reQueryWait))
ifce.connectionManager.intf = ifce ifce.connectionManager = newConnectionManager(ctx, c.l, ifce, c.checkInterval, c.pendingDeletionInterval, c.punchy)
return ifce, nil return ifce, nil
} }
@@ -221,7 +217,6 @@ func (f *Interface) activate() {
f.l.WithField("interface", f.inside.Name()).WithField("networks", f.myVpnNetworks). f.l.WithField("interface", f.inside.Name()).WithField("networks", f.myVpnNetworks).
WithField("build", f.version).WithField("udpAddr", addr). WithField("build", f.version).WithField("udpAddr", addr).
WithField("boringcrypto", boringEnabled()). WithField("boringcrypto", boringEnabled()).
WithField("fips140", fips140.Enabled()).
Info("Nebula interface is active") Info("Nebula interface is active")
metrics.GetOrRegisterGauge("routines", nil).Update(int64(f.routines)) metrics.GetOrRegisterGauge("routines", nil).Update(int64(f.routines))

View File

@@ -9,7 +9,6 @@ import (
"net/netip" "net/netip"
"slices" "slices"
"strconv" "strconv"
"sync"
"sync/atomic" "sync/atomic"
"time" "time"
@@ -21,15 +20,16 @@ import (
"github.com/slackhq/nebula/header" "github.com/slackhq/nebula/header"
"github.com/slackhq/nebula/udp" "github.com/slackhq/nebula/udp"
"github.com/slackhq/nebula/util" "github.com/slackhq/nebula/util"
"github.com/wadey/synctrace"
) )
var ErrHostNotKnown = errors.New("host not known") var ErrHostNotKnown = errors.New("host not known")
type LightHouse struct { type LightHouse struct {
//TODO: We need a timer wheel to kick out vpnAddrs that haven't reported in a long time //TODO: We need a timer wheel to kick out vpnAddrs that haven't reported in a long time
sync.RWMutex //Because we concurrently read and write to our maps synctrace.RWMutex //Because we concurrently read and write to our maps
ctx context.Context ctx context.Context
amLighthouse bool amLighthouse bool
myVpnNetworks []netip.Prefix myVpnNetworks []netip.Prefix
myVpnNetworksTable *bart.Lite myVpnNetworksTable *bart.Lite
@@ -96,6 +96,7 @@ func NewLightHouseFromConfig(ctx context.Context, l *logrus.Logger, c *config.C,
} }
h := LightHouse{ h := LightHouse{
RWMutex: synctrace.NewRWMutex("lighthouse"),
ctx: ctx, ctx: ctx,
amLighthouse: amLighthouse, amLighthouse: amLighthouse,
myVpnNetworks: cs.myVpnNetworks, myVpnNetworks: cs.myVpnNetworks,
@@ -472,6 +473,7 @@ func (lh *LightHouse) QueryServer(vpnAddr netip.Addr) {
return return
} }
synctrace.ChanDebugSend("lighthouse-querychan")
lh.queryChan <- vpnAddr lh.queryChan <- vpnAddr
} }
@@ -725,9 +727,11 @@ func (lh *LightHouse) startQueryWorker() {
nb := make([]byte, 12, 12) nb := make([]byte, 12, 12)
out := make([]byte, mtu) out := make([]byte, mtu)
synctrace.ChanDebugRecvLock("lighthouse-querychan")
for { for {
select { select {
case <-lh.ctx.Done(): case <-lh.ctx.Done():
synctrace.ChanDebugRecvUnlock("lighthouse-querychan")
return return
case addr := <-lh.queryChan: case addr := <-lh.queryChan:
lh.innerQueryServer(addr, nb, out) lh.innerQueryServer(addr, nb, out)

45
main.go
View File

@@ -185,7 +185,6 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
hostMap := NewHostMapFromConfig(l, c) hostMap := NewHostMapFromConfig(l, c)
punchy := NewPunchyFromConfig(l, c) punchy := NewPunchyFromConfig(l, c)
connManager := newConnectionManagerFromConfig(l, c, hostMap, punchy)
lightHouse, err := NewLightHouseFromConfig(ctx, l, c, pki.getCertState(), udpConns[0], punchy) lightHouse, err := NewLightHouseFromConfig(ctx, l, c, pki.getCertState(), udpConns[0], punchy)
if err != nil { if err != nil {
return nil, util.ContextualizeIfNeeded("Failed to initialize lighthouse handler", err) return nil, util.ContextualizeIfNeeded("Failed to initialize lighthouse handler", err)
@@ -221,26 +220,31 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
} }
} }
checkInterval := c.GetInt("timers.connection_alive_interval", 5)
pendingDeletionInterval := c.GetInt("timers.pending_deletion_interval", 10)
ifConfig := &InterfaceConfig{ ifConfig := &InterfaceConfig{
HostMap: hostMap, HostMap: hostMap,
Inside: tun, Inside: tun,
Outside: udpConns[0], Outside: udpConns[0],
pki: pki, pki: pki,
Firewall: fw, Firewall: fw,
ServeDns: serveDns, ServeDns: serveDns,
HandshakeManager: handshakeManager, HandshakeManager: handshakeManager,
connectionManager: connManager, lightHouse: lightHouse,
lightHouse: lightHouse, checkInterval: time.Second * time.Duration(checkInterval),
tryPromoteEvery: c.GetUint32("counters.try_promote", defaultPromoteEvery), pendingDeletionInterval: time.Second * time.Duration(pendingDeletionInterval),
reQueryEvery: c.GetUint32("counters.requery_every_packets", defaultReQueryEvery), tryPromoteEvery: c.GetUint32("counters.try_promote", defaultPromoteEvery),
reQueryWait: c.GetDuration("timers.requery_wait_duration", defaultReQueryWait), reQueryEvery: c.GetUint32("counters.requery_every_packets", defaultReQueryEvery),
DropLocalBroadcast: c.GetBool("tun.drop_local_broadcast", false), reQueryWait: c.GetDuration("timers.requery_wait_duration", defaultReQueryWait),
DropMulticast: c.GetBool("tun.drop_multicast", false), DropLocalBroadcast: c.GetBool("tun.drop_local_broadcast", false),
routines: routines, DropMulticast: c.GetBool("tun.drop_multicast", false),
MessageMetrics: messageMetrics, routines: routines,
version: buildVersion, MessageMetrics: messageMetrics,
relayManager: NewRelayManager(ctx, l, hostMap, c), version: buildVersion,
punchy: punchy, relayManager: NewRelayManager(ctx, l, hostMap, c),
punchy: punchy,
ConntrackCacheTimeout: conntrackCacheTimeout, ConntrackCacheTimeout: conntrackCacheTimeout,
l: l, l: l,
} }
@@ -292,6 +296,5 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
statsStart, statsStart,
dnsStart, dnsStart,
lightHouse.StartUpdateWorker, lightHouse.StartUpdateWorker,
connManager.Start,
}, nil }, nil
} }

View File

@@ -25,11 +25,6 @@ func NewNebulaCipherState(s *noise.CipherState) *NebulaCipherState {
} }
type cipherAEADDanger interface {
EncryptDanger(out, ad, plaintext []byte, n uint64, nb []byte) ([]byte, error)
DecryptDanger(out, ad, plaintext []byte, n uint64, nb []byte) ([]byte, error)
}
// EncryptDanger encrypts and authenticates a given payload. // EncryptDanger encrypts and authenticates a given payload.
// //
// out is a destination slice to hold the output of the EncryptDanger operation. // out is a destination slice to hold the output of the EncryptDanger operation.
@@ -40,25 +35,20 @@ type cipherAEADDanger interface {
// be re-used by callers to minimize garbage collection. // be re-used by callers to minimize garbage collection.
func (s *NebulaCipherState) EncryptDanger(out, ad, plaintext []byte, n uint64, nb []byte) ([]byte, error) { func (s *NebulaCipherState) EncryptDanger(out, ad, plaintext []byte, n uint64, nb []byte) ([]byte, error) {
if s != nil { if s != nil {
switch ce := s.c.(type) { // TODO: Is this okay now that we have made messageCounter atomic?
case cipherAEADDanger: // Alternative may be to split the counter space into ranges
return ce.EncryptDanger(out, ad, plaintext, n, nb) //if n <= s.n {
default: // return nil, errors.New("CRITICAL: a duplicate counter value was used")
// TODO: Is this okay now that we have made messageCounter atomic? //}
// Alternative may be to split the counter space into ranges //s.n = n
//if n <= s.n { nb[0] = 0
// return nil, errors.New("CRITICAL: a duplicate counter value was used") nb[1] = 0
//} nb[2] = 0
//s.n = n nb[3] = 0
nb[0] = 0 noiseEndianness.PutUint64(nb[4:], n)
nb[1] = 0 out = s.c.(cipher.AEAD).Seal(out, nb, plaintext, ad)
nb[2] = 0 //l.Debugf("Encryption: outlen: %d, nonce: %d, ad: %s, plainlen %d", len(out), n, ad, len(plaintext))
nb[3] = 0 return out, nil
noiseEndianness.PutUint64(nb[4:], n)
out = s.c.(cipher.AEAD).Seal(out, nb, plaintext, ad)
//l.Debugf("Encryption: outlen: %d, nonce: %d, ad: %s, plainlen %d", len(out), n, ad, len(plaintext))
return out, nil
}
} else { } else {
return nil, errors.New("no cipher state available to encrypt") return nil, errors.New("no cipher state available to encrypt")
} }
@@ -66,17 +56,12 @@ func (s *NebulaCipherState) EncryptDanger(out, ad, plaintext []byte, n uint64, n
func (s *NebulaCipherState) DecryptDanger(out, ad, ciphertext []byte, n uint64, nb []byte) ([]byte, error) { func (s *NebulaCipherState) DecryptDanger(out, ad, ciphertext []byte, n uint64, nb []byte) ([]byte, error) {
if s != nil { if s != nil {
switch ce := s.c.(type) { nb[0] = 0
case cipherAEADDanger: nb[1] = 0
return ce.DecryptDanger(out, ad, ciphertext, n, nb) nb[2] = 0
default: nb[3] = 0
nb[0] = 0 noiseEndianness.PutUint64(nb[4:], n)
nb[1] = 0 return s.c.(cipher.AEAD).Open(out, nb, ciphertext, ad)
nb[2] = 0
nb[3] = 0
noiseEndianness.PutUint64(nb[4:], n)
return s.c.(cipher.AEAD).Open(out, nb, ciphertext, ad)
}
} else { } else {
return []byte{}, nil return []byte{}, nil
} }

View File

@@ -1,78 +0,0 @@
//go:build fips140v1.0
// +build fips140v1.0
package noiseutil
import (
"crypto/cipher"
"encoding/binary"
// unsafe needed for go:linkname
_ "unsafe"
"github.com/flynn/noise"
)
// EncryptLockNeeded indicates if calls to Encrypt need a lock
// This is true for fips140 because the Seal function verifies that the
// nonce is strictly increasing.
const EncryptLockNeeded = true
// TODO: Use NewGCMWithCounterNonce once available:
// - https://github.com/golang/go/issues/73110
// Using tls.aeadAESGCM gives us the TLS 1.2 GCM, which also verifies
// that the nonce is strictly increasing.
//
//go:linkname aeadAESGCM crypto/tls.aeadAESGCM
func aeadAESGCM(key, noncePrefix []byte) cipher.AEAD
type cipherFn struct {
fn func([32]byte) noise.Cipher
name string
}
func (c cipherFn) Cipher(k [32]byte) noise.Cipher { return c.fn(k) }
func (c cipherFn) CipherName() string { return c.name }
// CipherAESGCM is the AES256-GCM AEAD cipher (using aeadAESGCM when fips140 is enabled)
var CipherAESGCM noise.CipherFunc = cipherFn{cipherAESGCM, "AESGCM"}
// tls.aeadAESGCM uses a 4 byte static prefix and an 8 byte nonce
var emptyPrefix = []byte{0, 0, 0, 0}
func cipherAESGCM(k [32]byte) noise.Cipher {
gcm := aeadAESGCM(k[:], emptyPrefix)
return aeadCipher{
gcm,
func(n uint64) []byte {
// tls.aeadAESGCM uses a 4 byte static prefix and an 8 byte nonce
var nonce [8]byte
binary.BigEndian.PutUint64(nonce[:], n)
return nonce[:]
},
}
}
type aeadCipher struct {
cipher.AEAD
nonce func(uint64) []byte
}
func (c aeadCipher) Encrypt(out []byte, n uint64, ad, plaintext []byte) []byte {
return c.Seal(out, c.nonce(n), plaintext, ad)
}
func (c aeadCipher) Decrypt(out []byte, n uint64, ad, ciphertext []byte) ([]byte, error) {
return c.Open(out, c.nonce(n), ciphertext, ad)
}
func (c aeadCipher) EncryptDanger(out, ad, plaintext []byte, n uint64, nb []byte) ([]byte, error) {
binary.BigEndian.PutUint64(nb[4:], n)
out = c.Seal(out, nb[4:], plaintext, ad)
return out, nil
}
func (c aeadCipher) DecryptDanger(out, ad, ciphertext []byte, n uint64, nb []byte) ([]byte, error) {
binary.BigEndian.PutUint64(nb[4:], n)
return c.Open(out, nb[4:], ciphertext, ad)
}

View File

@@ -1,42 +0,0 @@
//go:build fips140v1.0
// +build fips140v1.0
package noiseutil
import (
"crypto/fips140"
"encoding/hex"
"log"
"testing"
"github.com/stretchr/testify/assert"
)
func TestEncryptLockNeeded(t *testing.T) {
assert.True(t, EncryptLockNeeded)
}
// Ensure NewAESGCM validates the nonce is non-repeating
func TestNewAESGCM(t *testing.T) {
assert.True(t, fips140.Enabled())
key, _ := hex.DecodeString("feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308")
iv, _ := hex.DecodeString("00000000facedbaddecaf888")
plaintext, _ := hex.DecodeString("d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39")
aad, _ := hex.DecodeString("feedfacedeadbeeffeedfacedeadbeefabaddad2")
expected, _ := hex.DecodeString("72ce2ea385f88c20d856e9d1248c2ca08562bbe8a61459ffae06ec393540518e9b6b4c40a146053f26a3df83c5384a48d273148b15aba64d970107432b2892741359275676441c1572c3fa9e")
var keyArray [32]byte
copy(keyArray[:], key)
c := CipherAESGCM.Cipher(keyArray)
aead := c.(aeadCipher).AEAD
dst := aead.Seal([]byte{}, iv, plaintext, aad)
log.Printf("%x", dst)
assert.Equal(t, expected, dst)
// We expect this to fail since we are re-encrypting with a repeat IV
assert.PanicsWithValue(t, "crypto/cipher: counter decreased", func() {
dst = aead.Seal([]byte{}, iv, plaintext, aad)
})
}

View File

@@ -1,5 +1,5 @@
//go:build !boringcrypto && !fips140v1.0 //go:build !boringcrypto
// +build !boringcrypto,!fips140v1.0 // +build !boringcrypto
package noiseutil package noiseutil

View File

@@ -1,5 +1,5 @@
//go:build !boringcrypto && !fips140v1.0 //go:build !boringcrypto
// +build !boringcrypto,!fips140v1.0 // +build !boringcrypto
package noiseutil package noiseutil

View File

@@ -81,7 +81,7 @@ func (f *Interface) readOutsidePackets(ip netip.AddrPort, via *ViaSender, out []
// Pull the Roaming parts up here, and return in all call paths. // Pull the Roaming parts up here, and return in all call paths.
f.handleHostRoaming(hostinfo, ip) f.handleHostRoaming(hostinfo, ip)
// Track usage of both the HostInfo and the Relay for the received & authenticated packet // Track usage of both the HostInfo and the Relay for the received & authenticated packet
f.connectionManager.In(hostinfo) f.connectionManager.In(hostinfo.localIndexId)
f.connectionManager.RelayUsed(h.RemoteIndex) f.connectionManager.RelayUsed(h.RemoteIndex)
relay, ok := hostinfo.relayState.QueryRelayForByIdx(h.RemoteIndex) relay, ok := hostinfo.relayState.QueryRelayForByIdx(h.RemoteIndex)
@@ -213,7 +213,7 @@ func (f *Interface) readOutsidePackets(ip netip.AddrPort, via *ViaSender, out []
f.handleHostRoaming(hostinfo, ip) f.handleHostRoaming(hostinfo, ip)
f.connectionManager.In(hostinfo) f.connectionManager.In(hostinfo.localIndexId)
} }
// closeTunnel closes a tunnel locally, it does not send a closeTunnel packet to the remote // closeTunnel closes a tunnel locally, it does not send a closeTunnel packet to the remote
@@ -498,7 +498,7 @@ func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out
return false return false
} }
f.connectionManager.In(hostinfo) f.connectionManager.In(hostinfo.localIndexId)
_, err = f.readers[q].Write(out) _, err = f.readers[q].Write(out)
if err != nil { if err != nil {
f.l.WithError(err).Error("Failed to write to tun") f.l.WithError(err).Error("Failed to write to tun")

View File

@@ -7,11 +7,11 @@ import (
"slices" "slices"
"sort" "sort"
"strconv" "strconv"
"sync"
"sync/atomic" "sync/atomic"
"time" "time"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/wadey/synctrace"
) )
// forEachFunc is used to benefit folks that want to do work inside the lock // forEachFunc is used to benefit folks that want to do work inside the lock
@@ -185,7 +185,7 @@ func (hr *hostnamesResults) GetAddrs() []netip.AddrPort {
// It serves as a local cache of query replies, host update notifications, and locally learned addresses // It serves as a local cache of query replies, host update notifications, and locally learned addresses
type RemoteList struct { type RemoteList struct {
// Every interaction with internals requires a lock! // Every interaction with internals requires a lock!
sync.RWMutex synctrace.RWMutex
// The full list of vpn addresses assigned to this host // The full list of vpn addresses assigned to this host
vpnAddrs []netip.Addr vpnAddrs []netip.Addr
@@ -215,6 +215,7 @@ type RemoteList struct {
// NewRemoteList creates a new empty RemoteList // NewRemoteList creates a new empty RemoteList
func NewRemoteList(vpnAddrs []netip.Addr, shouldAdd func(netip.Addr) bool) *RemoteList { func NewRemoteList(vpnAddrs []netip.Addr, shouldAdd func(netip.Addr) bool) *RemoteList {
r := &RemoteList{ r := &RemoteList{
RWMutex: synctrace.NewRWMutex("remote-list"),
vpnAddrs: make([]netip.Addr, len(vpnAddrs)), vpnAddrs: make([]netip.Addr, len(vpnAddrs)),
addrs: make([]netip.AddrPort, 0), addrs: make([]netip.AddrPort, 0),
relays: make([]netip.Addr, 0), relays: make([]netip.Addr, 0),

View File

@@ -5,10 +5,10 @@ import (
"errors" "errors"
"fmt" "fmt"
"net" "net"
"sync"
"github.com/armon/go-radix" "github.com/armon/go-radix"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/wadey/synctrace"
"golang.org/x/crypto/ssh" "golang.org/x/crypto/ssh"
) )
@@ -28,7 +28,7 @@ type SSHServer struct {
listener net.Listener listener net.Listener
// Locks the conns/counter to avoid concurrent map access // Locks the conns/counter to avoid concurrent map access
connsLock sync.Mutex connsLock synctrace.Mutex
conns map[int]*session conns map[int]*session
counter int counter int
} }
@@ -41,6 +41,7 @@ func NewSSHServer(l *logrus.Entry) (*SSHServer, error) {
l: l, l: l,
commands: radix.New(), commands: radix.New(),
conns: make(map[int]*session), conns: make(map[int]*session),
connsLock: synctrace.NewMutex("ssh-server-conns"),
} }
cc := ssh.CertChecker{ cc := ssh.CertChecker{

View File

@@ -1,8 +1,9 @@
package nebula package nebula
import ( import (
"sync"
"time" "time"
"github.com/wadey/synctrace"
) )
// How many timer objects should be cached // How many timer objects should be cached
@@ -34,7 +35,7 @@ type TimerWheel[T any] struct {
} }
type LockingTimerWheel[T any] struct { type LockingTimerWheel[T any] struct {
m sync.Mutex m synctrace.Mutex
t *TimerWheel[T] t *TimerWheel[T]
} }
@@ -81,8 +82,9 @@ func NewTimerWheel[T any](min, max time.Duration) *TimerWheel[T] {
} }
// NewLockingTimerWheel is version of TimerWheel that is safe for concurrent use with a small performance penalty // NewLockingTimerWheel is version of TimerWheel that is safe for concurrent use with a small performance penalty
func NewLockingTimerWheel[T any](min, max time.Duration) *LockingTimerWheel[T] { func NewLockingTimerWheel[T any](name string, min, max time.Duration) *LockingTimerWheel[T] {
return &LockingTimerWheel[T]{ return &LockingTimerWheel[T]{
m: synctrace.NewMutex(name),
t: NewTimerWheel[T](min, max), t: NewTimerWheel[T](min, max),
} }
} }

View File

@@ -1,5 +0,0 @@
package udp
import "errors"
var ErrInvalidIPv6RemoteForSocket = errors.New("listener is IPv4, but writing to IPv6 remote")

View File

@@ -3,62 +3,20 @@
package udp package udp
// Darwin support is primarily implemented in udp_generic, besides NewListenConfig
import ( import (
"context"
"encoding/binary"
"errors"
"fmt" "fmt"
"net" "net"
"net/netip" "net/netip"
"syscall" "syscall"
"unsafe"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/slackhq/nebula/config"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
) )
type StdConn struct {
*net.UDPConn
isV4 bool
sysFd uintptr
l *logrus.Logger
}
var _ Conn = &StdConn{}
func NewListener(l *logrus.Logger, ip netip.Addr, port int, multi bool, batch int) (Conn, error) { func NewListener(l *logrus.Logger, ip netip.Addr, port int, multi bool, batch int) (Conn, error) {
lc := NewListenConfig(multi) return NewGenericListener(l, ip, port, multi, batch)
pc, err := lc.ListenPacket(context.TODO(), "udp", net.JoinHostPort(ip.String(), fmt.Sprintf("%v", port)))
if err != nil {
return nil, err
}
if uc, ok := pc.(*net.UDPConn); ok {
c := &StdConn{UDPConn: uc, l: l}
rc, err := uc.SyscallConn()
if err != nil {
return nil, fmt.Errorf("failed to open udp socket: %w", err)
}
err = rc.Control(func(fd uintptr) {
c.sysFd = fd
})
if err != nil {
return nil, fmt.Errorf("failed to get udp fd: %w", err)
}
la, err := c.LocalAddr()
if err != nil {
return nil, err
}
c.isV4 = la.Addr().Is4()
return c, nil
}
return nil, fmt.Errorf("unexpected PacketConn: %T %#v", pc, pc)
} }
func NewListenConfig(multi bool) net.ListenConfig { func NewListenConfig(multi bool) net.ListenConfig {
@@ -85,116 +43,16 @@ func NewListenConfig(multi bool) net.ListenConfig {
} }
} }
//go:linkname sendto golang.org/x/sys/unix.sendto func (u *GenericConn) Rebind() error {
//go:noescape rc, err := u.UDPConn.SyscallConn()
func sendto(s int, buf []byte, flags int, to unsafe.Pointer, addrlen int32) (err error)
func (u *StdConn) WriteTo(b []byte, ap netip.AddrPort) error {
var sa unsafe.Pointer
var addrLen int32
if u.isV4 {
if ap.Addr().Is6() {
return ErrInvalidIPv6RemoteForSocket
}
var rsa unix.RawSockaddrInet6
rsa.Family = unix.AF_INET6
rsa.Addr = ap.Addr().As16()
binary.BigEndian.PutUint16((*[2]byte)(unsafe.Pointer(&rsa.Port))[:], ap.Port())
sa = unsafe.Pointer(&rsa)
addrLen = syscall.SizeofSockaddrInet4
} else {
var rsa unix.RawSockaddrInet6
rsa.Family = unix.AF_INET6
rsa.Addr = ap.Addr().As16()
binary.BigEndian.PutUint16((*[2]byte)(unsafe.Pointer(&rsa.Port))[:], ap.Port())
sa = unsafe.Pointer(&rsa)
addrLen = syscall.SizeofSockaddrInet6
}
// Golang stdlib doesn't handle EAGAIN correctly in some situations so we do writes ourselves
// See https://github.com/golang/go/issues/73919
for {
//_, _, err := unix.Syscall6(unix.SYS_SENDTO, u.sysFd, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), 0, sa, addrLen)
err := sendto(int(u.sysFd), b, 0, sa, addrLen)
if err == nil {
// Written, get out before the error handling
return nil
}
if errors.Is(err, syscall.EINTR) {
// Write was interrupted, retry
continue
}
if errors.Is(err, syscall.EAGAIN) {
return &net.OpError{Op: "sendto", Err: unix.EWOULDBLOCK}
}
if errors.Is(err, syscall.EBADF) {
return net.ErrClosed
}
return &net.OpError{Op: "sendto", Err: err}
}
}
func (u *StdConn) LocalAddr() (netip.AddrPort, error) {
a := u.UDPConn.LocalAddr()
switch v := a.(type) {
case *net.UDPAddr:
addr, ok := netip.AddrFromSlice(v.IP)
if !ok {
return netip.AddrPort{}, fmt.Errorf("LocalAddr returned invalid IP address: %s", v.IP)
}
return netip.AddrPortFrom(addr, uint16(v.Port)), nil
default:
return netip.AddrPort{}, fmt.Errorf("LocalAddr returned: %#v", a)
}
}
func (u *StdConn) ReloadConfig(c *config.C) {
// TODO
}
func NewUDPStatsEmitter(udpConns []Conn) func() {
// No UDP stats for non-linux
return func() {}
}
func (u *StdConn) ListenOut(r EncReader) {
buffer := make([]byte, MTU)
for {
// Just read one packet at a time
n, rua, err := u.ReadFromUDPAddrPort(buffer)
if err != nil {
if errors.Is(err, net.ErrClosed) {
u.l.WithError(err).Debug("udp socket is closed, exiting read loop")
return
}
u.l.WithError(err).Error("unexpected udp socket receive error")
}
r(netip.AddrPortFrom(rua.Addr().Unmap(), rua.Port()), buffer[:n])
}
}
func (u *StdConn) Rebind() error {
var err error
if u.isV4 {
err = syscall.SetsockoptInt(int(u.sysFd), syscall.IPPROTO_IP, syscall.IP_BOUND_IF, 0)
} else {
err = syscall.SetsockoptInt(int(u.sysFd), syscall.IPPROTO_IPV6, syscall.IPV6_BOUND_IF, 0)
}
if err != nil { if err != nil {
u.l.WithError(err).Error("Failed to rebind udp socket") return err
} }
return nil return rc.Control(func(fd uintptr) {
err := syscall.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_BOUND_IF, 0)
if err != nil {
u.l.WithError(err).Error("Failed to rebind udp socket")
}
})
} }

View File

@@ -1,7 +1,6 @@
//go:build (!linux || android) && !e2e_testing && !darwin //go:build (!linux || android) && !e2e_testing
// +build !linux android // +build !linux android
// +build !e2e_testing // +build !e2e_testing
// +build !darwin
// udp_generic implements the nebula UDP interface in pure Go stdlib. This // udp_generic implements the nebula UDP interface in pure Go stdlib. This
// means it can be used on platforms like Darwin and Windows. // means it can be used on platforms like Darwin and Windows.

View File

@@ -221,7 +221,7 @@ func (u *StdConn) writeTo6(b []byte, ip netip.AddrPort) error {
func (u *StdConn) writeTo4(b []byte, ip netip.AddrPort) error { func (u *StdConn) writeTo4(b []byte, ip netip.AddrPort) error {
if !ip.Addr().Is4() { if !ip.Addr().Is4() {
return ErrInvalidIPv6RemoteForSocket return fmt.Errorf("Listener is IPv4, but writing to IPv6 remote")
} }
var rsa unix.RawSockaddrInet4 var rsa unix.RawSockaddrInet4

View File

@@ -11,13 +11,13 @@ import (
"io" "io"
"net" "net"
"net/netip" "net/netip"
"sync"
"sync/atomic" "sync/atomic"
"syscall" "syscall"
"unsafe" "unsafe"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
"github.com/slackhq/nebula/config" "github.com/slackhq/nebula/config"
"github.com/wadey/synctrace"
"golang.org/x/sys/windows" "golang.org/x/sys/windows"
"golang.zx2c4.com/wireguard/conn/winrio" "golang.zx2c4.com/wireguard/conn/winrio"
) )
@@ -46,7 +46,7 @@ type ringBuffer struct {
iocp windows.Handle iocp windows.Handle
isFull bool isFull bool
cq winrio.Cq cq winrio.Cq
mu sync.Mutex mu synctrace.Mutex
overlapped windows.Overlapped overlapped windows.Overlapped
} }
@@ -64,7 +64,11 @@ func NewRIOListener(l *logrus.Logger, addr netip.Addr, port int) (*RIOConn, erro
return nil, errors.New("could not initialize winrio") return nil, errors.New("could not initialize winrio")
} }
u := &RIOConn{l: l} u := &RIOConn{
l: l,
rx: ringBuffer{mu: synctrace.NewMutex("rio-rx")},
tx: ringBuffer{mu: synctrace.NewMutex("rio-tx")},
}
err := u.bind(&windows.SockaddrInet6{Addr: addr.As16(), Port: port}) err := u.bind(&windows.SockaddrInet6{Addr: addr.As16(), Port: port})
if err != nil { if err != nil {
@@ -92,25 +96,6 @@ func (u *RIOConn) bind(sa windows.Sockaddr) error {
// Enable v4 for this socket // Enable v4 for this socket
syscall.SetsockoptInt(syscall.Handle(u.sock), syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 0) syscall.SetsockoptInt(syscall.Handle(u.sock), syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, 0)
// Disable reporting of PORT_UNREACHABLE and NET_UNREACHABLE errors from the UDP socket receive call.
// These errors are returned on Windows during UDP receives based on the receipt of ICMP packets. Disable
// the UDP receive error returns with these ioctl calls.
ret := uint32(0)
flag := uint32(0)
size := uint32(unsafe.Sizeof(flag))
err = syscall.WSAIoctl(syscall.Handle(u.sock), syscall.SIO_UDP_CONNRESET, (*byte)(unsafe.Pointer(&flag)), size, nil, 0, &ret, nil, 0)
if err != nil {
return err
}
ret = 0
flag = 0
size = uint32(unsafe.Sizeof(flag))
SIO_UDP_NETRESET := uint32(syscall.IOC_IN | syscall.IOC_VENDOR | 15)
err = syscall.WSAIoctl(syscall.Handle(u.sock), SIO_UDP_NETRESET, (*byte)(unsafe.Pointer(&flag)), size, nil, 0, &ret, nil, 0)
if err != nil {
return err
}
err = u.rx.Open() err = u.rx.Open()
if err != nil { if err != nil {
return err return err
@@ -141,12 +126,8 @@ func (u *RIOConn) ListenOut(r EncReader) {
// Just read one packet at a time // Just read one packet at a time
n, rua, err := u.receive(buffer) n, rua, err := u.receive(buffer)
if err != nil { if err != nil {
if errors.Is(err, net.ErrClosed) { u.l.WithError(err).Debug("udp socket is closed, exiting read loop")
u.l.WithError(err).Debug("udp socket is closed, exiting read loop") return
return
}
u.l.WithError(err).Error("unexpected udp socket receive error")
continue
} }
r(netip.AddrPortFrom(netip.AddrFrom16(rua.Addr).Unmap(), (rua.Port>>8)|((rua.Port&0xff)<<8)), buffer[:n]) r(netip.AddrPortFrom(netip.AddrFrom16(rua.Addr).Unmap(), (rua.Port>>8)|((rua.Port&0xff)<<8)), buffer[:n])