  # Windows smoke job: builds nebula for Windows and for Linux, then runs
  # smoke-windows.ps1, which pairs a native Windows lighthouse with a peer
  # running inside WSL.
  smoke-windows:
    # Only on master, or on pull requests carrying the 'smoke-test-extra' label.
    if: github.ref == 'refs/heads/master' || contains(github.event.pull_request.labels.*.name, 'smoke-test-extra')
    name: Run windows smoke test
    runs-on: windows-latest
    steps:

    - uses: actions/checkout@v6

    - uses: actions/setup-go@v6
      with:
        go-version: '1.25'
        check-latest: true

    # WSL2 + Ubuntu so the smoke can run a real linux peer with its own
    # netns. iputils-ping is needed for the in-WSL ping check. WSL1 has no
    # real kernel and would lack /dev/net/tun, so we have to force WSL2.
    # iproute2 provides the `ip route` / `ip addr` commands the script runs
    # inside the distro.
    - uses: Vampire/setup-wsl@v3
      with:
        distribution: Ubuntu-24.04
        additional-packages: iputils-ping iproute2

    # Vampire/setup-wsl provisions WSL1 even when the WSL2 platform is present.
    # Convert the distro to WSL2 explicitly before we try to use /dev/net/tun.
    # The final `--list --verbose` prints the resulting distro list to the job log.
    - name: convert distro to WSL2
      shell: pwsh
      run: |
        wsl --set-version Ubuntu-24.04 2
        wsl --shutdown
        wsl --list --verbose

    # smoke-windows.ps1 expects nebula.exe and nebula-cert.exe at the repo root
    # and tells the user to run 'make bin-windows' when they are missing.
    - name: build windows nebula
      run: make bin-windows

    # Cross-compile the peer binary; the script copies
    # build/linux-amd64/nebula into the WSL distro.
    - name: build linux nebula for WSL
      shell: bash
      env:
        GOOS: linux
        GOARCH: amd64
      run: |
        mkdir -p build/linux-amd64
        go build -o build/linux-amd64/nebula ./cmd/nebula

    - name: run smoke-windows
      shell: pwsh
      working-directory: ./.github/workflows/smoke
      run: ./smoke-windows.ps1

      timeout-minutes: 15
#!/usr/bin/env pwsh
# Windows smoke test for the nebula tun + UDP + NLM code paths.
#
# Topology:
#   - lighthouse runs natively on the Windows host (wintun + windows UDP)
#   - peer runs inside WSL2 (Linux build of nebula, /dev/net/tun)
#
# WSL2 gives us a real netns boundary so the loopback fast-path on Windows
# does not short-circuit the overlay -- when WSL pings the lighthouse VPN IP,
# Linux has no idea that IP is local to the Windows host, so the packet is
# forced through nebula. Same in reverse.

$ErrorActionPreference = 'Stop'

# wsl.exe emits UTF-16 LE by default which PowerShell reads as bytes, mangling
# every captured string. WSL_UTF8 makes wsl.exe emit UTF-8 instead.
$env:WSL_UTF8 = '1'

# Binaries produced by the workflow's build steps, resolved relative to this script.
$RepoRoot = Resolve-Path "$PSScriptRoot\..\..\.."
$Nebula = Join-Path $RepoRoot 'nebula.exe'
$NebulaCert = Join-Path $RepoRoot 'nebula-cert.exe'
$NebulaLinux = Join-Path $RepoRoot 'build\linux-amd64\nebula'

if (-not (Test-Path $Nebula)) { throw "missing $Nebula; run 'make bin-windows' first" }
if (-not (Test-Path $NebulaCert)) { throw "missing $NebulaCert; run 'make bin-windows' first" }
if (-not (Test-Path $NebulaLinux)) { throw "missing $NebulaLinux; build the linux nebula first" }

# Matches the distro installed by Vampire/setup-wsl in smoke-extra.yml.
$Distro = 'Ubuntu-24.04'
$listed = (wsl --list --quiet 2>$null) -join "`n"
if ($listed -notmatch [regex]::Escape($Distro)) {
    throw "WSL distro $Distro not registered. Got: $listed"
}
Write-Host "Using WSL distro: $Distro"

# Windows host as seen from inside WSL: WSL's default-route gateway. We extract
# it with a regex rather than awk fields so PowerShell does not eat any '$N'
# tokens, and tabs/double-spaces in `ip route` output do not confuse a cut.
$ipCmd = 'ip route show default | grep -oE "([0-9]+\.){3}[0-9]+" | head -1'
# Stringify before trimming: when wsl prints nothing the capture is $null, and
# calling .Trim() on it would die with a null-method error instead of reaching
# the descriptive throw below.
$WindowsIp = "$(wsl -d $Distro -- bash -c $ipCmd)".Trim()
if (-not $WindowsIp) { throw "could not determine Windows host IP from WSL" }
Write-Host "Windows host IP from WSL: $WindowsIp"

# Fresh scratch directory on the Windows side for certs, configs and logs.
$WorkDir = Join-Path $env:TEMP 'nebula-smoke-windows'
if (Test-Path $WorkDir) { Remove-Item -Recurse -Force $WorkDir }
New-Item -ItemType Directory -Path $WorkDir | Out-Null

# Fresh scratch directory inside the distro for the peer's files.
$WslDir = '/tmp/nebula-smoke'
wsl -d $Distro -- bash -c "rm -rf $WslDir && mkdir -p $WslDir" | Out-Null
if ($LASTEXITCODE -ne 0) { throw "failed to create $WslDir in WSL (exit $LASTEXITCODE)" }

$DevName = 'nebula-smoke'
$Ip1 = '192.168.241.1'
$Ip2 = '192.168.241.2'
$Port = 4242

# Throwaway CA plus one certificate per node ($Ip1 = lighthouse, $Ip2 = peer).
& $NebulaCert ca -name 'smoke-ca' -out-crt "$WorkDir\ca.crt" -out-key "$WorkDir\ca.key"
if ($LASTEXITCODE -ne 0) { throw "nebula-cert ca failed (exit $LASTEXITCODE)" }

& $NebulaCert sign -name 'lighthouse' -networks "$Ip1/24" -ca-crt "$WorkDir\ca.crt" -ca-key "$WorkDir\ca.key" -out-crt "$WorkDir\lighthouse.crt" -out-key "$WorkDir\lighthouse.key"
if ($LASTEXITCODE -ne 0) { throw "nebula-cert sign lighthouse failed (exit $LASTEXITCODE)" }

& $NebulaCert sign -name 'peer' -networks "$Ip2/24" -ca-crt "$WorkDir\ca.crt" -ca-key "$WorkDir\ca.key" -out-crt "$WorkDir\peer.crt" -out-key "$WorkDir\peer.key"
if ($LASTEXITCODE -ne 0) { throw "nebula-cert sign peer failed (exit $LASTEXITCODE)" }

# Windows lighthouse config.
@"
pki:
  ca: $WorkDir\ca.crt
  cert: $WorkDir\lighthouse.crt
  key: $WorkDir\lighthouse.key
static_host_map: {}
lighthouse:
  am_lighthouse: true
  interval: 60
  hosts: []
listen:
  host: 0.0.0.0
  port: $Port
tun:
  disabled: false
  dev: $DevName
  drop_local_broadcast: false
  drop_multicast: false
  tx_queue: 500
  mtu: 1300
  network_category: private
logging:
  level: info
  format: text
firewall:
  outbound_action: drop
  inbound_action: drop
  conntrack:
    tcp_timeout: 12m
    udp_timeout: 3m
    default_timeout: 10m
  outbound:
    - port: any
      proto: any
      host: any
  inbound:
    - port: any
      proto: any
      host: any
"@ | Out-File -FilePath "$WorkDir\lighthouse.yml" -Encoding utf8

# WSL peer config (paths are POSIX, deliberately).
@"
pki:
  ca: $WslDir/ca.crt
  cert: $WslDir/peer.crt
  key: $WslDir/peer.key
static_host_map:
  "${Ip1}": ["${WindowsIp}:$Port"]
lighthouse:
  am_lighthouse: false
  interval: 60
  hosts:
    - "${Ip1}"
listen:
  host: 0.0.0.0
  port: 0
tun:
  disabled: false
  dev: nebula1
  drop_local_broadcast: false
  drop_multicast: false
  tx_queue: 500
  mtu: 1300
logging:
  level: info
  format: text
firewall:
  outbound_action: drop
  inbound_action: drop
  conntrack:
    tcp_timeout: 12m
    udp_timeout: 3m
    default_timeout: 10m
  outbound:
    - port: any
      proto: any
      host: any
  inbound:
    - port: any
      proto: any
      host: any
"@ | Out-File -FilePath "$WorkDir\peer.yml" -Encoding utf8
# Stage WSL artifacts. Convert Windows paths to WSL paths ourselves rather than
# calling `wslpath`, because PowerShell's argument-passing to external EXEs
# strips backslashes from path arguments in ways that are hard to escape around.
#
# ConvertTo-WslPath maps a drive-letter path such as C:\a\b to /mnt/c/a/b and
# throws for anything that does not start with "<letter>:\".
function ConvertTo-WslPath {
    param([string]$WindowsPath)
    if ($WindowsPath -notmatch '^([A-Za-z]):\\(.*)$') {
        throw "cannot convert path to WSL: $WindowsPath"
    }
    return "/mnt/$($matches[1].ToLower())/$($matches[2].Replace('\','/'))"
}

$WslWorkDir = ConvertTo-WslPath $WorkDir
$WslNebulaPath = ConvertTo-WslPath $NebulaLinux
wsl -d $Distro -- bash -c "cp '$WslWorkDir/ca.crt' '$WslWorkDir/peer.crt' '$WslWorkDir/peer.key' '$WslWorkDir/peer.yml' $WslDir/ && cp '$WslNebulaPath' $WslDir/nebula && chmod +x $WslDir/nebula"
# Fail here rather than later with a confusing "peer exited" message.
if ($LASTEXITCODE -ne 0) { throw "failed to stage smoke artifacts into WSL (exit $LASTEXITCODE)" }

# Make sure WSL has tun support and /dev/net/tun is usable before starting
# nebula. Diagnostics first so a fail here points at the real problem (e.g.
# WSL1 distros do not have a real kernel and will not have tun).
Write-Host '=== WSL diagnostic ==='
wsl --version 2>&1 | Out-Host
wsl --list --verbose 2>&1 | Out-Host
wsl -d $Distro -u root -- uname -a | Out-Host
wsl -d $Distro -u root -- bash -c "modprobe tun 2>&1 || true; mkdir -p /dev/net; [ -c /dev/net/tun ] || mknod /dev/net/tun c 10 200; chmod 600 /dev/net/tun; ls -l /dev/net/tun"
if ($LASTEXITCODE -ne 0) { throw "failed to prepare /dev/net/tun in WSL (TUN support missing?)" }

# Deliberately no New-NetFirewallRule calls here -- nebula's windows_bypass_wdf
# feature is supposed to install WFP permit filters that let inbound traffic
# through Windows Defender Firewall on its own. If this smoke regresses, that
# feature regressed.

$lhOut = Join-Path $WorkDir 'lighthouse.out.log'
$lhErr = Join-Path $WorkDir 'lighthouse.err.log'
$peerOut = Join-Path $WorkDir 'peer.out.log'
$peerErr = Join-Path $WorkDir 'peer.err.log'

# Declared up front so the finally block can tell "never started" apart from
# "started and still running".
$lhProc = $null
$peerProc = $null

# Wait-Until polls $Predicate every 500 ms until it returns a truthy value, and
# throws "timed out waiting for: $What" once $TimeoutSec seconds have elapsed.
# Exceptions thrown by the predicate itself propagate immediately.
function Wait-Until {
    param([scriptblock]$Predicate, [int]$TimeoutSec, [string]$What)
    $deadline = (Get-Date).AddSeconds($TimeoutSec)
    while ((Get-Date) -lt $deadline) {
        if (& $Predicate) { return }
        Start-Sleep -Milliseconds 500
    }
    throw "timed out waiting for: $What"
}

try {
    # Both launches live inside the try so that a failure to start the peer
    # still tears the lighthouse down in the finally block.
    $lhProc = Start-Process -FilePath $Nebula -ArgumentList @('-config', "$WorkDir\lighthouse.yml") `
        -PassThru -NoNewWindow `
        -RedirectStandardOutput $lhOut `
        -RedirectStandardError $lhErr

    # Run nebula in WSL as root with no sudo + no shell wrapper. PowerShell's
    # Start-Process arg quoting mangles `bash -c "..."` strings that contain
    # spaces/redirections, so we skip bash entirely and let Start-Process do the
    # stdout/stderr capture itself.
    $peerProc = Start-Process -FilePath 'wsl' `
        -ArgumentList @('-d', $Distro, '-u', 'root', '--', "$WslDir/nebula", '-config', "$WslDir/peer.yml") `
        -PassThru -NoNewWindow `
        -RedirectStandardOutput $peerOut `
        -RedirectStandardError $peerErr

    Wait-Until -TimeoutSec 30 -What "windows wintun adapter $DevName with NetworkCategory=Private" -Predicate {
        if ($lhProc.HasExited) { throw "lighthouse exited (code $($lhProc.ExitCode)) before tun was ready" }
        $p = Get-NetConnectionProfile -InterfaceAlias $DevName -ErrorAction SilentlyContinue
        $p -and ("$($p.NetworkCategory)" -ieq 'Private')
    }
    Write-Host "OK: $DevName NetworkCategory=Private"

    Wait-Until -TimeoutSec 30 -What "WSL nebula1 with $Ip2" -Predicate {
        if ($peerProc.HasExited) { throw "peer exited (code $($peerProc.ExitCode)) before tun was ready" }
        $r = wsl -d $Distro -u root -- bash -c "ip -o addr show nebula1 2>/dev/null | grep -q 'inet $Ip2' && echo yes"
        ("$r").Trim() -eq 'yes'
    }
    Write-Host "OK: WSL nebula1 has $Ip2"

    Wait-Until -TimeoutSec 30 -What "ping from WSL peer to windows lighthouse ($Ip1)" -Predicate {
        if ($peerProc.HasExited) { throw "peer exited (code $($peerProc.ExitCode)) before ping succeeded" }
        $r = wsl -d $Distro -u root -- bash -c "ping -c1 -W1 $Ip1 >/dev/null 2>&1 && echo OK"
        ("$r").Trim() -eq 'OK'
    }
    Write-Host "OK: WSL peer -> windows lighthouse"

    Wait-Until -TimeoutSec 30 -What "ping from windows lighthouse to WSL peer ($Ip2)" -Predicate {
        # ping.exe can exit 0 on a "Destination host unreachable" reply, so the
        # exit code alone is not proof of an echo reply; also require the
        # "TTL=" field that is printed for a real reply.
        $reply = & ping.exe -n 1 -w 1000 $Ip2
        ($LASTEXITCODE -eq 0) -and [bool]($reply -match 'TTL=')
    }
    Write-Host "OK: windows lighthouse -> WSL peer"

    Write-Host ''
    Write-Host 'All smoke checks passed.'
}
catch {
    # Dump everything useful for a post-mortem, then re-throw so the job fails.
    Write-Host ''
    Write-Host '=== lighthouse stdout ==='
    Get-Content $lhOut -ErrorAction SilentlyContinue | Out-Host
    Write-Host '=== lighthouse stderr ==='
    Get-Content $lhErr -ErrorAction SilentlyContinue | Out-Host
    Write-Host '=== peer stdout ==='
    Get-Content $peerOut -ErrorAction SilentlyContinue | Out-Host
    Write-Host '=== peer stderr ==='
    Get-Content $peerErr -ErrorAction SilentlyContinue | Out-Host
    Write-Host '=== nebula WFP filters ==='
    # Dump nebula-installed filters so we can verify they got registered with
    # the conditions we expect.
    $wfpDump = Join-Path $WorkDir 'wfp.xml'
    netsh wfp show filters file=$wfpDump 2>&1 | Out-Null
    if (Test-Path $wfpDump) {
        Select-String -Path $wfpDump -Pattern 'Nebula' -Context 0,80 -ErrorAction SilentlyContinue | Out-Host
    }
    throw
}
finally {
    if ($lhProc -and -not $lhProc.HasExited) {
        Stop-Process -Id $lhProc.Id -Force -ErrorAction SilentlyContinue
        $lhProc.WaitForExit(5000) | Out-Null
    }
    wsl -d $Distro -u root -- bash -c "pkill -f $WslDir/nebula 2>/dev/null; true" | Out-Null
    # pkill returns 1 when no match and wsl propagates that; the smoke is done
    # so we don't want it to leak into the script's exit code.
    $global:LASTEXITCODE = 0
    if ($peerProc -and -not $peerProc.HasExited) {
        Stop-Process -Id $peerProc.Id -Force -ErrorAction SilentlyContinue
    }
}
+ # WFP sits below Windows Defender Firewall, so this lets peer handshakes reach Nebula's outside socket regardless + # of WDF's inbound rules. + # Default true; set to false to leave WDF in charge of inbound decisions on the listener port. Not reloadable. + #windows_bypass_wdf: true + # By default, Nebula replies to packets it has no tunnel for with a "recv_error" packet. This packet helps speed up reconnection # in the case that Nebula on either side did not shut down cleanly. This response can be abused as a way to discover if Nebula is running # on a host though. This option lets you configure if you want to send "recv_error" packets always, never, or only to private network remotes. @@ -286,6 +294,24 @@ tun: # metric: 100 # install: true + # On Windows only, sets the network category of the nebula interface. Without this, Windows often + # leaves the network as "Unidentified" and treats it as Public, which makes the host firewall more + # restrictive than you usually want for an overlay between trusted peers. Valid values: + # private - treat the nebula network as a private/trusted network (default) + # public - treat it as a public/untrusted network + # domain - treat it as a domain-authenticated network + # unset - leave whatever Windows decided alone + # Not reloadable. + #network_category: private + + # On Windows only + # When true, Nebula installs a WFP (Windows Filtering Platform) PERMIT filter scoped to the nebula adapter LUID. + # WFP sits below Windows Defender Firewall, so this lets inbound traffic through regardless of WDF rules. + # Filters are auto-removed when the adapter goes away. + # See listen.windows_bypass_wdf for the matching control over inbound to nebula's outside UDP listener. + # Default true; set to false to leave WDF in charge of inbound decisions on the nebula interface. Not reloadable. 
// networkCategory is the Go counterpart of NLM_NETWORK_CATEGORY from
// netlistmgr.h. The numeric values are passed to Windows as-is, so they must
// stay equal to the ones declared there.
type networkCategory int32

const (
	networkCategoryPublic              = networkCategory(0)
	networkCategoryPrivate             = networkCategory(1)
	networkCategoryDomainAuthenticated = networkCategory(2)
)

// String returns the lowercase name used for the category in configuration and
// log output, or "unknown(N)" for a value outside the three known categories.
func (c networkCategory) String() string {
	if c == networkCategoryPublic {
		return "public"
	}
	if c == networkCategoryPrivate {
		return "private"
	}
	if c == networkCategoryDomainAuthenticated {
		return "domain"
	}
	return fmt.Sprintf("unknown(%d)", c)
}
+func parseNetworkCategory(s string) (networkCategory, bool, error) { + switch strings.ToLower(strings.TrimSpace(s)) { + case "", "unset": + return 0, false, nil + case "public": + return networkCategoryPublic, true, nil + case "private": + return networkCategoryPrivate, true, nil + case "domain", "domainauthenticated": + return networkCategoryDomainAuthenticated, true, nil + } + return 0, false, fmt.Errorf("unknown tun.network_category %q (expected public, private, domain, or unset)", s) +} + +// CLSID_NetworkListManager {DCB00C01-570F-4A9B-8D69-199FDBA5723B} +var clsidNetworkListManager = windows.GUID{ + Data1: 0xDCB00C01, Data2: 0x570F, Data3: 0x4A9B, + Data4: [8]byte{0x8D, 0x69, 0x19, 0x9F, 0xDB, 0xA5, 0x72, 0x3B}, +} + +// IID_INetworkListManager {DCB00000-570F-4A9B-8D69-199FDBA5723B} +var iidINetworkListManager = windows.GUID{ + Data1: 0xDCB00000, Data2: 0x570F, Data3: 0x4A9B, + Data4: [8]byte{0x8D, 0x69, 0x19, 0x9F, 0xDB, 0xA5, 0x72, 0x3B}, +} + +// x/sys/windows doesn't expose CoCreateInstance, so we bind it ourselves. +var procCoCreateInstance = windows.NewLazySystemDLL("ole32.dll").NewProc("CoCreateInstance") + +const clsCtxAll = windows.CLSCTX_INPROC_SERVER | windows.CLSCTX_INPROC_HANDLER | + windows.CLSCTX_LOCAL_SERVER | windows.CLSCTX_REMOTE_SERVER + +const ( + hrSFALSE = 0x00000001 + hrRPCEChangedMode = 0x80010106 +) + +type hresult uint32 + +func (h hresult) failed() bool { return int32(h) < 0 } +func (h hresult) String() string { + return fmt.Sprintf("HRESULT 0x%08x", uint32(h)) +} + +var errAdapterNotFound = errors.New("adapter not present in network connections enumeration") + +// Vtable layouts. Slot order must match the declaration order in netlistmgr.h. +// All NLM interfaces here derive from IDispatch, which derives from IUnknown. 
// Vtable layouts. Slot order must match the declaration order in netlistmgr.h.
// All NLM interfaces here derive from IDispatch, which derives from IUnknown.
// Every field is a raw function pointer that is invoked through
// syscall.SyscallN with the interface pointer as the first argument.

// iUnknownVtbl holds the three IUnknown slots that begin every vtable below.
type iUnknownVtbl struct {
	QueryInterface uintptr
	AddRef         uintptr
	Release        uintptr
}

// iDispatchVtbl is IUnknown followed by the four IDispatch slots. None of the
// IDispatch entries are called from this file; they exist to keep the slots
// that follow at the right offsets.
type iDispatchVtbl struct {
	iUnknownVtbl
	GetTypeInfoCount uintptr
	GetTypeInfo      uintptr
	GetIDsOfNames    uintptr
	Invoke           uintptr
}

// iNetworkListManagerVtbl is the INetworkListManager vtable. Only
// GetNetworkConnections (and the inherited Release) are called here.
type iNetworkListManagerVtbl struct {
	iDispatchVtbl
	GetNetworks           uintptr
	GetNetwork            uintptr
	GetNetworkConnections uintptr
	GetNetworkConnection  uintptr
	IsConnectedToInternet uintptr
	IsConnected           uintptr
	GetConnectivity       uintptr
}

// iNetworkListManager is a COM object pointer: its first (and only declared)
// field is the pointer to the vtable.
type iNetworkListManager struct{ Vtbl *iNetworkListManagerVtbl }

// Release calls IUnknown::Release on the object. The returned reference count
// is ignored.
func (n *iNetworkListManager) Release() {
	syscall.SyscallN(n.Vtbl.Release, uintptr(unsafe.Pointer(n)))
}

// GetNetworkConnections returns an enumerator over the network connections
// known to the Network List Manager. The caller must Release the enumerator.
// A failed HRESULT is returned as an error.
func (n *iNetworkListManager) GetNetworkConnections() (*iEnumNetworkConnections, error) {
	var enum *iEnumNetworkConnections
	r1, _, _ := syscall.SyscallN(n.Vtbl.GetNetworkConnections,
		uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(&enum)),
	)
	if hr := hresult(r1); hr.failed() {
		return nil, fmt.Errorf("INetworkListManager.GetNetworkConnections: %s", hr)
	}
	return enum, nil
}

// iEnumNetworkConnectionsVtbl is the IEnumNetworkConnections vtable. Only Next
// (and the inherited Release) are called here.
type iEnumNetworkConnectionsVtbl struct {
	iDispatchVtbl
	NewEnum uintptr
	Next    uintptr
	Skip    uintptr
	Reset   uintptr
	Clone   uintptr
}

// iEnumNetworkConnections is a COM object pointer to a connection enumerator.
type iEnumNetworkConnections struct{ Vtbl *iEnumNetworkConnectionsVtbl }

// Release calls IUnknown::Release on the enumerator. The returned reference
// count is ignored.
func (e *iEnumNetworkConnections) Release() {
	syscall.SyscallN(e.Vtbl.Release, uintptr(unsafe.Pointer(e)))
}
+func (e *iEnumNetworkConnections) Next() (*iNetworkConnection, error) { + var conn *iNetworkConnection + var fetched uint32 + r1, _, _ := syscall.SyscallN(e.Vtbl.Next, + uintptr(unsafe.Pointer(e)), 1, + uintptr(unsafe.Pointer(&conn)), uintptr(unsafe.Pointer(&fetched)), + ) + if hr := hresult(r1); hr.failed() { + return nil, fmt.Errorf("IEnumNetworkConnections.Next: %s", hr) + } + if fetched == 0 { + return nil, nil + } + return conn, nil +} + +type iNetworkConnectionVtbl struct { + iDispatchVtbl + GetNetwork uintptr + IsConnectedToInternet uintptr + IsConnected uintptr + GetConnectivity uintptr + GetConnectionId uintptr + GetAdapterId uintptr + GetDomainType uintptr +} + +type iNetworkConnection struct{ Vtbl *iNetworkConnectionVtbl } + +func (c *iNetworkConnection) Release() { + syscall.SyscallN(c.Vtbl.Release, uintptr(unsafe.Pointer(c))) +} + +func (c *iNetworkConnection) GetAdapterId() (windows.GUID, error) { + var g windows.GUID + r1, _, _ := syscall.SyscallN(c.Vtbl.GetAdapterId, + uintptr(unsafe.Pointer(c)), uintptr(unsafe.Pointer(&g)), + ) + if hr := hresult(r1); hr.failed() { + return windows.GUID{}, fmt.Errorf("INetworkConnection.GetAdapterId: %s", hr) + } + return g, nil +} + +func (c *iNetworkConnection) GetNetwork() (*iNetwork, error) { + var net *iNetwork + r1, _, _ := syscall.SyscallN(c.Vtbl.GetNetwork, + uintptr(unsafe.Pointer(c)), uintptr(unsafe.Pointer(&net)), + ) + if hr := hresult(r1); hr.failed() { + return nil, fmt.Errorf("INetworkConnection.GetNetwork: %s", hr) + } + return net, nil +} + +type iNetworkVtbl struct { + iDispatchVtbl + GetName uintptr + SetName uintptr + GetDescription uintptr + SetDescription uintptr + GetNetworkId uintptr + GetDomainType uintptr + GetNetworkConnections uintptr + GetTimeCreatedAndConnected uintptr + IsConnectedToInternet uintptr + IsConnected uintptr + GetConnectivity uintptr + GetCategory uintptr + SetCategory uintptr +} + +type iNetwork struct{ Vtbl *iNetworkVtbl } + +func (n *iNetwork) Release() { + 
syscall.SyscallN(n.Vtbl.Release, uintptr(unsafe.Pointer(n))) +} + +func (n *iNetwork) GetCategory() (networkCategory, error) { + var c networkCategory + r1, _, _ := syscall.SyscallN(n.Vtbl.GetCategory, + uintptr(unsafe.Pointer(n)), uintptr(unsafe.Pointer(&c)), + ) + if hr := hresult(r1); hr.failed() { + return 0, fmt.Errorf("INetwork.GetCategory: %s", hr) + } + return c, nil +} + +func (n *iNetwork) SetCategory(c networkCategory) error { + r1, _, _ := syscall.SyscallN(n.Vtbl.SetCategory, + uintptr(unsafe.Pointer(n)), uintptr(int32(c)), + ) + if hr := hresult(r1); hr.failed() { + return fmt.Errorf("INetwork.SetCategory: %s", hr) + } + return nil +} + +// coInit initializes COM for the current OS thread. The returned function must +// be deferred to balance a successful init. RPC_E_CHANGED_MODE means COM is +// already initialized in a different mode on this thread, which is still fine +// for our calls but we must not Uninitialize in that case. +func coInit() (func(), error) { + err := windows.CoInitializeEx(0, windows.COINIT_MULTITHREADED) + if err == nil { + return windows.CoUninitialize, nil + } + if e, ok := err.(syscall.Errno); ok { + switch uint32(e) { + case hrSFALSE: + return windows.CoUninitialize, nil + case hrRPCEChangedMode: + return func() {}, nil + } + } + return nil, fmt.Errorf("CoInitializeEx: %w", err) +} + +func createNetworkListManager() (*iNetworkListManager, error) { + var nlm *iNetworkListManager + r1, _, _ := procCoCreateInstance.Call( + uintptr(unsafe.Pointer(&clsidNetworkListManager)), + 0, + uintptr(clsCtxAll), + uintptr(unsafe.Pointer(&iidINetworkListManager)), + uintptr(unsafe.Pointer(&nlm)), + ) + if hr := hresult(r1); hr.failed() { + return nil, fmt.Errorf("CoCreateInstance(NetworkListManager): %s", hr) + } + return nlm, nil +} + +// setNetworkCategory locates the network connection bound to adapterGUID and +// sets the category of its parent network. 
Returns errAdapterNotFound if the +// adapter is not yet visible in the NLM enumeration. +func setNetworkCategory(adapterGUID windows.GUID, cat networkCategory) error { + deinit, err := coInit() + if err != nil { + return err + } + defer deinit() + + nlm, err := createNetworkListManager() + if err != nil { + return err + } + defer nlm.Release() + + enum, err := nlm.GetNetworkConnections() + if err != nil { + return err + } + defer enum.Release() + + for { + conn, err := enum.Next() + if err != nil { + return err + } + if conn == nil { + return errAdapterNotFound + } + + guid, err := conn.GetAdapterId() + if err != nil || guid != adapterGUID { + conn.Release() + continue + } + + net, err := conn.GetNetwork() + conn.Release() + if err != nil { + return err + } + err = net.SetCategory(cat) + net.Release() + return err + } +} + +// applyNetworkCategory polls until the wintun adapter shows up in the NLM +// enumeration, then sets the category. Intended to run in its own goroutine. +func applyNetworkCategory(l *slog.Logger, adapterGUID windows.GUID, cat networkCategory) { + // COM Init/Uninit must be paired on the same OS thread. 
+ runtime.LockOSThread() + defer runtime.UnlockOSThread() + + const ( + attempts = 30 + interval = 500 * time.Millisecond + ) + for i := 0; i < attempts; i++ { + err := setNetworkCategory(adapterGUID, cat) + if err == nil { + l.Info("Set Windows network category", "category", cat.String()) + return + } + if !errors.Is(err, errAdapterNotFound) { + l.Warn("Failed to set Windows network category", "error", err, "category", cat.String()) + return + } + time.Sleep(interval) + } + l.Warn("Gave up waiting for adapter to appear in NLM enumeration; network category not set", + "category", cat.String(), + "waited", time.Duration(attempts)*interval, + ) +} diff --git a/overlay/network_category_windows_test.go b/overlay/network_category_windows_test.go new file mode 100644 index 00000000..c679f8c4 --- /dev/null +++ b/overlay/network_category_windows_test.go @@ -0,0 +1,109 @@ +//go:build !e2e_testing +// +build !e2e_testing + +package overlay + +import ( + "testing" +) + +func Test_parseNetworkCategory(t *testing.T) { + cases := []struct { + in string + wantCat networkCategory + wantApply bool + wantErr bool + }{ + {"", 0, false, false}, + {"unset", 0, false, false}, + {" UNSET ", 0, false, false}, + {"private", networkCategoryPrivate, true, false}, + {"Private", networkCategoryPrivate, true, false}, + {" PRIVATE ", networkCategoryPrivate, true, false}, + {"public", networkCategoryPublic, true, false}, + {"PUBLIC", networkCategoryPublic, true, false}, + {"domain", networkCategoryDomainAuthenticated, true, false}, + {"DomainAuthenticated", networkCategoryDomainAuthenticated, true, false}, + {"garbage", 0, false, true}, + {"privates", 0, false, true}, + } + for _, tc := range cases { + cat, apply, err := parseNetworkCategory(tc.in) + if (err != nil) != tc.wantErr { + t.Errorf("parseNetworkCategory(%q) err=%v, wantErr=%v", tc.in, err, tc.wantErr) + continue + } + if cat != tc.wantCat || apply != tc.wantApply { + t.Errorf("parseNetworkCategory(%q) = (%v, %v), want (%v, %v)", tc.in, 
cat, apply, tc.wantCat, tc.wantApply) + } + } +} + +// Test_NLM_round_trip exercises every COM call path used by setNetworkCategory +// without mutating the host's network state. It validates the CLSID/IID +// constants and every vtable index by enumerating connections, fetching the +// adapter id and parent network, reading the current category, and writing it +// back unchanged. +// +// Requires Windows but does not require admin or the wintun driver. Skips if +// no network connections are available (unlikely outside of an isolated +// container). +func Test_NLM_round_trip(t *testing.T) { + deinit, err := coInit() + if err != nil { + t.Fatalf("coInit: %v", err) + } + defer deinit() + + nlm, err := createNetworkListManager() + if err != nil { + t.Fatalf("createNetworkListManager: %v", err) + } + defer nlm.Release() + + enum, err := nlm.GetNetworkConnections() + if err != nil { + t.Fatalf("GetNetworkConnections: %v", err) + } + defer enum.Release() + + saw := 0 + for { + conn, err := enum.Next() + if err != nil { + t.Fatalf("EnumNetworkConnections.Next: %v", err) + } + if conn == nil { + break + } + saw++ + + if _, err := conn.GetAdapterId(); err != nil { + conn.Release() + t.Fatalf("INetworkConnection.GetAdapterId: %v", err) + } + + net, err := conn.GetNetwork() + conn.Release() + if err != nil { + t.Fatalf("INetworkConnection.GetNetwork: %v", err) + } + + cat, err := net.GetCategory() + if err != nil { + net.Release() + t.Fatalf("INetwork.GetCategory: %v", err) + } + // Set to the current value so the host's NLM state is unchanged but + // SetCategory's vtable slot is still validated end-to-end. 
+ if err := net.SetCategory(cat); err != nil { + net.Release() + t.Fatalf("INetwork.SetCategory(%v): %v", cat, err) + } + net.Release() + } + + if saw == 0 { + t.Skip("no NLM network connections available; skipping round-trip") + } +} diff --git a/overlay/tun_bypass_windows.go b/overlay/tun_bypass_windows.go new file mode 100644 index 00000000..1f62373c --- /dev/null +++ b/overlay/tun_bypass_windows.go @@ -0,0 +1,23 @@ +//go:build (amd64 || arm64) && !e2e_testing +// +build amd64 arm64 +// +build !e2e_testing + +package overlay + +import ( + "log/slog" + + "github.com/slackhq/nebula/wfp" +) + +// installInterfaceBypass installs a WFP PERMIT filter scoped to the wintun interface LUID so inbound traffic on the +// nebula adapter bypasses Windows Defender Firewall. +func installInterfaceBypass(l *slog.Logger, luid uint64) closer { + s, err := wfp.PermitInterface(luid) + if err != nil { + l.Warn("Failed to install WFP bypass filters on nebula interface", "error", err) + return nil + } + l.Info("Installed WFP filters bypassing Windows Defender Firewall on nebula interface") + return s +} diff --git a/overlay/tun_bypass_windows_386.go b/overlay/tun_bypass_windows_386.go new file mode 100644 index 00000000..366430b0 --- /dev/null +++ b/overlay/tun_bypass_windows_386.go @@ -0,0 +1,11 @@ +//go:build !e2e_testing +// +build !e2e_testing + +package overlay + +import "log/slog" + +// installInterfaceBypass is a no-op on windows-386 because we don't currently build for it. 
+func installInterfaceBypass(_ *slog.Logger, _ uint64) closer { + return nil +} diff --git a/overlay/tun_windows.go b/overlay/tun_windows.go index 14c8d499..cf01615f 100644 --- a/overlay/tun_windows.go +++ b/overlay/tun_windows.go @@ -25,15 +25,24 @@ import ( "golang.zx2c4.com/wireguard/windows/tunnel/winipcfg" ) +type closer interface { + Close() +} + const tunGUIDLabel = "Fixed Nebula Windows GUID v1" type winTun struct { - Device string - vpnNetworks []netip.Prefix - MTU int - Routes atomic.Pointer[[]Route] - routeTree atomic.Pointer[bart.Table[routing.Gateways]] - l *slog.Logger + Device string + vpnNetworks []netip.Prefix + MTU int + Routes atomic.Pointer[[]Route] + routeTree atomic.Pointer[bart.Table[routing.Gateways]] + guid windows.GUID + networkCategory networkCategory + setCategory bool + bypassWDF bool + wdfBypass closer + l *slog.Logger tun *wintun.NativeTun } @@ -54,11 +63,20 @@ func newTun(c *config.C, l *slog.Logger, vpnNetworks []netip.Prefix, _ bool) (*w return nil, fmt.Errorf("generate GUID failed: %w", err) } + cat, setCat, err := parseNetworkCategory(c.GetString("tun.network_category", "private")) + if err != nil { + return nil, err + } + t := &winTun{ - Device: deviceName, - vpnNetworks: vpnNetworks, - MTU: c.GetInt("tun.mtu", DefaultMTU), - l: l, + Device: deviceName, + vpnNetworks: vpnNetworks, + MTU: c.GetInt("tun.mtu", DefaultMTU), + guid: *guid, + networkCategory: cat, + setCategory: setCat, + bypassWDF: c.GetBool("tun.windows_bypass_wdf", true), + l: l, } err = t.reload(c, true) @@ -142,6 +160,17 @@ func (t *winTun) Activate() error { return err } + if t.setCategory { + // The wintun adapter takes a moment to register with the Network List + // Manager, so we apply the category in the background and retry until + // it shows up. 
+ go applyNetworkCategory(t.l, t.guid, t.networkCategory) + } + + if t.bypassWDF { + t.wdfBypass = installInterfaceBypass(t.l, uint64(t.tun.LUID())) + } + return nil } @@ -255,6 +284,11 @@ func (t *winTun) Close() error { _ = luid.FlushDNS(windows.AF_INET) _ = luid.FlushDNS(windows.AF_INET6) + if t.wdfBypass != nil { + t.wdfBypass.Close() + t.wdfBypass = nil + } + return t.tun.Close() } diff --git a/udp/udp_android.go b/udp/udp_android.go index 3fc68003..213ab422 100644 --- a/udp/udp_android.go +++ b/udp/udp_android.go @@ -5,12 +5,11 @@ package udp import ( "fmt" + "log/slog" "net" "net/netip" "syscall" - "log/slog" - "golang.org/x/sys/unix" ) diff --git a/udp/udp_bsd.go b/udp/udp_bsd.go index c42a3c18..31ae9c5a 100644 --- a/udp/udp_bsd.go +++ b/udp/udp_bsd.go @@ -8,12 +8,11 @@ package udp import ( "fmt" + "log/slog" "net" "net/netip" "syscall" - "log/slog" - "golang.org/x/sys/unix" ) diff --git a/udp/udp_bypass_windows.go b/udp/udp_bypass_windows.go new file mode 100644 index 00000000..b8b06b1e --- /dev/null +++ b/udp/udp_bypass_windows.go @@ -0,0 +1,57 @@ +//go:build (amd64 || arm64) && !e2e_testing +// +build amd64 arm64 +// +build !e2e_testing + +package udp + +import ( + "log/slog" + "sync" + + "github.com/slackhq/nebula/config" + "github.com/slackhq/nebula/wfp" +) + +// wrapWithWDFBypass wraps a Conn so that the first ReloadConfig consults listen.windows_bypass_wdf +// and installs a WFP PERMIT filter for the listener's bound UDP port. The session is released when Close runs. 
+func wrapWithWDFBypass(l *slog.Logger, conn Conn) Conn {
+	return &bypassConn{l: l, Conn: conn}
+}
+
+type bypassConn struct {
+	Conn // wrapped listener; calls not overridden below are promoted to it
+
+	l           *slog.Logger
+	installOnce sync.Once    // limits the install attempt in ReloadConfig to a single run
+	session     *wfp.Session // stays nil unless the install succeeded; released by Close
+}
+
+func (b *bypassConn) ReloadConfig(c *config.C) {
+	b.installOnce.Do(func() { // only the first ReloadConfig reaches this closure
+		if enabled := c.GetBool("listen.windows_bypass_wdf", true); !enabled {
+			return
+		}
+		local, err := b.Conn.LocalAddr()
+		if err != nil {
+			b.l.Warn("Failed to query listener port for WFP bypass", "error", err)
+			return
+		}
+		sess, err := wfp.PermitUDPPort(local.Port())
+		if err != nil {
+			b.l.Warn("Failed to install WFP bypass filters for listener", "error", err)
+			return
+		}
+		b.l.Info("Installed WFP filters bypassing Windows Defender Firewall on UDP listener port",
+			"port", local.Port())
+		b.session = sess
+	})
+	b.Conn.ReloadConfig(c)
+}
+
+func (b *bypassConn) Close() error {
+	if held := b.session; held != nil {
+		b.session = nil
+		held.Close()
+	}
+	return b.Conn.Close()
+}
diff --git a/udp/udp_bypass_windows_386.go b/udp/udp_bypass_windows_386.go
new file mode 100644
index 00000000..fa5a6eec
--- /dev/null
+++ b/udp/udp_bypass_windows_386.go
@@ -0,0 +1,11 @@
+//go:build !e2e_testing
+// +build !e2e_testing
+
+package udp
+
+import "log/slog"
+
+// wrapWithWDFBypass is a no-op on windows-386 since we don't currently build for it.
+func wrapWithWDFBypass(_ *slog.Logger, conn Conn) Conn { + return conn +} diff --git a/udp/udp_netbsd.go b/udp/udp_netbsd.go index 4b2de75a..b0c81393 100644 --- a/udp/udp_netbsd.go +++ b/udp/udp_netbsd.go @@ -7,12 +7,11 @@ package udp import ( "fmt" + "log/slog" "net" "net/netip" "syscall" - "log/slog" - "golang.org/x/sys/unix" ) diff --git a/udp/udp_windows.go b/udp/udp_windows.go index 7969f7e8..1f34f0bc 100644 --- a/udp/udp_windows.go +++ b/udp/udp_windows.go @@ -19,13 +19,18 @@ func NewListener(l *slog.Logger, ip netip.Addr, port int, multi bool, batch int) return nil, fmt.Errorf("multiple udp listeners not supported on windows") } + var conn Conn rc, err := NewRIOListener(l, ip, port) if err == nil { - return rc, nil + conn = rc + } else { + l.Error("Falling back to standard udp sockets", "error", err) + conn, err = NewGenericListener(l, ip, port, multi, batch) + if err != nil { + return nil, err + } } - - l.Error("Falling back to standard udp sockets", "error", err) - return NewGenericListener(l, ip, port, multi, batch) + return wrapWithWDFBypass(l, conn), nil } func NewListenConfig(multi bool) net.ListenConfig { diff --git a/wfp/wfp_windows.go b/wfp/wfp_windows.go new file mode 100644 index 00000000..22aa0565 --- /dev/null +++ b/wfp/wfp_windows.go @@ -0,0 +1,377 @@ +//go:build (amd64 || arm64) && !e2e_testing +// +build amd64 arm64 +// +build !e2e_testing + +// Package wfp installs Windows Filtering Platform (WFP) PERMIT filters in a dynamic, session-scoped sublayer. +// Because WFP sits below Windows Defender Firewall, a high-weight permit at FWPM_LAYER_ALE_AUTH_RECV_ACCEPT_V4/V6 lets +// the matching inbound traffic through regardless of WDF rules. +// +// Each Session owns its own engine handle. When the handle closes, every dynamic object added during the session +// is auto-deleted by Windows, so there are no orphaned filters. +// +// Type definitions and constants are derived from the wireguard-windows firewall package (MIT). 
+// Only the subset we exercise is reproduced.
+package wfp
+
+import (
+	"fmt"
+	"unsafe"
+
+	"golang.org/x/sys/windows"
+)
+
+// FWPM layer GUIDs (fwpmu.h). PermitInterface and PermitUDPPort add one filter at each of these two layers.
+//
+// FWPM_LAYER_ALE_AUTH_RECV_ACCEPT_V4 = e1cd9fe7-f4b5-4273-96c0-592e487b8650
+// FWPM_LAYER_ALE_AUTH_RECV_ACCEPT_V6 = a3b42c97-9f04-4672-b87e-cee9c483257f
+var (
+	fwpmLayerAleAuthRecvAcceptV4 = windows.GUID{
+		Data1: 0xe1cd9fe7, Data2: 0xf4b5, Data3: 0x4273,
+		Data4: [8]byte{0x96, 0xc0, 0x59, 0x2e, 0x48, 0x7b, 0x86, 0x50},
+	}
+	fwpmLayerAleAuthRecvAcceptV6 = windows.GUID{
+		Data1: 0xa3b42c97, Data2: 0x9f04, Data3: 0x4672,
+		Data4: [8]byte{0xb8, 0x7e, 0xce, 0xe9, 0xc4, 0x83, 0x25, 0x7f},
+	}
+)
+
+// FWPM_CONDITION_IP_LOCAL_INTERFACE = 4cd62a49-59c3-4969-b7f3-bda5d32890a4 (field key used by addInterfaceFilter)
+var fwpmConditionIPLocalInterface = windows.GUID{
+	Data1: 0x4cd62a49, Data2: 0x59c3, Data3: 0x4969,
+	Data4: [8]byte{0xb7, 0xf3, 0xbd, 0xa5, 0xd3, 0x28, 0x90, 0xa4},
+}
+
+// FWPM_CONDITION_IP_PROTOCOL = 3971ef2b-623e-4f9a-8cb1-6e79b806b9a7 (first field key used by addUDPPortFilter)
+var fwpmConditionIPProtocol = windows.GUID{
+	Data1: 0x3971ef2b, Data2: 0x623e, Data3: 0x4f9a,
+	Data4: [8]byte{0x8c, 0xb1, 0x6e, 0x79, 0xb8, 0x06, 0xb9, 0xa7},
+}
+
+// FWPM_CONDITION_IP_LOCAL_PORT = 0c1ba1af-5765-453f-af22-a8f791ac775b (second field key used by addUDPPortFilter)
+var fwpmConditionIPLocalPort = windows.GUID{
+	Data1: 0x0c1ba1af, Data2: 0x5765, Data3: 0x453f,
+	Data4: [8]byte{0xaf, 0x22, 0xa8, 0xf7, 0x91, 0xac, 0x77, 0x5b},
+}
+
+// IPPROTO_UDP from in.h. Compared against the IP_PROTOCOL condition.
+const ipprotoUDP uint8 = 17
+
+// FWP_ACTION_TYPE values (fwptypes.h). PERMIT is terminating.
+const fwpActionPermit uint32 = 0x00001002 // 0x2 | FWP_ACTION_FLAG_TERMINATING(0x1000)
+
+// FWP_DATA_TYPE values we use. They populate the type_ field of fwpValue0.
+const (
+	fwpEmpty  uint32 = 0 // not referenced by the code visible in this file -- confirm before removing
+	fwpUint8  uint32 = 1 // filter weight and the IP_PROTOCOL condition
+	fwpUint16 uint32 = 2 // the IP_LOCAL_PORT condition
+	fwpUint64 uint32 = 4 // the IP_LOCAL_INTERFACE condition (stored by pointer, see fwpValue0)
+)
+
+// FWP_MATCH_TYPE values.
+const fwpMatchEqual uint32 = 0
+
+// FWPM_SESSION flags.
+const fwpmSessionFlagDynamic uint32 = 0x1 // set on the session that openDynamicEngine hands to FwpmEngineOpen0
+
+// FWPM_FILTER_FLAG_CLEAR_ACTION_RIGHT prevents lower-priority filters in other sublayers -- notably Windows
+// Defender Firewall's MPSSVC_WF sublayer, which shares our 0xFFFF weight -- from overriding this PERMIT.
+// Without it, a default WDF block at the same sublayer weight can still win arbitration.
+const fwpmFilterFlagClearActionRight uint32 = 0x8
+
+// RPC authentication.
+// RPC_C_AUTHN_WINNT works on workgroup machines with no domain context;
+// RPC_C_AUTHN_DEFAULT falls back through a chain that can land on something WFP doesn't accept on a fresh box.
+const rpcCAuthnWinNT uint32 = 10
+
+// fwpByteBlob (FWP_BYTE_BLOB). 16 bytes on 64-bit.
+type fwpByteBlob struct {
+	size uint32
+	_    uint32 // padding so that data is 8-byte aligned
+	data *uint8
+}
+
+// fwpValue0 / FWP_CONDITION_VALUE0 layout. 16 bytes on 64-bit.
+// The union is pointer-sized; types <= 32 bits (UINT8/16/32, INT8/16/32, float) live inline in the low bytes
+// of `value`, while UINT64/INT64/double and aggregate types are stored *by pointer*, even on 64-bit, where the
+// union member is declared as UINT64*. So an FWP_UINT64 condition carries uintptr(unsafe.Pointer(&luidVar)), not
+// the LUID inline; a uintptr is not a GC reference, so luidVar must be kept reachable by other means during the call.
+type fwpValue0 struct {
+	type_ uint32
+	_     uint32 // padding before union to 8-byte alignment
+	value uintptr
+}
+
+// fwpmDisplayData0 / FWPM_DISPLAY_DATA0. 16 bytes on 64-bit.
+type fwpmDisplayData0 struct {
+	name        *uint16 // UTF-16 string pointer, produced with windows.UTF16PtrFromString by the callers in this file
+	description *uint16 // UTF-16 string pointer, produced the same way
+}
+
+// fwpmAction0 / FWPM_ACTION0. 20 bytes; no leading padding because actionType
+// is uint32 and GUID's first field is uint32.
+type fwpmAction0 struct {
+	actionType uint32
+	filterType windows.GUID // left zero by every filter built in this file
+}
+
+// fwpmFilterCondition0. 40 bytes on 64-bit.
+type fwpmFilterCondition0 struct {
+	fieldKey       windows.GUID // 16 bytes
+	matchType      uint32       // 4 bytes
+	_              uint32       // 4 bytes of padding
+	conditionValue fwpValue0    // 16 bytes
+}
+
+// fwpmFilter0. 200 bytes on 64-bit.
+type fwpmFilter0 struct {
+	filterKey           windows.GUID
+	displayData         fwpmDisplayData0
+	flags               uint32
+	_                   uint32 // padding before *GUID
+	providerKey         *windows.GUID
+	providerData        fwpByteBlob
+	layerKey            windows.GUID
+	subLayerKey         windows.GUID
+	weight              fwpValue0
+	numFilterConditions uint32
+	_                   uint32 // padding before pointer
+	filterCondition     *fwpmFilterCondition0
+	action              fwpmAction0
+	_                   [4]byte // action ends at offset 148; this puts the next field at 152 (presumably the C union is 8-byte aligned -- TODO confirm)
+	providerContextKey  windows.GUID
+	reserved            *windows.GUID
+	filterID            uint64
+	effectiveWeight     fwpValue0
+}
+
+// fwpmSublayer0. 72 bytes on 64-bit.
+type fwpmSublayer0 struct {
+	subLayerKey  windows.GUID
+	displayData  fwpmDisplayData0
+	flags        uint32
+	_            uint32 // padding before *GUID
+	providerKey  *windows.GUID
+	providerData fwpByteBlob
+	weight       uint16  // registerSublayer sets this to 0xFFFF
+	_            [6]byte // padding to 72 bytes
+}
+
+// fwpmSession0. 72 bytes on 64-bit.
+type fwpmSession0 struct {
+	sessionKey           windows.GUID
+	displayData          fwpmDisplayData0
+	flags                uint32 // openDynamicEngine sets fwpmSessionFlagDynamic here and leaves every other field zero
+	txnWaitTimeoutInMSec uint32
+	processId            uint32
+	_                    uint32 // padding before *SID
+	sid                  *windows.SID
+	username             *uint16
+	kernelMode           uint8
+	_                    [7]byte // tail padding
+}
+
+// fwpuclnt.dll bindings. Only the calls we use.
+var (
+	modFwpuclnt          = windows.NewLazySystemDLL("fwpuclnt.dll")
+	procFwpmEngineOpen0  = modFwpuclnt.NewProc("FwpmEngineOpen0")
+	procFwpmEngineClose0 = modFwpuclnt.NewProc("FwpmEngineClose0")
+	procFwpmSubLayerAdd0 = modFwpuclnt.NewProc("FwpmSubLayerAdd0")
+	procFwpmFilterAdd0   = modFwpuclnt.NewProc("FwpmFilterAdd0")
+)
+
+// Session holds the WFP engine handle for a single bypass operation. The handle owns a dynamic session:
+// when it is closed, every WFP object added during the session (sublayer + filters) is automatically deleted by
+// Windows. That gives us correct cleanup even if the host process is killed hard between Permit* and Close.
+type Session struct {
+	engine uintptr // handle written by FwpmEngineOpen0; reset to zero once Close has run
+}
+
+// Close releases the engine handle. Windows deletes every dynamic object (sublayer + filters) the session installed.
+// A nil receiver or an already-closed Session makes this a no-op.
+func (s *Session) Close() {
+	// Only a live handle is handed to FwpmEngineClose0; it is zeroed afterwards so a second Close does nothing.
+	if s != nil && s.engine != 0 {
+		procFwpmEngineClose0.Call(s.engine)
+		s.engine = 0
+	}
+}
+
+// PermitInterface installs PERMIT filters at FWPM_LAYER_ALE_AUTH_RECV_ACCEPT_V4 and _V6 that match the given network
+// interface LUID, so inbound traffic on that interface bypasses Windows Defender Firewall. A failed add closes the session.
+func PermitInterface(luid uint64) (*Session, error) {
+	sess, key, err := newSession()
+	if err != nil {
+		return nil, err
+	}
+
+	if err = addInterfaceFilter(sess.engine, key, fwpmLayerAleAuthRecvAcceptV4, luid); err != nil {
+		sess.Close()
+		return nil, fmt.Errorf("add v4 filter: %w", err)
+	}
+	if err = addInterfaceFilter(sess.engine, key, fwpmLayerAleAuthRecvAcceptV6, luid); err != nil {
+		sess.Close()
+		return nil, fmt.Errorf("add v6 filter: %w", err)
+	}
+	return sess, nil
+}
+
+// PermitUDPPort installs PERMIT filters at FWPM_LAYER_ALE_AUTH_RECV_ACCEPT_V4 and _V6 scoped to UDP traffic with the
+// given local port. Inbound UDP to that port on any interface bypasses Windows Defender Firewall.
+func PermitUDPPort(port uint16) (*Session, error) {
+	sess, key, err := newSession()
+	if err != nil {
+		return nil, err
+	}
+	if err = addUDPPortFilter(sess.engine, key, fwpmLayerAleAuthRecvAcceptV4, port); err != nil {
+		sess.Close()
+		return nil, fmt.Errorf("add v4 filter: %w", err)
+	}
+	if err = addUDPPortFilter(sess.engine, key, fwpmLayerAleAuthRecvAcceptV6, port); err != nil {
+		sess.Close()
+		return nil, fmt.Errorf("add v6 filter: %w", err)
+	}
+	return sess, nil
+}
+
+// newSession opens a dynamic engine and registers the bypass sublayer on it, closing the engine if that fails.
+func newSession() (*Session, windows.GUID, error) {
+	handle, err := openDynamicEngine()
+	if err != nil {
+		return nil, windows.GUID{}, err
+	}
+	key, err := registerSublayer(handle)
+	if err != nil {
+		procFwpmEngineClose0.Call(handle)
+		return nil, windows.GUID{}, err
+	}
+	return &Session{engine: handle}, key, nil
+}
+
+// openDynamicEngine calls FwpmEngineOpen0 with a dynamic session and RPC_C_AUTHN_WINNT and returns the engine handle.
+func openDynamicEngine() (uintptr, error) {
+	var handle uintptr
+	sessionInfo := fwpmSession0{flags: fwpmSessionFlagDynamic}
+	// The two zero arguments are serverName == NULL (local) and authIdentity == NULL.
+	status, _, _ := procFwpmEngineOpen0.Call(
+		0, uintptr(rpcCAuthnWinNT), 0,
+		uintptr(unsafe.Pointer(&sessionInfo)),
+		uintptr(unsafe.Pointer(&handle)),
+	)
+	if status != 0 {
+		return 0, fmt.Errorf("FwpmEngineOpen0: 0x%x", status)
+	}
+	return handle, nil
+}
+
+// registerSublayer adds a session-scoped sublayer with a freshly generated GUID, weight 0xFFFF so its filters arbitrate
+// above WDF's default sublayer. The sublayer is dynamic (no PERSISTENT flag) and goes away when the engine handle closes.
+func registerSublayer(engine uintptr) (windows.GUID, error) {
+	key, err := windows.GenerateGUID()
+	if err != nil {
+		return windows.GUID{}, fmt.Errorf("GenerateGUID for sublayer: %w", err)
+	}
+
+	name, _ := windows.UTF16PtrFromString("Nebula WDF bypass sublayer") // errors only on an embedded NUL; these literals have none
+	desc, _ := windows.UTF16PtrFromString("Permit filters bypassing Windows Defender Firewall")
+	sl := fwpmSublayer0{
+		subLayerKey: key,
+		displayData: fwpmDisplayData0{name: name, description: desc},
+		weight:      0xFFFF,
+	}
+	r1, _, _ := procFwpmSubLayerAdd0.Call(
+		engine,
+		uintptr(unsafe.Pointer(&sl)),
+		0, // sd == NULL
+	)
+	if r1 != 0 {
+		return windows.GUID{}, fmt.Errorf("FwpmSubLayerAdd0: 0x%x", r1)
+	}
+	return key, nil
+}
+
+// addInterfaceFilter installs a PERMIT filter on layer that matches (IP_LOCAL_INTERFACE == luid).
+// A non-zero FwpmFilterAdd0 status is returned as an error.
+func addInterfaceFilter(engine uintptr, sublayerKey, layer windows.GUID, luid uint64) error {
+	name, _ := windows.UTF16PtrFromString("Nebula allow interface inbound") // errors only on an embedded NUL; these literals have none
+	desc, _ := windows.UTF16PtrFromString("Permits inbound traffic on a nebula interface")
+
+	// FWP_UINT64 is stored by pointer in the FWP_VALUE0 union, but a uintptr neither keeps luid alive nor follows
+	// it if the stack moves. Keep the LUID in the allocation that holds the condition so both stay valid together.
+	arg := &struct {
+		luid uint64
+		cond fwpmFilterCondition0
+	}{luid: luid}
+	arg.cond = fwpmFilterCondition0{fieldKey: fwpmConditionIPLocalInterface, matchType: fwpMatchEqual}
+	arg.cond.conditionValue = fwpValue0{type_: fwpUint64, value: uintptr(unsafe.Pointer(&arg.luid))}
+
+	filter := fwpmFilter0{
+		// filterKey left zero: WFP assigns one when the filter is added.
+		displayData:         fwpmDisplayData0{name: name, description: desc},
+		flags:               fwpmFilterFlagClearActionRight,
+		layerKey:            layer,
+		subLayerKey:         sublayerKey,
+		weight:              fwpValue0{type_: fwpUint8, value: uintptr(15)},
+		numFilterConditions: 1,
+		filterCondition:     &arg.cond, // real pointer into arg: the LUID is reachable for as long as filter is
+		action:              fwpmAction0{actionType: fwpActionPermit},
+	}
+
+	r1, _, _ := procFwpmFilterAdd0.Call(
+		engine,
+		uintptr(unsafe.Pointer(&filter)),
+		0, // sd == NULL
+		0, // id == NULL
+	)
+	if r1 != 0 {
+		return fmt.Errorf("FwpmFilterAdd0: 0x%x", r1)
+	}
+	return nil
+}
+
+// addUDPPortFilter installs a PERMIT filter that matches (IP_PROTOCOL == UDP) AND (IP_LOCAL_PORT == port).
+// FWP_UINT8 and FWP_UINT16 are <= 32 bits so they live inline in the FWP_VALUE0 union.
+func addUDPPortFilter(engine uintptr, sublayerKey, layer windows.GUID, port uint16) error {
+	name, _ := windows.UTF16PtrFromString("Nebula allow UDP port inbound") // errors only on an embedded NUL; these literals have none
+	desc, _ := windows.UTF16PtrFromString("Permits inbound UDP to a nebula listener port")
+
+	conds := [2]fwpmFilterCondition0{
+		{
+			fieldKey:  fwpmConditionIPProtocol,
+			matchType: fwpMatchEqual,
+			conditionValue: fwpValue0{
+				type_: fwpUint8,
+				value: uintptr(ipprotoUDP),
+			},
+		},
+		{
+			fieldKey:  fwpmConditionIPLocalPort,
+			matchType: fwpMatchEqual,
+			conditionValue: fwpValue0{
+				type_: fwpUint16,
+				value: uintptr(port),
+			},
+		},
+	}
+
+	filter := fwpmFilter0{
+		displayData:         fwpmDisplayData0{name: name, description: desc},
+		flags:               fwpmFilterFlagClearActionRight,
+		layerKey:            layer,
+		subLayerKey:         sublayerKey,
+		weight:              fwpValue0{type_: fwpUint8, value: uintptr(15)},
+		numFilterConditions: 2,
+		filterCondition:     &conds[0],
+		action:              fwpmAction0{actionType: fwpActionPermit},
+	}
+
+	r1, _, _ := procFwpmFilterAdd0.Call(
+		engine,
+		uintptr(unsafe.Pointer(&filter)),
+		0, // sd == NULL
+		0, // id == NULL
+	)
+	if r1 != 0 {
+		return fmt.Errorf("FwpmFilterAdd0: 0x%x", r1)
+	}
+	return nil
+}