mirror of
https://github.com/slackhq/nebula.git
synced 2026-05-16 12:57:38 +02:00
PMTUD exploration, start small then grow
This commit is contained in:
@@ -20,6 +20,12 @@ type Conn interface {
|
||||
WriteTo(b []byte, addr netip.AddrPort) error
|
||||
ReloadConfig(c *config.C)
|
||||
SupportsMultipleReaders() bool
|
||||
// EnablePathMTUDiscovery sets the don't-fragment bit on outgoing packets for
|
||||
// this socket. Called by the pmtud manager when PMTUD is enabled. A no-op on
|
||||
// platforms that don't support it; nebula's default behavior (no DF, kernel
|
||||
// fragmentation allowed) is preserved on those platforms and on this one when
|
||||
// PMTUD is disabled.
|
||||
EnablePathMTUDiscovery() error
|
||||
Close() error
|
||||
}
|
||||
|
||||
@@ -43,6 +49,9 @@ func (NoopConn) WriteTo(_ []byte, _ netip.AddrPort) error {
|
||||
func (NoopConn) ReloadConfig(_ *config.C) {
|
||||
return
|
||||
}
|
||||
func (NoopConn) EnablePathMTUDiscovery() error {
|
||||
return nil
|
||||
}
|
||||
func (NoopConn) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -44,3 +44,17 @@ func NewListenConfig(multi bool) net.ListenConfig {
|
||||
func (u *GenericConn) Rebind() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// EnablePathMTUDiscovery sets the don't-fragment bit on outbound packets.
|
||||
// Android is Linux underneath, so we use IP_PMTUDISC_PROBE (kernel sets DF but
|
||||
// does not consume incoming ICMP frag-needed for its PMTU cache; the manager
|
||||
// drives discovery via authenticated probes).
|
||||
func (u *GenericConn) EnablePathMTUDiscovery() error {
|
||||
v4 := u.isV4Socket()
|
||||
return u.controlFD(func(fd uintptr) error {
|
||||
if v4 {
|
||||
return unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_MTU_DISCOVER, unix.IP_PMTUDISC_PROBE)
|
||||
}
|
||||
return unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_MTU_DISCOVER, unix.IPV6_PMTUDISC_PROBE)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -47,3 +47,8 @@ func NewListenConfig(multi bool) net.ListenConfig {
|
||||
func (u *GenericConn) Rebind() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// EnablePathMTUDiscovery is split into per-OS files: udp_freebsd.go handles
|
||||
// FreeBSD (which has both IP_DONTFRAG and IPV6_DONTFRAG in the unix package);
|
||||
// udp_openbsd.go handles OpenBSD (v6 only; the kernel doesn't expose a v4 DF
|
||||
// sockopt).
|
||||
|
||||
@@ -187,6 +187,17 @@ func (u *StdConn) SupportsMultipleReaders() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// EnablePathMTUDiscovery sets the don't-fragment bit on every outbound packet.
|
||||
// On darwin we use IP_DONTFRAG (v4) / IPV6_DONTFRAG (v6). The kernel will return
|
||||
// EMSGSIZE for sends that exceed the local interface MTU; ICMP-driven PMTU
|
||||
// updates from upstream routers are processed by the kernel as usual.
|
||||
func (u *StdConn) EnablePathMTUDiscovery() error {
|
||||
if u.isV4 {
|
||||
return syscall.SetsockoptInt(int(u.sysFd), syscall.IPPROTO_IP, unix.IP_DONTFRAG, 1)
|
||||
}
|
||||
return syscall.SetsockoptInt(int(u.sysFd), syscall.IPPROTO_IPV6, unix.IPV6_DONTFRAG, 1)
|
||||
}
|
||||
|
||||
func (u *StdConn) Rebind() error {
|
||||
var err error
|
||||
if u.isV4 {
|
||||
|
||||
25
udp/udp_freebsd.go
Normal file
25
udp/udp_freebsd.go
Normal file
@@ -0,0 +1,25 @@
|
||||
//go:build freebsd && !e2e_testing
|
||||
// +build freebsd,!e2e_testing
|
||||
|
||||
package udp
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// EnablePathMTUDiscovery sets the don't-fragment bit on outbound packets.
|
||||
// FreeBSD exposes IP_DONTFRAG (v4) and IPV6_DONTFRAG (v6) in golang.org/x/sys/unix.
|
||||
// Unlike Linux, BSDs don't have an explicit "don't consume incoming ICMP
|
||||
// frag-needed" knob for unconnected UDP sockets; the kernel's PMTU cache will
|
||||
// be updated from ICMP, which is benign for our usage (the cache only affects
|
||||
// what EMSGSIZE gets surfaced for; the manager drives its own discovery via
|
||||
// authenticated probes).
|
||||
func (u *GenericConn) EnablePathMTUDiscovery() error {
|
||||
v4 := u.isV4Socket()
|
||||
return u.controlFD(func(fd uintptr) error {
|
||||
if v4 {
|
||||
return unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_DONTFRAG, 1)
|
||||
}
|
||||
return unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_DONTFRAG, 1)
|
||||
})
|
||||
}
|
||||
@@ -100,3 +100,41 @@ func (u *GenericConn) ListenOut(r EncReader) error {
|
||||
func (u *GenericConn) SupportsMultipleReaders() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// EnablePathMTUDiscovery is implemented per-platform alongside Rebind, in
// udp_android.go / udp_freebsd.go / udp_openbsd.go / udp_netbsd.go /
// udp_windows.go.
|
||||
|
||||
// controlFD invokes f with the underlying UDP socket file descriptor (or
|
||||
// handle, on Windows). Used by platform files for setsockopt calls that the
|
||||
// stdlib net.UDPConn does not expose directly.
|
||||
func (u *GenericConn) controlFD(f func(fd uintptr) error) error {
|
||||
rc, err := u.UDPConn.SyscallConn()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var sockErr error
|
||||
err = rc.Control(func(fd uintptr) {
|
||||
sockErr = f(fd)
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return sockErr
|
||||
}
|
||||
|
||||
// isV4Socket reports whether the local bind address looks like an IPv4 socket.
|
||||
// Used by EnablePathMTUDiscovery to pick IPPROTO_IP vs IPPROTO_IPV6 socket
|
||||
// options. Assumes pure-v4 or pure-v6 sockets; a dual-stack v6 socket bound to
|
||||
// :: will be treated as v6 (correct: setting IPV6_DONTFRAG covers v4-mapped
|
||||
// traffic too on most stacks).
|
||||
func (u *GenericConn) isV4Socket() bool {
|
||||
la := u.UDPConn.LocalAddr()
|
||||
if la == nil {
|
||||
return false
|
||||
}
|
||||
ua, ok := la.(*net.UDPAddr)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
return ua.IP.To4() != nil
|
||||
}
|
||||
|
||||
@@ -73,6 +73,21 @@ func NewListener(l *slog.Logger, ip netip.Addr, port int, multi bool, batch int)
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// EnablePathMTUDiscovery sets IP_MTU_DISCOVER=IP_PMTUDISC_PROBE (IPV6 equivalent
|
||||
// for v6 sockets). This sets the don't-fragment bit on every outbound packet but
|
||||
// tells the kernel not to consume incoming ICMP frag-needed for its own PMTU
|
||||
// cache; we drive PMTU discovery from the application via authenticated probes
|
||||
// (RFC 8899). Called by the pmtud manager when PMTUD is enabled. Without this
|
||||
// call the socket retains nebula's historical behavior (no DF, kernel may
|
||||
// fragment), preserving compatibility with deployments that depend on UDP
|
||||
// fragmentation.
|
||||
func (u *StdConn) EnablePathMTUDiscovery() error {
|
||||
if u.isV4 {
|
||||
return u.setSockOptIPInt(unix.IPPROTO_IP, unix.IP_MTU_DISCOVER, unix.IP_PMTUDISC_PROBE)
|
||||
}
|
||||
return u.setSockOptIPInt(unix.IPPROTO_IPV6, unix.IPV6_MTU_DISCOVER, unix.IPV6_PMTUDISC_PROBE)
|
||||
}
|
||||
|
||||
func (u *StdConn) SupportsMultipleReaders() bool {
|
||||
return true
|
||||
}
|
||||
@@ -110,6 +125,21 @@ func (u *StdConn) setSockOptInt(opt int, n int) error {
|
||||
return opErr
|
||||
}
|
||||
|
||||
// setSockOptIPInt sets a socket option at a non-SOL_SOCKET level (e.g. IPPROTO_IP).
|
||||
func (u *StdConn) setSockOptIPInt(level, opt, n int) error {
|
||||
if u.rawConn == nil {
|
||||
return fmt.Errorf("no UDP connection")
|
||||
}
|
||||
var opErr error
|
||||
err := u.rawConn.Control(func(fd uintptr) {
|
||||
opErr = unix.SetsockoptInt(int(fd), level, opt, n)
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return opErr
|
||||
}
|
||||
|
||||
func (u *StdConn) SetRecvBuffer(n int) error {
|
||||
return u.setSockOptInt(unix.SO_RCVBUFFORCE, n)
|
||||
}
|
||||
|
||||
@@ -46,3 +46,18 @@ func NewListenConfig(multi bool) net.ListenConfig {
|
||||
func (u *GenericConn) Rebind() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// EnablePathMTUDiscovery sets the don't-fragment bit on outbound packets.
|
||||
// NetBSD exposes IPV6_DONTFRAG via golang.org/x/sys/unix but the kernel does
|
||||
// not provide a socket-level knob for setting DF on v4 UDP. The only IP-layer
|
||||
// constant exposed is IP_DF, which is the wire header flag, not a sockopt.
|
||||
// quic-go skips NetBSD for the same reason. So v4 sockets stay at nebula's
|
||||
// historical behavior (kernel may fragment); v6 gets DF.
|
||||
func (u *GenericConn) EnablePathMTUDiscovery() error {
|
||||
if u.isV4Socket() {
|
||||
return nil
|
||||
}
|
||||
return u.controlFD(func(fd uintptr) error {
|
||||
return unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_DONTFRAG, 1)
|
||||
})
|
||||
}
|
||||
|
||||
23
udp/udp_openbsd.go
Normal file
23
udp/udp_openbsd.go
Normal file
@@ -0,0 +1,23 @@
|
||||
//go:build openbsd && !e2e_testing
|
||||
// +build openbsd,!e2e_testing
|
||||
|
||||
package udp
|
||||
|
||||
import (
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// EnablePathMTUDiscovery sets the don't-fragment bit on outbound packets.
|
||||
// OpenBSD exposes IPV6_DONTFRAG via golang.org/x/sys/unix but the kernel does
|
||||
// not provide a socket-level knob for setting DF on v4 UDP. The only IP-layer
|
||||
// constant exposed is IP_DF, which is the wire header flag, not a sockopt.
|
||||
// quic-go skips OpenBSD for the same reason. So v4 sockets stay at nebula's
|
||||
// historical behavior (kernel may fragment); v6 gets DF.
|
||||
func (u *GenericConn) EnablePathMTUDiscovery() error {
|
||||
if u.isV4Socket() {
|
||||
return nil
|
||||
}
|
||||
return u.controlFD(func(fd uintptr) error {
|
||||
return unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_DONTFRAG, 1)
|
||||
})
|
||||
}
|
||||
@@ -335,6 +335,12 @@ func (u *RIOConn) Rebind() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// EnablePathMTUDiscovery is a no-op on Windows for now. PMTUD is Linux-only in
|
||||
// the initial PoC; Windows support would set IP_DONTFRAGMENT here.
|
||||
func (u *RIOConn) EnablePathMTUDiscovery() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ReloadConfig ignores the supplied configuration; RIOConn has nothing to
// reload.
func (u *RIOConn) ReloadConfig(_ *config.C) {}
|
||||
|
||||
func (u *RIOConn) Close() error {
|
||||
|
||||
@@ -152,6 +152,10 @@ func (u *TesterConn) Rebind() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (u *TesterConn) EnablePathMTUDiscovery() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (u *TesterConn) Close() error {
|
||||
u.closeOnce.Do(func() {
|
||||
close(u.done)
|
||||
|
||||
@@ -9,6 +9,8 @@ import (
|
||||
"net"
|
||||
"net/netip"
|
||||
"syscall"
|
||||
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
func NewListener(l *slog.Logger, ip netip.Addr, port int, multi bool, batch int) (Conn, error) {
|
||||
@@ -44,3 +46,27 @@ func NewListenConfig(multi bool) net.ListenConfig {
|
||||
func (u *GenericConn) Rebind() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Don't-fragment socket option numbers for Windows. golang.org/x/sys/windows
// does not export them, so they are defined locally from the values in
// ws2ipdef.h / ws2tcpip.h. The two options share the numeric value 14 but are
// used at different protocol levels.
const (
	// winIPDontFragment is IP_DONTFRAGMENT, used at IPPROTO_IP.
	winIPDontFragment = 14
	// winIPv6DontFrag is IPV6_DONTFRAG, used at IPPROTO_IPV6.
	winIPv6DontFrag = 14
)
|
||||
|
||||
// EnablePathMTUDiscovery sets the don't-fragment bit on outbound packets.
|
||||
// Windows uses IP_DONTFRAGMENT (v4) and IPV6_DONTFRAG (v6) at IPPROTO_IP /
|
||||
// IPPROTO_IPV6 respectively. Note: this only enables DF on the GenericConn
|
||||
// fallback path. The RIO path (RIOConn) has its own EnablePathMTUDiscovery
|
||||
// in udp_rio_windows.go and is currently a no-op pending RIO-specific work.
|
||||
func (u *GenericConn) EnablePathMTUDiscovery() error {
|
||||
v4 := u.isV4Socket()
|
||||
return u.controlFD(func(fd uintptr) error {
|
||||
if v4 {
|
||||
return windows.SetsockoptInt(windows.Handle(fd), windows.IPPROTO_IP, winIPDontFragment, 1)
|
||||
}
|
||||
return windows.SetsockoptInt(windows.Handle(fd), windows.IPPROTO_IPV6, winIPv6DontFrag, 1)
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user