From 3954d9af343e955c0ce67e9239cfc7a0a981ae94 Mon Sep 17 00:00:00 2001 From: Nate Brown Date: Mon, 4 May 2026 11:12:30 -0500 Subject: [PATCH] Darwin and openbsd in line with the other bsds for tun support --- overlay/tun_darwin.go | 108 ++++++++++++++++++++++++++++------------- overlay/tun_openbsd.go | 97 ++++++++++++++++++++++++++---------- 2 files changed, 147 insertions(+), 58 deletions(-) diff --git a/overlay/tun_darwin.go b/overlay/tun_darwin.go index 524ef0cd..62e0477b 100644 --- a/overlay/tun_darwin.go +++ b/overlay/tun_darwin.go @@ -23,7 +23,8 @@ import ( ) type tun struct { - io.ReadWriteCloser + f *os.File + rc syscall.RawConn Device string vpnNetworks []netip.Prefix DefaultMTU int @@ -31,9 +32,6 @@ type tun struct { routeTree atomic.Pointer[bart.Table[routing.Gateways]] linkAddr *netroute.LinkAddr l *slog.Logger - - // cache out buffer since we need to prepend 4 bytes for tun metadata - out []byte } type ifReq struct { @@ -123,12 +121,19 @@ func newTun(c *config.C, l *slog.Logger, vpnNetworks []netip.Prefix, _ bool) (*t return nil, fmt.Errorf("SetNonblock: %v", err) } + f := os.NewFile(uintptr(fd), "") + rc, err := f.SyscallConn() + if err != nil { + return nil, fmt.Errorf("failed to get syscall conn for tun: %w", err) + } + t := &tun{ - ReadWriteCloser: os.NewFile(uintptr(fd), ""), - Device: name, - vpnNetworks: vpnNetworks, - DefaultMTU: c.GetInt("tun.mtu", DefaultMTU), - l: l, + f: f, + rc: rc, + Device: name, + vpnNetworks: vpnNetworks, + DefaultMTU: c.GetInt("tun.mtu", DefaultMTU), + l: l, } err = t.reload(c, true) @@ -158,8 +163,8 @@ func newTunFromFd(_ *config.C, _ *slog.Logger, _ int, _ []netip.Prefix) (*tun, e } func (t *tun) Close() error { - if t.ReadWriteCloser != nil { - return t.ReadWriteCloser.Close() + if t.f != nil { + return t.f.Close() } return nil } @@ -502,42 +507,79 @@ func delRoute(prefix netip.Prefix, gateway netroute.Addr) error { return nil } +// Read pulls one IP packet off the utun device. 
func (t *tun) Read(to []byte) (int, error) { - buf := make([]byte, len(to)+4) + var errno syscall.Errno + var n uintptr + err := t.rc.Read(func(fd uintptr) bool { + var head [4]byte + iovecs := [2]syscall.Iovec{ + {Base: &head[0], Len: 4}, + {Base: &to[0], Len: uint64(len(to))}, + } + n, _, errno = syscall.Syscall(syscall.SYS_READV, fd, uintptr(unsafe.Pointer(&iovecs[0])), 2) + // EAGAIN/EWOULDBLOCK, plus the other errnos syscall.Errno.Temporary() reports such as EINTR: tell the runtime to wait for the fd to be + // readable and call us again. NOTE(review): &to[0] above panics if len(to) == 0; confirm callers never pass an empty buffer. + if errno.Temporary() { + return false + } + return true + }) + if err != nil { + if err == syscall.EBADF || err.Error() == "use of closed file" { + return 0, os.ErrClosed + } + return 0, fmt.Errorf("failed to make read call for tun: %w", err) + } + if errno != 0 { + return 0, fmt.Errorf("failed to make inner read call for tun: %w", errno) + } - n, err := t.ReadWriteCloser.Read(buf) - - copy(to, buf[4:]) - return n - 4, err + bytesRead := int(n) + if bytesRead < 4 { + return 0, nil + } + return bytesRead - 4, nil } -// Write is only valid for single threaded use +// Write pushes one IP packet onto the utun device, prefixed with a 4-byte header whose last byte is the address family (AF_INET or AF_INET6).
func (t *tun) Write(from []byte) (int, error) { - buf := t.out - if cap(buf) < len(from)+4 { - buf = make([]byte, len(from)+4) - t.out = buf - } - buf = buf[:len(from)+4] - if len(from) == 0 { return 0, syscall.EIO } - // Determine the IP Family for the NULL L2 Header ipVer := from[0] >> 4 - if ipVer == 4 { - buf[3] = syscall.AF_INET - } else if ipVer == 6 { - buf[3] = syscall.AF_INET6 - } else { + var head [4]byte + switch ipVer { + case 4: + head[3] = syscall.AF_INET + case 6: + head[3] = syscall.AF_INET6 + default: return 0, fmt.Errorf("unable to determine IP version from packet") } - copy(buf[4:], from) + var errno syscall.Errno + var n uintptr + err := t.rc.Write(func(fd uintptr) bool { + iovecs := [2]syscall.Iovec{ + {Base: &head[0], Len: 4}, + {Base: &from[0], Len: uint64(len(from))}, + } + n, _, errno = syscall.Syscall(syscall.SYS_WRITEV, fd, uintptr(unsafe.Pointer(&iovecs[0])), 2) + if errno.Temporary() { + return false + } + return true + }) + if err != nil { + return 0, err + } + if errno != 0 { + return 0, errno + } - n, err := t.ReadWriteCloser.Write(buf) - return n - 4, err + return int(n) - 4, nil } func (t *tun) Networks() []netip.Prefix { diff --git a/overlay/tun_openbsd.go b/overlay/tun_openbsd.go index 81362184..58d4c904 100644 --- a/overlay/tun_openbsd.go +++ b/overlay/tun_openbsd.go @@ -57,8 +57,14 @@ type tun struct { l *slog.Logger f *os.File fd int - // cache out buffer since we need to prepend 4 bytes for tun metadata - out []byte + rc syscall.RawConn + + // readBuf is the per-tun read scratch buffer, reused across calls so we don't allocate per Read. + // OpenBSD's pinsyscall protection forbids raw syscall.Syscall(SYS_READV, ...), and the stdlib doesn't keep syscall.readv + // alive for external linkname (only writev gets that treatment via linkname_libc.go), + // so the read path can't use readv and has to do the prefix-skip copy.
+ // https://github.com/golang/go/issues/78049 + readBuf []byte } var deviceNameRE = regexp.MustCompile(`^tun[0-9]+$`) @@ -88,13 +94,22 @@ func newTun(c *config.C, l *slog.Logger, vpnNetworks []netip.Prefix, _ bool) (*t l.Warn("Failed to set the tun device as nonblocking", "error", err) } + mtu := c.GetInt("tun.mtu", DefaultMTU) + f := os.NewFile(uintptr(fd), "") + rc, err := f.SyscallConn() + if err != nil { + return nil, fmt.Errorf("failed to get syscall conn for tun: %w", err) + } + t := &tun{ - f: os.NewFile(uintptr(fd), ""), + f: f, fd: fd, + rc: rc, Device: deviceName, vpnNetworks: vpnNetworks, - MTU: c.GetInt("tun.mtu", DefaultMTU), + MTU: mtu, l: l, + readBuf: make([]byte, mtu+4), } err = t.reload(c, true) @@ -124,42 +139,74 @@ func (t *tun) Close() error { return nil } +// tunWritev is linkname'd from the standard library so the writev call goes through libc's pinned syscall trampoline. +// OpenBSD's pinsyscall protection rejects raw syscall.Syscall(SYS_WRITEV, ...) calls because they don't originate from +// the libc-pinned addresses, so we can't use the same syscall.Syscall pattern that freebsd / netbsd use. +// Stdlib's $GOROOT/src/syscall/linkname_libc.go has a bare `//go:linkname writev` directive that both opts in to external +// linkname and keeps the symbol alive for the linker. +// There is no equivalent for readv, which is why Read still uses a copy-based path. +// https://github.com/golang/go/issues/78049 + +//go:linkname tunWritev syscall.writev +//go:noescape +func tunWritev(fd int, iovecs []syscall.Iovec) (n uintptr, err error) + +// Read pulls one IP packet off the tun device. NOTE(review): it reuses t.readBuf without locking, so concurrent Read calls would race; confirm there is a single reader.
func (t *tun) Read(to []byte) (int, error) { - buf := make([]byte, len(to)+4) + if cap(t.readBuf) < len(to)+4 { + t.readBuf = make([]byte, len(to)+4) + } + buf := t.readBuf[:len(to)+4] n, err := t.f.Read(buf) - - copy(to, buf[4:]) - return n - 4, err + if err != nil { + return 0, err + } + if n < 4 { + return 0, nil + } + copy(to, buf[4:n]) + return n - 4, nil } -// Write is only valid for single threaded use +// Write pushes one IP packet onto the tun device, prefixed with a 4-byte header whose last byte is the address family (AF_INET or AF_INET6); header and payload are handed to tunWritev as two iovecs. func (t *tun) Write(from []byte) (int, error) { - buf := t.out - if cap(buf) < len(from)+4 { - buf = make([]byte, len(from)+4) - t.out = buf - } - buf = buf[:len(from)+4] - if len(from) == 0 { return 0, syscall.EIO } - // Determine the IP Family for the NULL L2 Header ipVer := from[0] >> 4 - if ipVer == 4 { - buf[3] = syscall.AF_INET - } else if ipVer == 6 { - buf[3] = syscall.AF_INET6 - } else { + var head [4]byte + switch ipVer { + case 4: + head[3] = syscall.AF_INET + case 6: + head[3] = syscall.AF_INET6 + default: return 0, fmt.Errorf("unable to determine IP version from packet") } - copy(buf[4:], from) + var n uintptr + var callErr error + err := t.rc.Write(func(fd uintptr) bool { + iovecs := []syscall.Iovec{ + {Base: &head[0], Len: 4}, + {Base: &from[0], Len: uint64(len(from))}, + } + n, callErr = tunWritev(int(fd), iovecs) + if errors.Is(callErr, syscall.EAGAIN) || errors.Is(callErr, syscall.EWOULDBLOCK) || errors.Is(callErr, syscall.EINTR) { + return false + } + return true + }) + if err != nil { + return 0, err + } + if callErr != nil { + return 0, callErr + } - n, err := t.f.Write(buf) - return n - 4, err + return int(n) - 4, nil } func (t *tun) addIp(cidr netip.Prefix) error {