From f34e8fe0e6a0587839cd6ec3ff4047f62b5d46cd Mon Sep 17 00:00:00 2001 From: JackDoan Date: Mon, 20 Apr 2026 11:09:29 -0500 Subject: [PATCH] potential for bug --- inside.go | 4 ++-- overlay/device.go | 6 ++++++ overlay/noop.go | 4 ++++ overlay/tun_android.go | 4 ++++ overlay/tun_darwin.go | 4 ++++ overlay/tun_disabled.go | 4 ++++ overlay/tun_freebsd.go | 4 ++++ overlay/tun_ios.go | 4 ++++ overlay/tun_linux.go | 30 ++++++++++++++++++++++++++---- overlay/tun_netbsd.go | 4 ++++ overlay/tun_openbsd.go | 4 ++++ overlay/tun_tester.go | 4 ++++ overlay/tun_windows.go | 4 ++++ overlay/user.go | 3 +++ 14 files changed, 77 insertions(+), 6 deletions(-) diff --git a/inside.go b/inside.go index 69503abf..981e93a0 100644 --- a/inside.go +++ b/inside.go @@ -33,7 +33,7 @@ func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet // routes packets from the Nebula addr to the Nebula addr through the Nebula // TUN device. if immediatelyForwardToSelf { - _, err := f.readers[q].Write(packet) + _, err := f.readers[q].WriteReject(packet) if err != nil { f.l.WithError(err).Error("Failed to forward to tun") } @@ -148,7 +148,7 @@ func (f *Interface) rejectInside(packet []byte, out []byte, q int) { return } - _, err := f.readers[q].Write(out) + _, err := f.readers[q].WriteReject(out) if err != nil { f.l.WithError(err).Error("Failed to write to tun") } diff --git a/overlay/device.go b/overlay/device.go index c89bf5e9..de93f381 100644 --- a/overlay/device.go +++ b/overlay/device.go @@ -19,6 +19,12 @@ const defaultBatchBufSize = 65535 type Queue interface { io.ReadWriteCloser ReadBatch() ([][]byte, error) + // WriteReject writes a single packet that originated from the inside + // path (reject replies or self-forward) using scratch state distinct + // from Write, so it can run concurrently with Write on the same Queue + // without a data race. On backends without a shared-scratch Write, a + // trivial delegation to Write is acceptable. + WriteReject(p []byte) (int, error) } type Device interface { diff --git a/overlay/noop.go b/overlay/noop.go index 4892e937..15a36a22 100644 --- a/overlay/noop.go +++ b/overlay/noop.go @@ -37,6 +37,10 @@ func (NoopTun) Write([]byte) (int, error) { return 0, nil } +func (NoopTun) WriteReject(p []byte) (int, error) { + return 0, nil +} + func (NoopTun) SupportsMultiqueue() bool { return false } diff --git a/overlay/tun_android.go b/overlay/tun_android.go index 03d8ecad..d152732b 100644 --- a/overlay/tun_android.go +++ b/overlay/tun_android.go @@ -41,6 +41,10 @@ func (t *tun) ReadBatch() ([][]byte, error) { return t.batchRet[:], nil } +func (t *tun) WriteReject(p []byte) (int, error) { + return t.Write(p) +} + func newTunFromFd(c *config.C, l *logrus.Logger, deviceFd int, vpnNetworks []netip.Prefix) (*tun, error) { // XXX Android returns an fd in non-blocking mode which is necessary for shutdown to work properly. // Be sure not to call file.Fd() as it will set the fd to blocking mode. diff --git a/overlay/tun_darwin.go b/overlay/tun_darwin.go index 495349b9..e9b39ce4 100644 --- a/overlay/tun_darwin.go +++ b/overlay/tun_darwin.go @@ -527,6 +527,10 @@ func (t *tun) ReadBatch() ([][]byte, error) { return t.batchRet[:], nil } +func (t *tun) WriteReject(p []byte) (int, error) { + return t.Write(p) +} + // Write is only valid for single threaded use func (t *tun) Write(from []byte) (int, error) { buf := t.out diff --git a/overlay/tun_disabled.go b/overlay/tun_disabled.go index ca407a53..c8a8afe5 100644 --- a/overlay/tun_disabled.go +++ b/overlay/tun_disabled.go @@ -120,6 +120,10 @@ func (t *disabledTun) Write(b []byte) (int, error) { return len(b), nil } +func (t *disabledTun) WriteReject(b []byte) (int, error) { + return t.Write(b) +} + func (t *disabledTun) SupportsMultiqueue() bool { return true } diff --git a/overlay/tun_freebsd.go b/overlay/tun_freebsd.go index 8165d43b..e4707e07 100644 --- a/overlay/tun_freebsd.go +++ b/overlay/tun_freebsd.go @@ -110,6 +110,10 @@ func (t *tun) ReadBatch() ([][]byte, error) { return t.batchRet[:], nil } +func (t *tun) WriteReject(p []byte) (int, error) { + return t.Write(p) +} + func (t *tun) Read(to []byte) (int, error) { // use readv() to read from the tunnel device, to eliminate the need for copying the buffer if t.devFd < 0 { diff --git a/overlay/tun_ios.go b/overlay/tun_ios.go index 6c3b661d..253644a5 100644 --- a/overlay/tun_ios.go +++ b/overlay/tun_ios.go @@ -43,6 +43,10 @@ func (t *tun) ReadBatch() ([][]byte, error) { return t.batchRet[:], nil } +func (t *tun) WriteReject(p []byte) (int, error) { + return t.Write(p) +} + func newTun(_ *config.C, _ *logrus.Logger, _ []netip.Prefix, _ bool) (*tun, error) { return nil, fmt.Errorf("newTun not supported in iOS") } diff --git a/overlay/tun_linux.go b/overlay/tun_linux.go index 60cf09c3..2e6a27e2 100644 --- a/overlay/tun_linux.go +++ b/overlay/tun_linux.go @@ -47,7 +47,12 @@ type tunFile struct { segOff int // cursor into segBuf for the current ReadBatch drain pending [][]byte // segments waiting to be drained by Read pendingIdx int - writeIovs [2]unix.Iovec // preallocated iovecs for vnetHdr writes; iovs[0] is fixed to zeroVnetHdr + writeIovs [2]unix.Iovec // preallocated iovecs for Write (coalescer passthrough); iovs[0] is fixed to zeroVnetHdr + // rejectIovs is a second preallocated iovec scratch used exclusively by + // WriteReject (reject + self-forward from the inside path). It mirrors + // writeIovs but lets listenIn goroutines emit reject packets without + // racing with the listenOut coalescer that owns writeIovs. + rejectIovs [2]unix.Iovec // gsoHdrBuf is a per-queue 10-byte scratch for the virtio_net_hdr emitted // by WriteGSO. Separate from zeroVnetHdr so a concurrent non-GSO Write on @@ -92,6 +97,8 @@ func (r *tunFile) newFriend(fd int) (*tunFile, error) { out.segBuf = make([]byte, tunSegBufCap) out.writeIovs[0].Base = &zeroVnetHdr[0] out.writeIovs[0].SetLen(virtioNetHdrLen) + out.rejectIovs[0].Base = &zeroVnetHdr[0] + out.rejectIovs[0].SetLen(virtioNetHdrLen) out.gsoIovs = make([]unix.Iovec, 2, 2+gsoInitialPayIovs) out.gsoIovs[0].Base = &out.gsoHdrBuf[0] out.gsoIovs[0].SetLen(virtioNetHdrLen) @@ -128,6 +135,8 @@ func newTunFd(fd int, vnetHdr bool) (*tunFile, error) { out.segBuf = make([]byte, tunSegBufCap) out.writeIovs[0].Base = &zeroVnetHdr[0] out.writeIovs[0].SetLen(virtioNetHdrLen) + out.rejectIovs[0].Base = &zeroVnetHdr[0] + out.rejectIovs[0].SetLen(virtioNetHdrLen) out.gsoIovs = make([]unix.Iovec, 2, 2+gsoInitialPayIovs) out.gsoIovs[0].Base = &out.gsoHdrBuf[0] out.gsoIovs[0].SetLen(virtioNetHdrLen) @@ -297,6 +306,19 @@ func (r *tunFile) Read(buf []byte) (int, error) { } func (r *tunFile) Write(buf []byte) (int, error) { + return r.writeWithScratch(buf, &r.writeIovs) +} + +// WriteReject emits a packet using a dedicated iovec scratch (rejectIovs) +// distinct from the one used by the coalescer's Write path. This avoids a +// data race between the inside (listenIn) goroutine emitting reject or +// self-forward packets and the outside (listenOut) goroutine flushing TCP +// coalescer passthroughs on the same tunFile. +func (r *tunFile) WriteReject(buf []byte) (int, error) { + return r.writeWithScratch(buf, &r.rejectIovs) +} + +func (r *tunFile) writeWithScratch(buf []byte, iovs *[2]unix.Iovec) (int, error) { if !r.vnetHdr { for { if n, err := unix.Write(r.fd, buf); err == nil { @@ -319,9 +341,9 @@ func (r *tunFile) Write(buf []byte) (int, error) { } // Point the payload iovec at the caller's buffer. iovs[0] is pre-wired // to zeroVnetHdr during tunFile construction so we don't rebuild it here. - r.writeIovs[1].Base = &buf[0] - r.writeIovs[1].SetLen(len(buf)) - iovPtr := uintptr(unsafe.Pointer(&r.writeIovs[0])) + iovs[1].Base = &buf[0] + iovs[1].SetLen(len(buf)) + iovPtr := uintptr(unsafe.Pointer(&iovs[0])) // The TUN fd is non-blocking (set in newTunFd / newFriend), so writev // either completes promptly or returns EAGAIN — it cannot park the // goroutine inside the kernel. That lets us use syscall.RawSyscall and diff --git a/overlay/tun_netbsd.go b/overlay/tun_netbsd.go index 5b8519f4..fe7827d3 100644 --- a/overlay/tun_netbsd.go +++ b/overlay/tun_netbsd.go @@ -82,6 +82,10 @@ func (t *tun) ReadBatch() ([][]byte, error) { return t.batchRet[:], nil } +func (t *tun) WriteReject(p []byte) (int, error) { + return t.Write(p) +} + var deviceNameRE = regexp.MustCompile(`^tun[0-9]+$`) func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ []netip.Prefix) (*tun, error) { diff --git a/overlay/tun_openbsd.go b/overlay/tun_openbsd.go index e3df3cad..1b8115f6 100644 --- a/overlay/tun_openbsd.go +++ b/overlay/tun_openbsd.go @@ -75,6 +75,10 @@ func (t *tun) ReadBatch() ([][]byte, error) { return t.batchRet[:], nil } +func (t *tun) WriteReject(p []byte) (int, error) { + return t.Write(p) +} + var deviceNameRE = regexp.MustCompile(`^tun[0-9]+$`) func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ []netip.Prefix) (*tun, error) { diff --git a/overlay/tun_tester.go b/overlay/tun_tester.go index 8ea00a6d..fb75e006 100644 --- a/overlay/tun_tester.go +++ b/overlay/tun_tester.go @@ -130,6 +130,10 @@ func (t *TestTun) Write(b []byte) (n int, err error) { return len(b), nil } +func (t *TestTun) WriteReject(b []byte) (int, error) { + return t.Write(b) +} + func (t *TestTun) Close() error { if t.closed.CompareAndSwap(false, true) { close(t.rxPackets) diff --git a/overlay/tun_windows.go b/overlay/tun_windows.go index 01910588..71e33919 100644 --- a/overlay/tun_windows.go +++ b/overlay/tun_windows.go @@ -52,6 +52,10 @@ func (t *winTun) ReadBatch() ([][]byte, error) { return t.batchRet[:], nil } +func (t *winTun) WriteReject(p []byte) (int, error) { + return t.Write(p) +} + func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ []netip.Prefix) (Device, error) { return nil, fmt.Errorf("newTunFromFd not supported in Windows") } diff --git a/overlay/user.go b/overlay/user.go index 0e2a06d7..71ff84d6 100644 --- a/overlay/user.go +++ b/overlay/user.go @@ -79,6 +79,9 @@ func (d *UserDevice) Read(p []byte) (n int, err error) { func (d *UserDevice) Write(p []byte) (n int, err error) { return d.inboundWriter.Write(p) } +func (d *UserDevice) WriteReject(p []byte) (n int, err error) { + return d.Write(p) +} func (d *UserDevice) Close() error { d.inboundWriter.Close() d.outboundWriter.Close()