no allocs

This commit is contained in:
JackDoan
2026-04-17 10:29:46 -05:00
parent 9d59cba7e1
commit 5241bf6d16
2 changed files with 59 additions and 13 deletions

View File

@@ -10,6 +10,7 @@ import (
"net" "net"
"net/netip" "net/netip"
"os" "os"
"runtime"
"strings" "strings"
"sync" "sync"
"sync/atomic" "sync/atomic"
@@ -44,8 +45,14 @@ type tunFile struct {
segBuf []byte // backing store for segmented output segBuf []byte // backing store for segmented output
pending [][]byte // segments waiting to be drained by Read pending [][]byte // segments waiting to be drained by Read
pendingIdx int pendingIdx int
writeIovs [2]unix.Iovec // preallocated iovecs for vnetHdr writes; iovs[0] is fixed to zeroVnetHdr
} }
// zeroVnetHdr is the virtio_net_hdr (virtioNetHdrLen bytes) we prepend to
// every TUN write when IFF_VNET_HDR is active. All-zero signals "no GSO, no
// checksum offload"; the kernel accepts the packet as-is.
//
// newFriend and newTunFd point writeIovs[0] at this array when vnetHdr is set,
// so it is shared by the tunFiles they build and must stay all-zero.
var zeroVnetHdr [virtioNetHdrLen]byte
// newFriend makes a tunFile for a MultiQueueReader that copies the shutdown eventfd from the parent tun // newFriend makes a tunFile for a MultiQueueReader that copies the shutdown eventfd from the parent tun
func (r *tunFile) newFriend(fd int) (*tunFile, error) { func (r *tunFile) newFriend(fd int) (*tunFile, error) {
if err := unix.SetNonblock(fd, true); err != nil { if err := unix.SetNonblock(fd, true); err != nil {
@@ -67,6 +74,8 @@ func (r *tunFile) newFriend(fd int) (*tunFile, error) {
if r.vnetHdr { if r.vnetHdr {
out.readBuf = make([]byte, tunReadBufSize) out.readBuf = make([]byte, tunReadBufSize)
out.segBuf = make([]byte, tunSegBufSize) out.segBuf = make([]byte, tunSegBufSize)
out.writeIovs[0].Base = &zeroVnetHdr[0]
out.writeIovs[0].SetLen(virtioNetHdrLen)
} }
return out, nil return out, nil
} }
@@ -98,6 +107,8 @@ func newTunFd(fd int, vnetHdr bool) (*tunFile, error) {
if vnetHdr { if vnetHdr {
out.readBuf = make([]byte, tunReadBufSize) out.readBuf = make([]byte, tunReadBufSize)
out.segBuf = make([]byte, tunSegBufSize) out.segBuf = make([]byte, tunSegBufSize)
out.writeIovs[0].Base = &zeroVnetHdr[0]
out.writeIovs[0].SetLen(virtioNetHdrLen)
} }
return out, nil return out, nil
@@ -203,10 +214,6 @@ func (r *tunFile) Read(buf []byte) (int, error) {
} }
} }
// zeroVnetHdr is the prefix we prepend to every write when IFF_VNET_HDR is
// active and we have no offload info to convey.
var zeroVnetHdr [virtioNetHdrLen]byte
func (r *tunFile) Write(buf []byte) (int, error) { func (r *tunFile) Write(buf []byte) (int, error) {
if !r.vnetHdr { if !r.vnetHdr {
for { for {
@@ -225,25 +232,34 @@ func (r *tunFile) Write(buf []byte) (int, error) {
} }
} }
iovs := [][]byte{zeroVnetHdr[:], buf} if len(buf) == 0 {
return 0, nil
}
// Point the payload iovec at the caller's buffer. writeIovs[0] is pre-wired
// to zeroVnetHdr during tunFile construction, so we don't rebuild it here.
r.writeIovs[1].Base = &buf[0]
r.writeIovs[1].SetLen(len(buf))
iovPtr := uintptr(unsafe.Pointer(&r.writeIovs[0]))
for { for {
n, err := unix.Writev(r.fd, iovs) n, _, errno := unix.Syscall(unix.SYS_WRITEV, uintptr(r.fd), iovPtr, 2)
if err == nil { if errno == 0 {
if n < virtioNetHdrLen { runtime.KeepAlive(buf)
if int(n) < virtioNetHdrLen {
return 0, io.ErrShortWrite return 0, io.ErrShortWrite
} }
return n - virtioNetHdrLen, nil return int(n) - virtioNetHdrLen, nil
} }
if err == unix.EAGAIN { if errno == unix.EAGAIN {
if err = r.blockOnWrite(); err != nil { if err := r.blockOnWrite(); err != nil {
return 0, err return 0, err
} }
continue continue
} }
if err == unix.EINTR { if errno == unix.EINTR {
continue continue
} }
return 0, err runtime.KeepAlive(buf)
return 0, errno
} }
} }

View File

@@ -5,6 +5,7 @@ package overlay
import ( import (
"encoding/binary" "encoding/binary"
"os"
"testing" "testing"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
@@ -245,3 +246,32 @@ func TestSegmentRejectsUDP(t *testing.T) {
t.Fatalf("expected rejection for UDP GSO") t.Fatalf("expected rejection for UDP GSO")
} }
} }
// TestTunFileWriteVnetHdrNoAlloc guards the IFF_VNET_HDR write path against
// heap allocations. /dev/null serves as the sink so that each Write is
// expected to complete on its first attempt instead of waiting for readiness.
func TestTunFileWriteVnetHdrNoAlloc(t *testing.T) {
	nullFd, openErr := unix.Open("/dev/null", os.O_WRONLY, 0)
	if openErr != nil {
		t.Fatalf("open /dev/null: %v", openErr)
	}
	t.Cleanup(func() { _ = unix.Close(nullFd) })

	// Wire the header iovec by hand, mirroring what newTunFd and newFriend do
	// when vnetHdr is enabled.
	sink := &tunFile{fd: nullFd, vnetHdr: true}
	sink.writeIovs[0].Base = &zeroVnetHdr[0]
	sink.writeIovs[0].SetLen(virtioNetHdrLen)

	pkt := make([]byte, 1400)

	// writeOnce pushes a single packet through the path under test and aborts
	// the test on any error.
	writeOnce := func() {
		if _, err := sink.Write(pkt); err != nil {
			t.Fatalf("Write: %v", err)
		}
	}

	// Prime the path once so that any one-time allocation made elsewhere is
	// not attributed to the measured runs.
	writeOnce()

	if perCall := testing.AllocsPerRun(1000, writeOnce); perCall != 0 {
		t.Fatalf("Write allocated %.1f times per call, want 0", perCall)
	}
}