From 8b32382cd97919a65162c2f66cc8bcf271147c20 Mon Sep 17 00:00:00 2001 From: Jay Wren Date: Wed, 19 Nov 2025 12:03:38 -0500 Subject: [PATCH] zero copy even with virtioheder --- interface.go | 16 +++++++----- outside.go | 11 ++++---- overlay/tun.go | 1 + overlay/tun_linux.go | 61 ++++++++------------------------------------ stats.go | 1 + 5 files changed, 27 insertions(+), 63 deletions(-) diff --git a/interface.go b/interface.go index 725a6dd..5b96d1c 100644 --- a/interface.go +++ b/interface.go @@ -22,6 +22,7 @@ import ( ) const mtu = 9001 +const virtioNetHdrLen = overlay.VirtioNetHdrLen type InterfaceConfig struct { HostMap *HostMap @@ -266,13 +267,16 @@ func (f *Interface) listenOut(i int) { ctCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout) lhh := f.lightHouse.NewRequestHandler() - plaintext := make([]byte, udp.MTU) + + // Allocate plaintext buffer with virtio header headroom to avoid copies on TUN write + plaintext := make([]byte, virtioNetHdrLen+udp.MTU) + h := &header.H{} fwPacket := &firewall.Packet{} - nb := make([]byte, 12, 12) + nb := make([]byte, 12) li.ListenOut(func(fromUdpAddr netip.AddrPort, payload []byte) { - f.readOutsidePackets(fromUdpAddr, nil, plaintext[:0], payload, h, fwPacket, lhh, nb, i, ctCache.Get(f.l)) + f.readOutsidePackets(fromUdpAddr, nil, plaintext[:virtioNetHdrLen], payload, h, fwPacket, lhh, nb, i, ctCache.Get(f.l)) }) } @@ -298,11 +302,10 @@ func (f *Interface) listenIn(reader io.ReadWriteCloser, i int) { func (f *Interface) listenInSingle(reader io.ReadWriteCloser, i int) { packet := make([]byte, mtu) // Allocate out buffer with virtio header headroom (10 bytes) to avoid copies on write - const virtioNetHdrLen = 10 outBuf := make([]byte, virtioNetHdrLen+mtu) out := outBuf[virtioNetHdrLen:] // Use slice starting after headroom fwPacket := &firewall.Packet{} - nb := make([]byte, 12, 12) + nb := make([]byte, 12) conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout) @@ -324,7 +327,6 @@ func (f *Interface) listenInSingle(reader io.ReadWriteCloser, i int) { func (f *Interface) listenInBatch(reader io.ReadWriteCloser, batchReader BatchReader, i int) { batchSize := batchReader.BatchSize() - const virtioNetHdrLen = 10 // Allocate buffers for batch reading bufs := make([][]byte, batchSize) @@ -346,7 +348,7 @@ func (f *Interface) listenInBatch(reader io.ReadWriteCloser, batchReader BatchRe batchAddrs := make([]netip.AddrPort, 0, batchSize) // Pre-allocate nonce buffer (reused for all encryptions) - nb := make([]byte, 12, 12) + nb := make([]byte, 12) conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout) diff --git a/outside.go b/outside.go index 5ff87bd..eae15f3 100644 --- a/outside.go +++ b/outside.go @@ -95,8 +95,7 @@ func (f *Interface) readOutsidePackets(ip netip.AddrPort, via *ViaSender, out [] switch relay.Type { case TerminalType: // If I am the target of this relay, process the unwrapped packet - // From this recursive point, all these variables are 'burned'. We shouldn't rely on them again. - f.readOutsidePackets(netip.AddrPort{}, &ViaSender{relayHI: hostinfo, remoteIdx: relay.RemoteIndex, relay: relay}, out[:0], signedPayload, h, fwPacket, lhf, nb, q, localCache) + f.readOutsidePackets(netip.AddrPort{}, &ViaSender{relayHI: hostinfo, remoteIdx: relay.RemoteIndex, relay: relay}, out[:virtioNetHdrLen], signedPayload, h, fwPacket, lhf, nb, q, localCache) return case ForwardingType: // Find the target HostInfo relay object @@ -474,9 +473,11 @@ func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out return false } - err = newPacket(out, true, fwPacket) + packetData := out[virtioNetHdrLen:] + + err = newPacket(packetData, true, fwPacket) if err != nil { - hostinfo.logger(f.l).WithError(err).WithField("packet", out). + hostinfo.logger(f.l).WithError(err).WithField("packet", packetData). Warnf("Error while validating inbound packet") return false } @@ -491,7 +492,7 @@ func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out if dropReason != nil { // NOTE: We give `packet` as the `out` here since we already decrypted from it and we don't need it anymore // This gives us a buffer to build the reject packet in - f.rejectOutside(out, hostinfo.ConnectionState, hostinfo, nb, packet, q) + f.rejectOutside(packetData, hostinfo.ConnectionState, hostinfo, nb, packet, q) if f.l.Level >= logrus.DebugLevel { hostinfo.logger(f.l).WithField("fwPacket", fwPacket). WithField("reason", dropReason). diff --git a/overlay/tun.go b/overlay/tun.go index 3a61d18..7947e29 100644 --- a/overlay/tun.go +++ b/overlay/tun.go @@ -11,6 +11,7 @@ import ( ) const DefaultMTU = 1300 +const VirtioNetHdrLen = 10 // Size of virtio_net_hdr structure // TODO: We may be able to remove routines type DeviceFactory func(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, routines int) (Device, error) diff --git a/overlay/tun_linux.go b/overlay/tun_linux.go index c5d7739..f40031c 100644 --- a/overlay/tun_linux.go +++ b/overlay/tun_linux.go @@ -66,10 +66,6 @@ type ifreqQLEN struct { pad [8]byte } -const ( - virtioNetHdrLen = 10 // Size of virtio_net_hdr structure -) - // wgDeviceWrapper wraps a wireguard Device to implement io.ReadWriteCloser // This allows multiqueue readers to use the same wireguard Device batching as the main device type wgDeviceWrapper struct { @@ -92,27 +88,11 @@ func (w *wgDeviceWrapper) Read(b []byte) (int, error) { } func (w *wgDeviceWrapper) Write(b []byte) (int, error) { - // Check if buffer has the expected headroom pattern to avoid copy - var buf []byte - - if cap(b) >= len(b)+virtioNetHdrLen { - buf = b[:cap(b)] - if len(buf) == len(b)+virtioNetHdrLen { - // Perfect! Buffer has headroom, no copy needed - buf = buf[:len(b)+virtioNetHdrLen] - } else { - // Unexpected capacity, safer to copy - buf = make([]byte, virtioNetHdrLen+len(b)) - copy(buf[virtioNetHdrLen:], b) - } - } else { - // No headroom, need to allocate and copy - buf = make([]byte, virtioNetHdrLen+len(b)) - copy(buf[virtioNetHdrLen:], b) - } - - bufs := [][]byte{buf} - n, err := w.dev.Write(bufs, virtioNetHdrLen) + // Buffer b should have virtio header space (10 bytes) at the beginning + // The decrypted packet data starts at offset 10 + // Pass the full buffer to WireGuard with offset=virtioNetHdrLen + bufs := [][]byte{b} + n, err := w.dev.Write(bufs, VirtioNetHdrLen) if err != nil { return 0, err } @@ -419,32 +399,11 @@ func (t *tun) BatchSize() int { func (t *tun) Write(b []byte) (int, error) { if t.wgDevice != nil { - // Use wireguard device which handles virtio headers internally - // Check if buffer has the expected headroom pattern: - // cap(b) should be len(b) + virtioNetHdrLen, indicating pre-allocated headroom - var buf []byte - - if cap(b) >= len(b)+virtioNetHdrLen { - // Buffer likely has headroom - use unsafe to access it - // Create a slice that includes the headroom by re-slicing from capacity - buf = b[:cap(b)] - // Check if we have exactly the right amount of extra capacity - if len(buf) == len(b)+virtioNetHdrLen { - // Perfect! This buffer was allocated with headroom, no copy needed - buf = buf[:len(b)+virtioNetHdrLen] - } else { - // Unexpected capacity, safer to copy - buf = make([]byte, virtioNetHdrLen+len(b)) - copy(buf[virtioNetHdrLen:], b) - } - } else { - // No headroom, need to allocate and copy - buf = make([]byte, virtioNetHdrLen+len(b)) - copy(buf[virtioNetHdrLen:], b) - } - - bufs := [][]byte{buf} - n, err := t.wgDevice.Write(bufs, virtioNetHdrLen) + // Buffer b should have virtio header space (10 bytes) at the beginning + // The decrypted packet data starts at offset 10 + // Pass the full buffer to WireGuard with offset=virtioNetHdrLen + bufs := [][]byte{b} + n, err := t.wgDevice.Write(bufs, VirtioNetHdrLen) if err != nil { return 0, err } diff --git a/stats.go b/stats.go index c88c45c..b86919c 100644 --- a/stats.go +++ b/stats.go @@ -6,6 +6,7 @@ import ( "log" "net" "net/http" + _ "net/http/pprof" "runtime" "strconv" "time"