Compare commits

..

13 Commits

Author SHA1 Message Date
JackDoan
e7423d39f9 cursed 2025-11-06 09:18:33 -06:00
JackDoan
befba57366 hmmm 2025-11-05 15:38:47 -06:00
Ryan
2d128a3254 add locking for stop crash 2025-11-05 11:58:25 -05:00
Ryan
c8980d34cf fixes 2025-11-05 10:54:08 -05:00
Ryan
98f264cf14 works well 2025-11-04 19:33:52 -05:00
Ryan
aa44f4c7c9 hmmmmmm it works i guess maybe 2025-11-04 16:08:31 -05:00
Ryan Huber
419157c407 passes traffic 2025-11-04 04:50:35 +00:00
Ryan Huber
0864852d33 updated bind 2025-11-04 04:39:07 +00:00
Ryan Huber
2b5aec9a18 updated udp 2025-11-04 04:34:59 +00:00
Ryan Huber
f0665bee20 pem.go restored 2025-11-04 04:32:22 +00:00
Ryan Huber
11da0baab1 quick fix 2025-11-04 04:21:27 +00:00
Ryan Huber
608904b9dd add new files for compat layer 2025-11-04 04:10:51 +00:00
Ryan Huber
fd1c52127f first try 2025-11-04 04:00:29 +00:00
99 changed files with 4938 additions and 5564 deletions

View File

@@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- uses: actions/setup-go@v6
with:

View File

@@ -10,7 +10,7 @@ jobs:
name: Build Linux/BSD All
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- uses: actions/setup-go@v6
with:
@@ -24,7 +24,7 @@ jobs:
mv build/*.tar.gz release
- name: Upload artifacts
uses: actions/upload-artifact@v5
uses: actions/upload-artifact@v4
with:
name: linux-latest
path: release
@@ -33,7 +33,7 @@ jobs:
name: Build Windows
runs-on: windows-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- uses: actions/setup-go@v6
with:
@@ -55,7 +55,7 @@ jobs:
mv dist\windows\wintun build\dist\windows\
- name: Upload artifacts
uses: actions/upload-artifact@v5
uses: actions/upload-artifact@v4
with:
name: windows-latest
path: build
@@ -66,7 +66,7 @@ jobs:
HAS_SIGNING_CREDS: ${{ secrets.AC_USERNAME != '' }}
runs-on: macos-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- uses: actions/setup-go@v6
with:
@@ -104,7 +104,7 @@ jobs:
fi
- name: Upload artifacts
uses: actions/upload-artifact@v5
uses: actions/upload-artifact@v4
with:
name: darwin-latest
path: ./release/*
@@ -124,11 +124,11 @@ jobs:
# be overwritten
- name: Checkout code
if: ${{ env.HAS_DOCKER_CREDS == 'true' }}
uses: actions/checkout@v5
uses: actions/checkout@v4
- name: Download artifacts
if: ${{ env.HAS_DOCKER_CREDS == 'true' }}
uses: actions/download-artifact@v6
uses: actions/download-artifact@v4
with:
name: linux-latest
path: artifacts
@@ -160,10 +160,10 @@ jobs:
needs: [build-linux, build-darwin, build-windows]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- name: Download artifacts
uses: actions/download-artifact@v6
uses: actions/download-artifact@v4
with:
path: artifacts

View File

@@ -20,7 +20,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- uses: actions/setup-go@v6
with:

View File

@@ -18,7 +18,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- uses: actions/setup-go@v6
with:

View File

@@ -18,7 +18,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- uses: actions/setup-go@v6
with:
@@ -32,7 +32,7 @@ jobs:
run: make vet
- name: golangci-lint
uses: golangci/golangci-lint-action@v9
uses: golangci/golangci-lint-action@v8
with:
version: v2.5
@@ -45,7 +45,7 @@ jobs:
- name: Build test mobile
run: make build-test-mobile
- uses: actions/upload-artifact@v5
- uses: actions/upload-artifact@v4
with:
name: e2e packet flow linux-latest
path: e2e/mermaid/linux-latest
@@ -56,7 +56,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- uses: actions/setup-go@v6
with:
@@ -77,7 +77,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- uses: actions/setup-go@v6
with:
@@ -98,7 +98,7 @@ jobs:
os: [windows-latest, macos-latest]
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v4
- uses: actions/setup-go@v6
with:
@@ -115,7 +115,7 @@ jobs:
run: make vet
- name: golangci-lint
uses: golangci/golangci-lint-action@v9
uses: golangci/golangci-lint-action@v8
with:
version: v2.5
@@ -125,7 +125,7 @@ jobs:
- name: End 2 end
run: make e2evv
- uses: actions/upload-artifact@v5
- uses: actions/upload-artifact@v4
with:
name: e2e packet flow ${{ matrix.os }}
path: e2e/mermaid/${{ matrix.os }}

164
batch_pipeline.go Normal file
View File

@@ -0,0 +1,164 @@
package nebula
import (
"net/netip"
"github.com/slackhq/nebula/overlay"
"github.com/slackhq/nebula/udp"
)
// batchPipelines tracks whether the inside device can operate on packet batches
// and, if so, holds the shared packet pool sized for the virtio headroom and
// payload limits advertised by the device. It also owns the fan-in/fan-out
// queues between the TUN readers, encrypt/decrypt workers, and the UDP writers.
type batchPipelines struct {
	enabled    bool                        // true only after init succeeds on a BatchCapableDevice
	inside     overlay.BatchCapableDevice  // the batch-capable view of the inside (TUN) device
	headroom   int                         // per-packet headroom reported by BatchHeadroom()
	payloadCap int                         // per-packet payload capacity; at least udp.MTU * maxSegments
	pool       *overlay.PacketPool         // shared pool sized with headroom/payloadCap
	batchSize  int                         // device batch size reported by BatchSize()
	routines   int                         // number of worker routines; one queue of each kind per routine
	rxQueues   []chan *overlay.Packet      // per-routine queues: UDP reader -> decrypt workers
	txQueues   []chan queuedDatagram       // per-routine queues: encrypt workers -> UDP writers
	tunQueues  []chan *overlay.Packet      // per-routine queues: decrypt workers -> TUN writers
}
// queuedDatagram pairs an encrypted packet with the remote address it should
// be sent to; it is the element type of the txQueues.
type queuedDatagram struct {
	packet *overlay.Packet // payload to transmit (ownership transfers to the UDP writer)
	addr   netip.AddrPort  // destination remote address
}
// init probes device for batch capability and, when present, sizes the shared
// packet pool and allocates one rx/tx/tun queue per routine. It is a no-op
// (leaving bp disabled) when the device is nil, routines is non-positive, the
// device is not batch capable, or the device advertises non-positive limits.
func (bp *batchPipelines) init(device overlay.Device, routines int, queueDepth int, maxSegments int) {
	if device == nil || routines <= 0 {
		return
	}
	capable, ok := device.(overlay.BatchCapableDevice)
	if !ok {
		return
	}

	head := capable.BatchHeadroom()
	segments := maxSegments
	if segments < 1 {
		segments = 1
	}
	// Guarantee room for the largest UDP datagram we may coalesce.
	payload := capable.BatchPayloadCap()
	if required := udp.MTU * segments; payload < required {
		payload = required
	}
	size := capable.BatchSize()
	if head <= 0 || payload <= 0 || size <= 0 {
		return
	}

	bp.enabled = true
	bp.inside = capable
	bp.headroom = head
	bp.payloadCap = payload
	bp.batchSize = size
	bp.routines = routines
	bp.pool = overlay.NewPacketPool(head, payload)

	// Queue depth: explicit config wins, otherwise a multiple of the batch
	// size; either way never below one full batch.
	depth := size * defaultBatchQueueDepthFactor
	if queueDepth > 0 {
		depth = queueDepth
	}
	if depth < size {
		depth = size
	}

	bp.rxQueues = make([]chan *overlay.Packet, routines)
	bp.txQueues = make([]chan queuedDatagram, routines)
	bp.tunQueues = make([]chan *overlay.Packet, routines)
	for i := range bp.rxQueues {
		bp.rxQueues[i] = make(chan *overlay.Packet, depth)
		bp.txQueues[i] = make(chan queuedDatagram, depth)
		bp.tunQueues[i] = make(chan *overlay.Packet, depth)
	}
}
// Pool returns the shared packet pool, or nil when batching is disabled.
func (bp *batchPipelines) Pool() *overlay.PacketPool {
	if bp != nil && bp.enabled {
		return bp.pool
	}
	return nil
}
// Enabled reports whether batch processing was successfully initialized.
// Safe to call on a nil receiver.
func (bp *batchPipelines) Enabled() bool {
	if bp == nil {
		return false
	}
	return bp.enabled
}
// batchSizeHint returns the device batch size, falling back to 1 when the
// pipelines are nil or no positive batch size was recorded.
func (bp *batchPipelines) batchSizeHint() int {
	if bp != nil && bp.batchSize > 0 {
		return bp.batchSize
	}
	return 1
}
// rxQueue returns routine i's inbound (UDP -> decrypt) queue, or nil when
// batching is disabled or i is out of range.
func (bp *batchPipelines) rxQueue(i int) chan *overlay.Packet {
	if bp != nil && bp.enabled && i >= 0 && i < len(bp.rxQueues) {
		return bp.rxQueues[i]
	}
	return nil
}
// txQueue returns routine i's outbound (encrypt -> UDP writer) queue, or nil
// when batching is disabled or i is out of range.
func (bp *batchPipelines) txQueue(i int) chan queuedDatagram {
	if bp != nil && bp.enabled && i >= 0 && i < len(bp.txQueues) {
		return bp.txQueues[i]
	}
	return nil
}
// tunQueue returns routine i's (decrypt -> TUN writer) queue, or nil when
// batching is disabled or i is out of range.
func (bp *batchPipelines) tunQueue(i int) chan *overlay.Packet {
	if bp != nil && bp.enabled && i >= 0 && i < len(bp.tunQueues) {
		return bp.tunQueues[i]
	}
	return nil
}
// txQueueLen reports the number of datagrams currently buffered in routine
// i's tx queue; 0 when the queue does not exist.
func (bp *batchPipelines) txQueueLen(i int) int {
	if q := bp.txQueue(i); q != nil {
		return len(q)
	}
	return 0
}
// tunQueueLen reports the number of packets currently buffered in routine
// i's tun queue; 0 when the queue does not exist.
func (bp *batchPipelines) tunQueueLen(i int) int {
	if q := bp.tunQueue(i); q != nil {
		return len(q)
	}
	return 0
}
// enqueueRx hands pkt to routine i's rx queue, blocking if the queue is full.
// Returns false (without consuming pkt) when the queue does not exist.
func (bp *batchPipelines) enqueueRx(i int, pkt *overlay.Packet) bool {
	queue := bp.rxQueue(i)
	if queue == nil {
		return false
	}
	queue <- pkt
	return true
}
// enqueueTx hands pkt (destined for addr) to routine i's tx queue, blocking
// if the queue is full. Returns false (without consuming pkt) when the queue
// does not exist.
func (bp *batchPipelines) enqueueTx(i int, pkt *overlay.Packet, addr netip.AddrPort) bool {
	queue := bp.txQueue(i)
	if queue == nil {
		return false
	}
	queue <- queuedDatagram{packet: pkt, addr: addr}
	return true
}
// enqueueTun hands pkt to routine i's tun queue, blocking if the queue is
// full. Returns false (without consuming pkt) when the queue does not exist.
func (bp *batchPipelines) enqueueTun(i int, pkt *overlay.Packet) bool {
	queue := bp.tunQueue(i)
	if queue == nil {
		return false
	}
	queue <- pkt
	return true
}
// newPacket gets a packet from the shared pool, or nil when batching is
// disabled or no pool was created.
func (bp *batchPipelines) newPacket() *overlay.Packet {
	if bp != nil && bp.enabled && bp.pool != nil {
		return bp.pool.Get()
	}
	return nil
}

View File

@@ -43,7 +43,7 @@ func (b *Bits) Check(l logrus.FieldLogger, i uint64) bool {
}
// Not within the window
l.Debugf("rejected a packet (top) %d %d delta %d\n", b.current, i, b.current-i)
l.Debugf("rejected a packet (top) %d %d\n", b.current, i)
return false
}

View File

@@ -1,8 +1,10 @@
package cert
import (
"encoding/hex"
"encoding/pem"
"fmt"
"time"
"golang.org/x/crypto/ed25519"
)
@@ -138,6 +140,101 @@ func MarshalSigningPrivateKeyToPEM(curve Curve, b []byte) []byte {
}
}
// Backward compatibility functions for older API
func MarshalX25519PublicKey(b []byte) []byte {
return MarshalPublicKeyToPEM(Curve_CURVE25519, b)
}
func MarshalX25519PrivateKey(b []byte) []byte {
return MarshalPrivateKeyToPEM(Curve_CURVE25519, b)
}
func MarshalPublicKey(curve Curve, b []byte) []byte {
return MarshalPublicKeyToPEM(curve, b)
}
func MarshalPrivateKey(curve Curve, b []byte) []byte {
return MarshalPrivateKeyToPEM(curve, b)
}
// NebulaCertificate is a compatibility wrapper for the old API
type NebulaCertificate struct {
Details NebulaCertificateDetails
Signature []byte
cert Certificate
}
// NebulaCertificateDetails is a compatibility wrapper for certificate details
type NebulaCertificateDetails struct {
Name string
NotBefore time.Time
NotAfter time.Time
PublicKey []byte
IsCA bool
Issuer []byte
Curve Curve
}
// UnmarshalNebulaCertificateFromPEM provides backward compatibility with the old API
func UnmarshalNebulaCertificateFromPEM(b []byte) (*NebulaCertificate, []byte, error) {
c, rest, err := UnmarshalCertificateFromPEM(b)
if err != nil {
return nil, rest, err
}
issuerBytes, err := func() ([]byte, error) {
issuer := c.Issuer()
if issuer == "" {
return nil, nil
}
decoded, err := hex.DecodeString(issuer)
if err != nil {
return nil, fmt.Errorf("failed to decode issuer fingerprint: %w", err)
}
return decoded, nil
}()
if err != nil {
return nil, rest, err
}
pubKey := c.PublicKey()
if pubKey != nil {
pubKey = append([]byte(nil), pubKey...)
}
sig := c.Signature()
if sig != nil {
sig = append([]byte(nil), sig...)
}
return &NebulaCertificate{
Details: NebulaCertificateDetails{
Name: c.Name(),
NotBefore: c.NotBefore(),
NotAfter: c.NotAfter(),
PublicKey: pubKey,
IsCA: c.IsCA(),
Issuer: issuerBytes,
Curve: c.Curve(),
},
Signature: sig,
cert: c,
}, rest, nil
}
// IssuerString returns the issuer in hex format for compatibility
func (n *NebulaCertificate) IssuerString() string {
if n.Details.Issuer == nil {
return ""
}
return hex.EncodeToString(n.Details.Issuer)
}
// Certificate returns the underlying certificate (read-only)
func (n *NebulaCertificate) Certificate() Certificate {
return n.cert
}
// UnmarshalPrivateKeyFromPEM will try to unmarshal the first pem block in a byte array, returning any non
// consumed data or an error on failure
func UnmarshalPrivateKeyFromPEM(b []byte) ([]byte, []byte, Curve, error) {

View File

@@ -3,9 +3,6 @@ package main
import (
"flag"
"fmt"
"log"
"net/http"
_ "net/http/pprof"
"os"
"github.com/sirupsen/logrus"
@@ -61,10 +58,6 @@ func main() {
os.Exit(1)
}
go func() {
log.Println(http.ListenAndServe("0.0.0.0:6060", nil))
}()
if !*configTest {
ctrl.Start()
notifyReady(l)

View File

@@ -17,7 +17,7 @@ import (
"dario.cat/mergo"
"github.com/sirupsen/logrus"
"go.yaml.in/yaml/v3"
"gopkg.in/yaml.v3"
)
type C struct {

View File

@@ -10,7 +10,7 @@ import (
"github.com/slackhq/nebula/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.yaml.in/yaml/v3"
"gopkg.in/yaml.v3"
)
func TestConfig_Load(t *testing.T) {

View File

@@ -13,7 +13,7 @@ import (
"github.com/slackhq/nebula/noiseutil"
)
const ReplayWindow = 4096
const ReplayWindow = 1024
type ConnectionState struct {
eKey *NebulaCipherState

View File

@@ -174,10 +174,6 @@ func (c *Control) GetHostmap() *HostMap {
return c.f.hostMap
}
func (c *Control) GetF() *Interface {
return c.f
}
func (c *Control) GetCertState() *CertState {
return c.f.pki.getCertState()
}

View File

@@ -20,7 +20,7 @@ import (
"github.com/slackhq/nebula/udp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.yaml.in/yaml/v3"
"gopkg.in/yaml.v3"
)
func BenchmarkHotPath(b *testing.B) {
@@ -97,41 +97,6 @@ func TestGoodHandshake(t *testing.T) {
theirControl.Stop()
}
func TestGoodHandshakeNoOverlap(t *testing.T) {
ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version2, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})
myControl, myVpnIpNet, myUdpAddr, _ := newSimpleServer(cert.Version2, ca, caKey, "me", "10.128.0.1/24", nil)
theirControl, theirVpnIpNet, theirUdpAddr, _ := newSimpleServer(cert.Version2, ca, caKey, "them", "2001::69/24", nil) //look ma, cross-stack!
// Put their info in our lighthouse
myControl.InjectLightHouseAddr(theirVpnIpNet[0].Addr(), theirUdpAddr)
// Start the servers
myControl.Start()
theirControl.Start()
empty := []byte{}
t.Log("do something to cause a handshake")
myControl.GetF().SendMessageToVpnAddr(header.Test, header.MessageNone, theirVpnIpNet[0].Addr(), empty, empty, empty)
t.Log("Have them consume my stage 0 packet. They have a tunnel now")
theirControl.InjectUDPPacket(myControl.GetFromUDP(true))
t.Log("Get their stage 1 packet")
stage1Packet := theirControl.GetFromUDP(true)
t.Log("Have me consume their stage 1 packet. I have a tunnel now")
myControl.InjectUDPPacket(stage1Packet)
t.Log("Wait until we see a test packet come through to make sure we give the tunnel time to complete")
myControl.WaitForType(header.Test, 0, theirControl)
t.Log("Make sure our host infos are correct")
assertHostInfoPair(t, myUdpAddr, theirUdpAddr, myVpnIpNet, theirVpnIpNet, myControl, theirControl)
myControl.Stop()
theirControl.Stop()
}
func TestWrongResponderHandshake(t *testing.T) {
ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})
@@ -499,35 +464,6 @@ func TestRelays(t *testing.T) {
r.RenderHostmaps("Final hostmaps", myControl, relayControl, theirControl)
}
func TestRelaysDontCareAboutIps(t *testing.T) {
ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version2, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})
myControl, myVpnIpNet, _, _ := newSimpleServer(cert.Version2, ca, caKey, "me ", "10.128.0.1/24", m{"relay": m{"use_relays": true}})
relayControl, relayVpnIpNet, relayUdpAddr, _ := newSimpleServer(cert.Version2, ca, caKey, "relay ", "2001::9999/24", m{"relay": m{"am_relay": true}})
theirControl, theirVpnIpNet, theirUdpAddr, _ := newSimpleServer(cert.Version2, ca, caKey, "them ", "10.128.0.2/24", m{"relay": m{"use_relays": true}})
// Teach my how to get to the relay and that their can be reached via the relay
myControl.InjectLightHouseAddr(relayVpnIpNet[0].Addr(), relayUdpAddr)
myControl.InjectRelays(theirVpnIpNet[0].Addr(), []netip.Addr{relayVpnIpNet[0].Addr()})
relayControl.InjectLightHouseAddr(theirVpnIpNet[0].Addr(), theirUdpAddr)
// Build a router so we don't have to reason who gets which packet
r := router.NewR(t, myControl, relayControl, theirControl)
defer r.RenderFlow()
// Start the servers
myControl.Start()
relayControl.Start()
theirControl.Start()
t.Log("Trigger a handshake from me to them via the relay")
myControl.InjectTunUDPPacket(theirVpnIpNet[0].Addr(), 80, myVpnIpNet[0].Addr(), 80, []byte("Hi from me"))
p := r.RouteForAllUntilTxTun(theirControl)
r.Log("Assert the tunnel works")
assertUdpPacket(t, []byte("Hi from me"), p, myVpnIpNet[0].Addr(), theirVpnIpNet[0].Addr(), 80, 80)
r.RenderHostmaps("Final hostmaps", myControl, relayControl, theirControl)
}
func TestReestablishRelays(t *testing.T) {
ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})
myControl, myVpnIpNet, _, _ := newSimpleServer(cert.Version1, ca, caKey, "me ", "10.128.0.1/24", m{"relay": m{"use_relays": true}})
@@ -1291,109 +1227,3 @@ func TestV2NonPrimaryWithLighthouse(t *testing.T) {
myControl.Stop()
theirControl.Stop()
}
func TestV2NonPrimaryWithOffNetLighthouse(t *testing.T) {
ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version2, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})
lhControl, lhVpnIpNet, lhUdpAddr, _ := newSimpleServer(cert.Version2, ca, caKey, "lh ", "2001::1/64", m{"lighthouse": m{"am_lighthouse": true}})
o := m{
"static_host_map": m{
lhVpnIpNet[0].Addr().String(): []string{lhUdpAddr.String()},
},
"lighthouse": m{
"hosts": []string{lhVpnIpNet[0].Addr().String()},
"local_allow_list": m{
// Try and block our lighthouse updates from using the actual addresses assigned to this computer
// If we start discovering addresses the test router doesn't know about then test traffic cant flow
"10.0.0.0/24": true,
"::/0": false,
},
},
}
myControl, myVpnIpNet, _, _ := newSimpleServer(cert.Version2, ca, caKey, "me ", "10.128.0.2/24, ff::2/64", o)
theirControl, theirVpnIpNet, _, _ := newSimpleServer(cert.Version2, ca, caKey, "them", "10.128.0.3/24, ff::3/64", o)
// Build a router so we don't have to reason who gets which packet
r := router.NewR(t, lhControl, myControl, theirControl)
defer r.RenderFlow()
// Start the servers
lhControl.Start()
myControl.Start()
theirControl.Start()
t.Log("Stand up an ipv6 tunnel between me and them")
assert.True(t, myVpnIpNet[1].Addr().Is6())
assert.True(t, theirVpnIpNet[1].Addr().Is6())
assertTunnel(t, myVpnIpNet[1].Addr(), theirVpnIpNet[1].Addr(), myControl, theirControl, r)
lhControl.Stop()
myControl.Stop()
theirControl.Stop()
}
func TestGoodHandshakeUnsafeDest(t *testing.T) {
unsafePrefix := "192.168.6.0/24"
ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version2, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})
theirControl, theirVpnIpNet, theirUdpAddr, _ := newSimpleServerWithUdpAndUnsafeNetworks(cert.Version2, ca, caKey, "spooky", "10.128.0.2/24", netip.MustParseAddrPort("10.64.0.2:4242"), unsafePrefix, nil)
route := m{"route": unsafePrefix, "via": theirVpnIpNet[0].Addr().String()}
myCfg := m{
"tun": m{
"unsafe_routes": []m{route},
},
}
myControl, myVpnIpNet, myUdpAddr, myConfig := newSimpleServer(cert.Version2, ca, caKey, "me", "10.128.0.1/24", myCfg)
t.Logf("my config %v", myConfig)
// Put their info in our lighthouse
myControl.InjectLightHouseAddr(theirVpnIpNet[0].Addr(), theirUdpAddr)
spookyDest := netip.MustParseAddr("192.168.6.4")
// Start the servers
myControl.Start()
theirControl.Start()
t.Log("Send a udp packet through to begin standing up the tunnel, this should come out the other side")
myControl.InjectTunUDPPacket(spookyDest, 80, myVpnIpNet[0].Addr(), 80, []byte("Hi from me"))
t.Log("Have them consume my stage 0 packet. They have a tunnel now")
theirControl.InjectUDPPacket(myControl.GetFromUDP(true))
t.Log("Get their stage 1 packet so that we can play with it")
stage1Packet := theirControl.GetFromUDP(true)
t.Log("I consume a garbage packet with a proper nebula header for our tunnel")
// this should log a statement and get ignored, allowing the real handshake packet to complete the tunnel
badPacket := stage1Packet.Copy()
badPacket.Data = badPacket.Data[:len(badPacket.Data)-header.Len]
myControl.InjectUDPPacket(badPacket)
t.Log("Have me consume their real stage 1 packet. I have a tunnel now")
myControl.InjectUDPPacket(stage1Packet)
t.Log("Wait until we see my cached packet come through")
myControl.WaitForType(1, 0, theirControl)
t.Log("Make sure our host infos are correct")
assertHostInfoPair(t, myUdpAddr, theirUdpAddr, myVpnIpNet, theirVpnIpNet, myControl, theirControl)
t.Log("Get that cached packet and make sure it looks right")
myCachedPacket := theirControl.GetFromTun(true)
assertUdpPacket(t, []byte("Hi from me"), myCachedPacket, myVpnIpNet[0].Addr(), spookyDest, 80, 80)
//reply
theirControl.InjectTunUDPPacket(myVpnIpNet[0].Addr(), 80, spookyDest, 80, []byte("Hi from the spookyman"))
//wait for reply
theirControl.WaitForType(1, 0, myControl)
theirCachedPacket := myControl.GetFromTun(true)
assertUdpPacket(t, []byte("Hi from the spookyman"), theirCachedPacket, spookyDest, myVpnIpNet[0].Addr(), 80, 80)
t.Log("Do a bidirectional tunnel test")
r := router.NewR(t, myControl, theirControl)
defer r.RenderFlow()
assertTunnel(t, myVpnIpNet[0].Addr(), theirVpnIpNet[0].Addr(), myControl, theirControl, r)
r.RenderHostmaps("Final hostmaps", myControl, theirControl)
myControl.Stop()
theirControl.Stop()
}

View File

@@ -22,14 +22,15 @@ import (
"github.com/slackhq/nebula/config"
"github.com/slackhq/nebula/e2e/router"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.yaml.in/yaml/v3"
"gopkg.in/yaml.v3"
)
type m = map[string]any
// newSimpleServer creates a nebula instance with many assumptions
func newSimpleServer(v cert.Version, caCrt cert.Certificate, caKey []byte, name string, sVpnNetworks string, overrides m) (*nebula.Control, []netip.Prefix, netip.AddrPort, *config.C) {
l := NewTestLogger()
var vpnNetworks []netip.Prefix
for _, sn := range strings.Split(sVpnNetworks, ",") {
vpnIpNet, err := netip.ParsePrefix(strings.TrimSpace(sn))
@@ -55,54 +56,7 @@ func newSimpleServer(v cert.Version, caCrt cert.Certificate, caKey []byte, name
budpIp[3] = 239
udpAddr = netip.AddrPortFrom(netip.AddrFrom16(budpIp), 4242)
}
return newSimpleServerWithUdp(v, caCrt, caKey, name, sVpnNetworks, udpAddr, overrides)
}
func newSimpleServerWithUdp(v cert.Version, caCrt cert.Certificate, caKey []byte, name string, sVpnNetworks string, udpAddr netip.AddrPort, overrides m) (*nebula.Control, []netip.Prefix, netip.AddrPort, *config.C) {
return newSimpleServerWithUdpAndUnsafeNetworks(v, caCrt, caKey, name, sVpnNetworks, udpAddr, "", overrides)
}
func newSimpleServerWithUdpAndUnsafeNetworks(v cert.Version, caCrt cert.Certificate, caKey []byte, name string, sVpnNetworks string, udpAddr netip.AddrPort, sUnsafeNetworks string, overrides m) (*nebula.Control, []netip.Prefix, netip.AddrPort, *config.C) {
l := NewTestLogger()
var vpnNetworks []netip.Prefix
for _, sn := range strings.Split(sVpnNetworks, ",") {
vpnIpNet, err := netip.ParsePrefix(strings.TrimSpace(sn))
if err != nil {
panic(err)
}
vpnNetworks = append(vpnNetworks, vpnIpNet)
}
if len(vpnNetworks) == 0 {
panic("no vpn networks")
}
firewallInbound := []m{{
"proto": "any",
"port": "any",
"host": "any",
}}
var unsafeNetworks []netip.Prefix
if sUnsafeNetworks != "" {
firewallInbound = []m{{
"proto": "any",
"port": "any",
"host": "any",
"local_cidr": "0.0.0.0/0",
}}
for _, sn := range strings.Split(sUnsafeNetworks, ",") {
x, err := netip.ParsePrefix(strings.TrimSpace(sn))
if err != nil {
panic(err)
}
unsafeNetworks = append(unsafeNetworks, x)
}
}
_, _, myPrivKey, myPEM := cert_test.NewTestCert(v, cert.Curve_CURVE25519, caCrt, caKey, name, time.Now(), time.Now().Add(5*time.Minute), vpnNetworks, unsafeNetworks, []string{})
_, _, myPrivKey, myPEM := cert_test.NewTestCert(v, cert.Curve_CURVE25519, caCrt, caKey, name, time.Now(), time.Now().Add(5*time.Minute), vpnNetworks, nil, []string{})
caB, err := caCrt.MarshalPEM()
if err != nil {
@@ -122,7 +76,11 @@ func newSimpleServerWithUdpAndUnsafeNetworks(v cert.Version, caCrt cert.Certific
"port": "any",
"host": "any",
}},
"inbound": firewallInbound,
"inbound": []m{{
"proto": "any",
"port": "any",
"host": "any",
}},
},
//"handshakes": m{
// "try_interval": "1s",
@@ -308,10 +266,10 @@ func assertHostInfoPair(t *testing.T, addrA, addrB netip.AddrPort, vpnNetsA, vpn
// Get both host infos
//TODO: CERT-V2 we may want to loop over each vpnAddr and assert all the things
hBinA := controlA.GetHostInfoByVpnAddr(vpnNetsB[0].Addr(), false)
require.NotNil(t, hBinA, "Host B was not found by vpnAddr in controlA")
assert.NotNil(t, hBinA, "Host B was not found by vpnAddr in controlA")
hAinB := controlB.GetHostInfoByVpnAddr(vpnNetsA[0].Addr(), false)
require.NotNil(t, hAinB, "Host A was not found by vpnAddr in controlB")
assert.NotNil(t, hAinB, "Host A was not found by vpnAddr in controlB")
// Check that both vpn and real addr are correct
assert.EqualValues(t, getAddrs(vpnNetsB), hBinA.VpnAddrs, "Host B VpnIp is wrong in control A")

View File

@@ -318,50 +318,3 @@ func TestCertMismatchCorrection(t *testing.T) {
myControl.Stop()
theirControl.Stop()
}
func TestCrossStackRelaysWork(t *testing.T) {
ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version2, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})
myControl, myVpnIpNet, _, _ := newSimpleServer(cert.Version2, ca, caKey, "me ", "10.128.0.1/24,fc00::1/64", m{"relay": m{"use_relays": true}})
relayControl, relayVpnIpNet, relayUdpAddr, _ := newSimpleServer(cert.Version2, ca, caKey, "relay ", "10.128.0.128/24,fc00::128/64", m{"relay": m{"am_relay": true}})
theirUdp := netip.MustParseAddrPort("10.0.0.2:4242")
theirControl, theirVpnIpNet, theirUdpAddr, _ := newSimpleServerWithUdp(cert.Version2, ca, caKey, "them ", "fc00::2/64", theirUdp, m{"relay": m{"use_relays": true}})
//myVpnV4 := myVpnIpNet[0]
myVpnV6 := myVpnIpNet[1]
relayVpnV4 := relayVpnIpNet[0]
relayVpnV6 := relayVpnIpNet[1]
theirVpnV6 := theirVpnIpNet[0]
// Teach my how to get to the relay and that their can be reached via the relay
myControl.InjectLightHouseAddr(relayVpnV4.Addr(), relayUdpAddr)
myControl.InjectLightHouseAddr(relayVpnV6.Addr(), relayUdpAddr)
myControl.InjectRelays(theirVpnV6.Addr(), []netip.Addr{relayVpnV6.Addr()})
relayControl.InjectLightHouseAddr(theirVpnV6.Addr(), theirUdpAddr)
// Build a router so we don't have to reason who gets which packet
r := router.NewR(t, myControl, relayControl, theirControl)
defer r.RenderFlow()
// Start the servers
myControl.Start()
relayControl.Start()
theirControl.Start()
t.Log("Trigger a handshake from me to them via the relay")
myControl.InjectTunUDPPacket(theirVpnV6.Addr(), 80, myVpnV6.Addr(), 80, []byte("Hi from me"))
p := r.RouteForAllUntilTxTun(theirControl)
r.Log("Assert the tunnel works")
assertUdpPacket(t, []byte("Hi from me"), p, myVpnV6.Addr(), theirVpnV6.Addr(), 80, 80)
t.Log("reply?")
theirControl.InjectTunUDPPacket(myVpnV6.Addr(), 80, theirVpnV6.Addr(), 80, []byte("Hi from them"))
p = r.RouteForAllUntilTxTun(myControl)
assertUdpPacket(t, []byte("Hi from them"), p, theirVpnV6.Addr(), myVpnV6.Addr(), 80, 80)
r.RenderHostmaps("Final hostmaps", myControl, relayControl, theirControl)
//t.Log("finish up")
//myControl.Stop()
//theirControl.Stop()
//relayControl.Stop()
}

View File

@@ -417,45 +417,30 @@ func AddFirewallRulesFromConfig(l *logrus.Logger, inbound bool, c *config.C, fw
return nil
}
var ErrUnknownNetworkType = errors.New("unknown network type")
var ErrPeerRejected = errors.New("remote address is not within a network that we handle")
var ErrInvalidRemoteIP = errors.New("remote address is not in remote certificate networks")
var ErrInvalidLocalIP = errors.New("local address is not in list of handled local addresses")
var ErrInvalidRemoteIP = errors.New("remote IP is not in remote certificate subnets")
var ErrInvalidLocalIP = errors.New("local IP is not in list of handled local IPs")
var ErrNoMatchingRule = errors.New("no matching rule in firewall table")
// Drop returns an error if the packet should be dropped, explaining why. It
// returns nil if the packet should not be dropped.
func (f *Firewall) Drop(fp firewall.Packet, incoming bool, h *HostInfo, caPool *cert.CAPool, localCache firewall.ConntrackCache, now time.Time) error {
func (f *Firewall) Drop(fp firewall.Packet, incoming bool, h *HostInfo, caPool *cert.CAPool, localCache *firewall.ConntrackCache) error {
// Check if we spoke to this tuple, if we did then allow this packet
if f.inConns(fp, h, caPool, localCache, now) {
if f.inConns(fp, h, caPool, localCache) {
return nil
}
// Make sure remote address matches nebula certificate, and determine how to treat it
if h.networks == nil {
// Simple case: Certificate has one address and no unsafe networks
if h.vpnAddrs[0] != fp.RemoteAddr {
// Make sure remote address matches nebula certificate
if h.networks != nil {
if !h.networks.Contains(fp.RemoteAddr) {
f.metrics(incoming).droppedRemoteAddr.Inc(1)
return ErrInvalidRemoteIP
}
} else {
nwType, ok := h.networks.Lookup(fp.RemoteAddr)
if !ok {
// Simple case: Certificate has one address and no unsafe networks
if h.vpnAddrs[0] != fp.RemoteAddr {
f.metrics(incoming).droppedRemoteAddr.Inc(1)
return ErrInvalidRemoteIP
}
switch nwType {
case NetworkTypeVPN:
break // nothing special
case NetworkTypeVPNPeer:
f.metrics(incoming).droppedRemoteAddr.Inc(1)
return ErrPeerRejected // reject for now, one day this may have different FW rules
case NetworkTypeUnsafe:
break // nothing special, one day this may have different FW rules
default:
f.metrics(incoming).droppedRemoteAddr.Inc(1)
return ErrUnknownNetworkType //should never happen
}
}
// Make sure we are supposed to be handling this local ip address
@@ -476,7 +461,7 @@ func (f *Firewall) Drop(fp firewall.Packet, incoming bool, h *HostInfo, caPool *
}
// We always want to conntrack since it is a faster operation
f.addConn(fp, incoming, now)
f.addConn(fp, incoming)
return nil
}
@@ -505,11 +490,9 @@ func (f *Firewall) EmitStats() {
metrics.GetOrRegisterGauge("firewall.rules.hash", nil).Update(int64(f.GetRuleHashFNV()))
}
func (f *Firewall) inConns(fp firewall.Packet, h *HostInfo, caPool *cert.CAPool, localCache firewall.ConntrackCache, now time.Time) bool {
if localCache != nil {
if _, ok := localCache[fp]; ok {
return true
}
func (f *Firewall) inConns(fp firewall.Packet, h *HostInfo, caPool *cert.CAPool, localCache *firewall.ConntrackCache) bool {
if localCache != nil && localCache.Has(fp) {
return true
}
conntrack := f.Conntrack
conntrack.Lock()
@@ -517,7 +500,7 @@ func (f *Firewall) inConns(fp firewall.Packet, h *HostInfo, caPool *cert.CAPool,
// Purge every time we test
ep, has := conntrack.TimerWheel.Purge()
if has {
f.evict(ep, now)
f.evict(ep)
}
c, ok := conntrack.Conns[fp]
@@ -564,23 +547,23 @@ func (f *Firewall) inConns(fp firewall.Packet, h *HostInfo, caPool *cert.CAPool,
switch fp.Protocol {
case firewall.ProtoTCP:
c.Expires = now.Add(f.TCPTimeout)
c.Expires = time.Now().Add(f.TCPTimeout)
case firewall.ProtoUDP:
c.Expires = now.Add(f.UDPTimeout)
c.Expires = time.Now().Add(f.UDPTimeout)
default:
c.Expires = now.Add(f.DefaultTimeout)
c.Expires = time.Now().Add(f.DefaultTimeout)
}
conntrack.Unlock()
if localCache != nil {
localCache[fp] = struct{}{}
localCache.Add(fp)
}
return true
}
func (f *Firewall) addConn(fp firewall.Packet, incoming bool, now time.Time) {
func (f *Firewall) addConn(fp firewall.Packet, incoming bool) {
var timeout time.Duration
c := &conn{}
@@ -596,7 +579,7 @@ func (f *Firewall) addConn(fp firewall.Packet, incoming bool, now time.Time) {
conntrack := f.Conntrack
conntrack.Lock()
if _, ok := conntrack.Conns[fp]; !ok {
conntrack.TimerWheel.Advance(now)
conntrack.TimerWheel.Advance(time.Now())
conntrack.TimerWheel.Add(fp, timeout)
}
@@ -604,14 +587,14 @@ func (f *Firewall) addConn(fp firewall.Packet, incoming bool, now time.Time) {
// firewall reload
c.incoming = incoming
c.rulesVersion = f.rulesVersion
c.Expires = now.Add(timeout)
c.Expires = time.Now().Add(timeout)
conntrack.Conns[fp] = c
conntrack.Unlock()
}
// Evict checks if a conntrack entry has expired, if so it is removed, if not it is re-added to the wheel
// Caller must own the connMutex lock!
func (f *Firewall) evict(p firewall.Packet, now time.Time) {
func (f *Firewall) evict(p firewall.Packet) {
// Are we still tracking this conn?
conntrack := f.Conntrack
t, ok := conntrack.Conns[p]
@@ -619,11 +602,11 @@ func (f *Firewall) evict(p firewall.Packet, now time.Time) {
return
}
newT := t.Expires.Sub(now)
newT := t.Expires.Sub(time.Now())
// Timeout is in the future, re-add the timer
if newT > 0 {
conntrack.TimerWheel.Advance(now)
conntrack.TimerWheel.Advance(time.Now())
conntrack.TimerWheel.Add(p, newT)
return
}

View File

@@ -1,6 +1,7 @@
package firewall
import (
"sync"
"sync/atomic"
"time"
@@ -9,13 +10,58 @@ import (
// ConntrackCache is used as a local routine cache to know if a given flow
// has been seen in the conntrack table.
type ConntrackCache map[Packet]struct{}
type ConntrackCache struct {
	mu      sync.Mutex          // guards entries
	entries map[Packet]struct{} // set of flows already observed by this routine
}
// newConntrackCache returns an empty, ready-to-use cache.
func newConntrackCache() *ConntrackCache {
	c := &ConntrackCache{}
	c.entries = make(map[Packet]struct{})
	return c
}
// Has reports whether p has already been recorded in the cache.
// A nil receiver behaves as an always-empty cache.
func (c *ConntrackCache) Has(p Packet) bool {
	if c == nil {
		return false
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	_, present := c.entries[p]
	return present
}
// Add records p in the cache. Adding to a nil receiver is a no-op.
func (c *ConntrackCache) Add(p Packet) {
	if c == nil {
		return
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	c.entries[p] = struct{}{}
}
// Len returns the number of flows currently cached; a nil receiver has length 0.
func (c *ConntrackCache) Len() int {
	if c == nil {
		return 0
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	return len(c.entries)
}
// Reset discards all entries, pre-sizing the replacement map with capHint.
// Resetting a nil receiver is a no-op.
func (c *ConntrackCache) Reset(capHint int) {
	if c == nil {
		return
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	c.entries = make(map[Packet]struct{}, capHint)
}
type ConntrackCacheTicker struct {
cacheV uint64
cacheTick atomic.Uint64
cache ConntrackCache
cache *ConntrackCache
}
func NewConntrackCacheTicker(d time.Duration) *ConntrackCacheTicker {
@@ -23,9 +69,7 @@ func NewConntrackCacheTicker(d time.Duration) *ConntrackCacheTicker {
return nil
}
c := &ConntrackCacheTicker{
cache: ConntrackCache{},
}
c := &ConntrackCacheTicker{cache: newConntrackCache()}
go c.tick(d)
@@ -41,17 +85,17 @@ func (c *ConntrackCacheTicker) tick(d time.Duration) {
// Get checks if the cache ticker has moved to the next version before returning
// the map. If it has moved, we reset the map.
func (c *ConntrackCacheTicker) Get(l *logrus.Logger) ConntrackCache {
func (c *ConntrackCacheTicker) Get(l *logrus.Logger) *ConntrackCache {
if c == nil {
return nil
}
if tick := c.cacheTick.Load(); tick != c.cacheV {
c.cacheV = tick
if ll := len(c.cache); ll > 0 {
if ll := c.cache.Len(); ll > 0 {
if l.Level == logrus.DebugLevel {
l.WithField("len", ll).Debug("resetting conntrack cache")
}
c.cache = make(ConntrackCache, ll)
c.cache.Reset(ll)
}
}

View File

@@ -8,8 +8,6 @@ import (
"testing"
"time"
"github.com/gaissmai/bart"
"github.com/sirupsen/logrus"
"github.com/slackhq/nebula/cert"
"github.com/slackhq/nebula/config"
"github.com/slackhq/nebula/firewall"
@@ -151,8 +149,7 @@ func TestFirewall_Drop(t *testing.T) {
l := test.NewLogger()
ob := &bytes.Buffer{}
l.SetOutput(ob)
myVpnNetworksTable := new(bart.Lite)
myVpnNetworksTable.Insert(netip.MustParsePrefix("1.1.1.1/8"))
p := firewall.Packet{
LocalAddr: netip.MustParseAddr("1.2.3.4"),
RemoteAddr: netip.MustParseAddr("1.2.3.4"),
@@ -177,7 +174,7 @@ func TestFirewall_Drop(t *testing.T) {
},
vpnAddrs: []netip.Addr{netip.MustParseAddr("1.2.3.4")},
}
h.buildNetworks(myVpnNetworksTable, &c)
h.buildNetworks(c.networks, c.unsafeNetworks)
fw := NewFirewall(l, time.Second, time.Minute, time.Hour, &c)
require.NoError(t, fw.AddRule(true, firewall.ProtoAny, 0, 0, []string{"any"}, "", netip.Prefix{}, netip.Prefix{}, "", ""))
@@ -229,9 +226,6 @@ func TestFirewall_DropV6(t *testing.T) {
ob := &bytes.Buffer{}
l.SetOutput(ob)
myVpnNetworksTable := new(bart.Lite)
myVpnNetworksTable.Insert(netip.MustParsePrefix("fd00::/7"))
p := firewall.Packet{
LocalAddr: netip.MustParseAddr("fd12::34"),
RemoteAddr: netip.MustParseAddr("fd12::34"),
@@ -256,7 +250,7 @@ func TestFirewall_DropV6(t *testing.T) {
},
vpnAddrs: []netip.Addr{netip.MustParseAddr("fd12::34")},
}
h.buildNetworks(myVpnNetworksTable, &c)
h.buildNetworks(c.networks, c.unsafeNetworks)
fw := NewFirewall(l, time.Second, time.Minute, time.Hour, &c)
require.NoError(t, fw.AddRule(true, firewall.ProtoAny, 0, 0, []string{"any"}, "", netip.Prefix{}, netip.Prefix{}, "", ""))
@@ -459,8 +453,6 @@ func TestFirewall_Drop2(t *testing.T) {
l := test.NewLogger()
ob := &bytes.Buffer{}
l.SetOutput(ob)
myVpnNetworksTable := new(bart.Lite)
myVpnNetworksTable.Insert(netip.MustParsePrefix("1.1.1.1/8"))
p := firewall.Packet{
LocalAddr: netip.MustParseAddr("1.2.3.4"),
@@ -486,7 +478,7 @@ func TestFirewall_Drop2(t *testing.T) {
},
vpnAddrs: []netip.Addr{network.Addr()},
}
h.buildNetworks(myVpnNetworksTable, c.Certificate)
h.buildNetworks(c.Certificate.Networks(), c.Certificate.UnsafeNetworks())
c1 := cert.CachedCertificate{
Certificate: &dummyCert{
@@ -501,7 +493,7 @@ func TestFirewall_Drop2(t *testing.T) {
peerCert: &c1,
},
}
h1.buildNetworks(myVpnNetworksTable, c1.Certificate)
h1.buildNetworks(c1.Certificate.Networks(), c1.Certificate.UnsafeNetworks())
fw := NewFirewall(l, time.Second, time.Minute, time.Hour, c.Certificate)
require.NoError(t, fw.AddRule(true, firewall.ProtoAny, 0, 0, []string{"default-group", "test-group"}, "", netip.Prefix{}, netip.Prefix{}, "", ""))
@@ -518,8 +510,6 @@ func TestFirewall_Drop3(t *testing.T) {
l := test.NewLogger()
ob := &bytes.Buffer{}
l.SetOutput(ob)
myVpnNetworksTable := new(bart.Lite)
myVpnNetworksTable.Insert(netip.MustParsePrefix("1.1.1.1/8"))
p := firewall.Packet{
LocalAddr: netip.MustParseAddr("1.2.3.4"),
@@ -551,7 +541,7 @@ func TestFirewall_Drop3(t *testing.T) {
},
vpnAddrs: []netip.Addr{network.Addr()},
}
h1.buildNetworks(myVpnNetworksTable, c1.Certificate)
h1.buildNetworks(c1.Certificate.Networks(), c1.Certificate.UnsafeNetworks())
c2 := cert.CachedCertificate{
Certificate: &dummyCert{
@@ -566,7 +556,7 @@ func TestFirewall_Drop3(t *testing.T) {
},
vpnAddrs: []netip.Addr{network.Addr()},
}
h2.buildNetworks(myVpnNetworksTable, c2.Certificate)
h2.buildNetworks(c2.Certificate.Networks(), c2.Certificate.UnsafeNetworks())
c3 := cert.CachedCertificate{
Certificate: &dummyCert{
@@ -581,7 +571,7 @@ func TestFirewall_Drop3(t *testing.T) {
},
vpnAddrs: []netip.Addr{network.Addr()},
}
h3.buildNetworks(myVpnNetworksTable, c3.Certificate)
h3.buildNetworks(c3.Certificate.Networks(), c3.Certificate.UnsafeNetworks())
fw := NewFirewall(l, time.Second, time.Minute, time.Hour, c.Certificate)
require.NoError(t, fw.AddRule(true, firewall.ProtoAny, 1, 1, []string{}, "host1", netip.Prefix{}, netip.Prefix{}, "", ""))
@@ -607,8 +597,6 @@ func TestFirewall_Drop3V6(t *testing.T) {
l := test.NewLogger()
ob := &bytes.Buffer{}
l.SetOutput(ob)
myVpnNetworksTable := new(bart.Lite)
myVpnNetworksTable.Insert(netip.MustParsePrefix("fd00::/7"))
p := firewall.Packet{
LocalAddr: netip.MustParseAddr("fd12::34"),
@@ -632,7 +620,7 @@ func TestFirewall_Drop3V6(t *testing.T) {
},
vpnAddrs: []netip.Addr{network.Addr()},
}
h.buildNetworks(myVpnNetworksTable, c.Certificate)
h.buildNetworks(c.Certificate.Networks(), c.Certificate.UnsafeNetworks())
// Test a remote address match
fw := NewFirewall(l, time.Second, time.Minute, time.Hour, c.Certificate)
@@ -645,8 +633,6 @@ func TestFirewall_DropConntrackReload(t *testing.T) {
l := test.NewLogger()
ob := &bytes.Buffer{}
l.SetOutput(ob)
myVpnNetworksTable := new(bart.Lite)
myVpnNetworksTable.Insert(netip.MustParsePrefix("1.1.1.1/8"))
p := firewall.Packet{
LocalAddr: netip.MustParseAddr("1.2.3.4"),
@@ -673,7 +659,7 @@ func TestFirewall_DropConntrackReload(t *testing.T) {
},
vpnAddrs: []netip.Addr{network.Addr()},
}
h.buildNetworks(myVpnNetworksTable, c.Certificate)
h.buildNetworks(c.Certificate.Networks(), c.Certificate.UnsafeNetworks())
fw := NewFirewall(l, time.Second, time.Minute, time.Hour, c.Certificate)
require.NoError(t, fw.AddRule(true, firewall.ProtoAny, 0, 0, []string{"any"}, "", netip.Prefix{}, netip.Prefix{}, "", ""))
@@ -710,8 +696,6 @@ func TestFirewall_DropIPSpoofing(t *testing.T) {
l := test.NewLogger()
ob := &bytes.Buffer{}
l.SetOutput(ob)
myVpnNetworksTable := new(bart.Lite)
myVpnNetworksTable.Insert(netip.MustParsePrefix("192.0.2.1/24"))
c := cert.CachedCertificate{
Certificate: &dummyCert{
@@ -733,7 +717,7 @@ func TestFirewall_DropIPSpoofing(t *testing.T) {
},
vpnAddrs: []netip.Addr{c1.Certificate.Networks()[0].Addr()},
}
h1.buildNetworks(myVpnNetworksTable, c1.Certificate)
h1.buildNetworks(c1.Certificate.Networks(), c1.Certificate.UnsafeNetworks())
fw := NewFirewall(l, time.Second, time.Minute, time.Hour, c.Certificate)
@@ -1063,171 +1047,6 @@ func TestFirewall_convertRule(t *testing.T) {
assert.Equal(t, "group1", r.Group)
}
// testcase bundles a peer HostInfo, the packet to evaluate against the
// firewall, the peer's certificate, and the error fw.Drop is expected to
// return for that packet (nil when the packet should be allowed).
type testcase struct {
	h   *HostInfo
	p   firewall.Packet
	c   cert.Certificate
	err error
}
// Test runs the firewall's Drop on the case's packet with a fresh conntrack
// state and asserts the result matches the expected error (nil = allowed).
func (c *testcase) Test(t *testing.T, fw *Firewall) {
	t.Helper()
	pool := cert.NewCAPool()
	resetConntrack(fw)
	got := fw.Drop(c.p, true, c.h, pool, nil)
	if c.err != nil {
		require.ErrorIs(t, c.err, got, "failed to drop remote address %s", c.p.RemoteAddr)
		return
	}
	require.NoError(t, got, "failed to not drop remote address %s", c.p.RemoteAddr)
}
// buildTestCase constructs a peer host ("host1") whose certificate owns
// theirPrefixes, plus a UDP packet from that peer's first address to our
// first vpn address, paired with the error fw.Drop is expected to return
// (nil when the packet should pass).
func buildTestCase(setup testsetup, err error, theirPrefixes ...netip.Prefix) testcase {
	c1 := dummyCert{
		name:     "host1",
		networks: theirPrefixes,
		groups:   []string{"default-group"},
		issuer:   "signer-shasum",
	}
	h := HostInfo{
		ConnectionState: &ConnectionState{
			peerCert: &cert.CachedCertificate{
				Certificate:    &c1,
				InvertedGroups: map[string]struct{}{"default-group": {}},
			},
		},
		vpnAddrs: make([]netip.Addr, len(theirPrefixes)),
	}
	// The peer claims one vpn address per prefix issued in its certificate.
	for i := range theirPrefixes {
		h.vpnAddrs[i] = theirPrefixes[i].Addr()
	}
	h.buildNetworks(setup.myVpnNetworksTable, &c1)
	p := firewall.Packet{
		LocalAddr:  setup.c.Networks()[0].Addr(), //todo?
		RemoteAddr: theirPrefixes[0].Addr(),
		LocalPort:  10,
		RemotePort: 90,
		Protocol:   firewall.ProtoUDP,
		Fragment:   false,
	}
	return testcase{
		h:   &h,
		p:   p,
		c:   &c1,
		err: err,
	}
}
// testsetup carries the local certificate, the bart table of our vpn
// networks, and a firewall pre-loaded with an allow-any inbound rule.
type testsetup struct {
	c                  dummyCert
	myVpnNetworksTable *bart.Lite
	fw                 *Firewall
}
// newSetup builds a testsetup for a local host named "me" whose certificate
// owns the given vpn prefixes.
func newSetup(t *testing.T, l *logrus.Logger, myPrefixes ...netip.Prefix) testsetup {
	c := dummyCert{
		name:     "me",
		networks: myPrefixes,
		groups:   []string{"default-group"},
		issuer:   "signer-shasum",
	}
	return newSetupFromCert(t, l, c)
}
// newSetupFromCert builds a testsetup from an explicit local certificate:
// it indexes the cert's networks into a bart table and creates a firewall
// carrying a single allow-any inbound rule.
func newSetupFromCert(t *testing.T, l *logrus.Logger, c dummyCert) testsetup {
	networks := new(bart.Lite)
	for _, prefix := range c.Networks() {
		networks.Insert(prefix)
	}
	fw := NewFirewall(l, time.Second, time.Minute, time.Hour, &c)
	require.NoError(t, fw.AddRule(true, firewall.ProtoAny, 0, 0, []string{"any"}, "", netip.Prefix{}, netip.Prefix{}, "", ""))
	return testsetup{c: c, fw: fw, myVpnNetworksTable: networks}
}
// TestFirewall_Drop_EnforceIPMatch exercises the firewall's enforcement that
// a packet's local and remote addresses must line up with the certificates
// on each end of the tunnel, across single-prefix, multi-prefix, and unsafe
// route scenarios.
func TestFirewall_Drop_EnforceIPMatch(t *testing.T) {
	t.Parallel()
	l := test.NewLogger()
	ob := &bytes.Buffer{}
	l.SetOutput(ob)
	myPrefix := netip.MustParsePrefix("1.1.1.1/8")
	// for now, it's okay that these are all "incoming", the logic this test tries to check doesn't care about in/out
	t.Run("allow inbound all matching", func(t *testing.T) {
		t.Parallel()
		setup := newSetup(t, l, myPrefix)
		tc := buildTestCase(setup, nil, netip.MustParsePrefix("1.2.3.4/24"))
		tc.Test(t, setup.fw)
	})
	t.Run("allow inbound local matching", func(t *testing.T) {
		t.Parallel()
		setup := newSetup(t, l, myPrefix)
		tc := buildTestCase(setup, ErrInvalidLocalIP, netip.MustParsePrefix("1.2.3.4/24"))
		// NOTE(review): despite the subtest name this expects a drop —
		// 1.2.3.8 is inside our /8 but is not our exact vpn address
		// (1.1.1.1); presumably the local address must match exactly.
		tc.p.LocalAddr = netip.MustParseAddr("1.2.3.8")
		tc.Test(t, setup.fw)
	})
	t.Run("block inbound remote mismatched", func(t *testing.T) {
		t.Parallel()
		setup := newSetup(t, l, myPrefix)
		tc := buildTestCase(setup, ErrInvalidRemoteIP, netip.MustParsePrefix("1.2.3.4/24"))
		// 9.9.9.9 lies outside every network in the peer's certificate.
		tc.p.RemoteAddr = netip.MustParseAddr("9.9.9.9")
		tc.Test(t, setup.fw)
	})
	t.Run("Block a vpn peer packet", func(t *testing.T) {
		t.Parallel()
		setup := newSetup(t, l, myPrefix)
		// The peer's 2.2.2.2/24 does not overlap our 1.1.1.1/8 network.
		tc := buildTestCase(setup, ErrPeerRejected, netip.MustParsePrefix("2.2.2.2/24"))
		tc.Test(t, setup.fw)
	})
	twoPrefixes := []netip.Prefix{
		netip.MustParsePrefix("1.2.3.4/24"), netip.MustParsePrefix("2.2.2.2/24"),
	}
	t.Run("allow inbound one matching", func(t *testing.T) {
		t.Parallel()
		setup := newSetup(t, l, myPrefix)
		tc := buildTestCase(setup, nil, twoPrefixes...)
		tc.Test(t, setup.fw)
	})
	t.Run("block inbound multimismatch", func(t *testing.T) {
		t.Parallel()
		setup := newSetup(t, l, myPrefix)
		tc := buildTestCase(setup, ErrInvalidRemoteIP, twoPrefixes...)
		// Remote address matches neither of the peer's two prefixes.
		tc.p.RemoteAddr = netip.MustParseAddr("9.9.9.9")
		tc.Test(t, setup.fw)
	})
	t.Run("allow inbound 2nd one matching", func(t *testing.T) {
		t.Parallel()
		setup2 := newSetup(t, l, netip.MustParsePrefix("2.2.2.1/24"))
		tc := buildTestCase(setup2, nil, twoPrefixes...)
		// Source the packet from the peer's second prefix, the one that
		// overlaps this setup's 2.2.2.1/24 network.
		tc.p.RemoteAddr = twoPrefixes[1].Addr()
		tc.Test(t, setup2.fw)
	})
	t.Run("allow inbound unsafe route", func(t *testing.T) {
		t.Parallel()
		unsafePrefix := netip.MustParsePrefix("192.168.0.0/24")
		c := dummyCert{
			name:           "me",
			networks:       []netip.Prefix{myPrefix},
			unsafeNetworks: []netip.Prefix{unsafePrefix},
			groups:         []string{"default-group"},
			issuer:         "signer-shasum",
		}
		unsafeSetup := newSetupFromCert(t, l, c)
		tc := buildTestCase(unsafeSetup, nil, twoPrefixes...)
		// A local address inside our unsafe network passes the IP-match
		// check but must still match a firewall rule.
		tc.p.LocalAddr = netip.MustParseAddr("192.168.0.3")
		tc.err = ErrNoMatchingRule
		tc.Test(t, unsafeSetup.fw) //should hit firewall and bounce off
		require.NoError(t, unsafeSetup.fw.AddRule(true, firewall.ProtoAny, 0, 0, []string{"any"}, "", netip.Prefix{}, unsafePrefix, "", ""))
		tc.err = nil
		tc.Test(t, unsafeSetup.fw) //should pass
	})
}
type addRuleCall struct {
incoming bool
proto uint8

16
go.mod
View File

@@ -6,6 +6,7 @@ require (
dario.cat/mergo v1.0.2
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be
github.com/armon/go-radix v1.0.0
github.com/cilium/ebpf v0.12.3
github.com/cyberdelia/go-metrics-graphite v0.0.0-20161219230853-39f87cc3b432
github.com/flynn/noise v1.1.0
github.com/gaissmai/bart v0.25.0
@@ -22,17 +23,16 @@ require (
github.com/stefanberger/go-pkcs11uri v0.0.0-20230803200340-78284954bff6
github.com/stretchr/testify v1.11.1
github.com/vishvananda/netlink v1.3.1
go.yaml.in/yaml/v3 v3.0.4
golang.org/x/crypto v0.44.0
golang.org/x/crypto v0.43.0
golang.org/x/exp v0.0.0-20230725093048-515e97ebf090
golang.org/x/net v0.46.0
golang.org/x/sync v0.18.0
golang.org/x/sys v0.38.0
golang.org/x/term v0.37.0
golang.org/x/net v0.45.0
golang.org/x/sync v0.17.0
golang.org/x/sys v0.37.0
golang.org/x/term v0.36.0
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b
golang.zx2c4.com/wireguard/windows v0.5.3
google.golang.org/protobuf v1.36.10
google.golang.org/protobuf v1.36.8
gopkg.in/yaml.v3 v3.0.1
gvisor.dev/gvisor v0.0.0-20240423190808-9d7a357edefe
)
@@ -50,6 +50,6 @@ require (
github.com/vishvananda/netns v0.0.5 // indirect
go.yaml.in/yaml/v2 v2.4.2 // indirect
golang.org/x/mod v0.24.0 // indirect
golang.org/x/time v0.7.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/tools v0.33.0 // indirect
)

37
go.sum
View File

@@ -17,6 +17,8 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cilium/ebpf v0.12.3 h1:8ht6F9MquybnY97at+VDZb3eQQr8ev79RueWeVaEcG4=
github.com/cilium/ebpf v0.12.3/go.mod h1:TctK1ivibvI3znr66ljgi4hqOT8EYQjz1KWBfb1UVgM=
github.com/cyberdelia/go-metrics-graphite v0.0.0-20161219230853-39f87cc3b432 h1:M5QgkYacWj0Xs8MhpIK/5uwU02icXpEoSo9sM2aRCps=
github.com/cyberdelia/go-metrics-graphite v0.0.0-20161219230853-39f87cc3b432/go.mod h1:xwIwAxMvYnVrGJPe2FKx5prTrnAjGOD8zvDOnxnrrkM=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -24,6 +26,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/flynn/noise v1.1.0 h1:KjPQoQCEFdZDiP03phOvGi11+SVVhBG2wOWAorLsstg=
github.com/flynn/noise v1.1.0/go.mod h1:xbMo+0i6+IGbYdJhF31t2eR1BIU0CYc12+BNAKwUTag=
github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA=
github.com/frankban/quicktest v1.14.5/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/gaissmai/bart v0.25.0 h1:eqiokVPqM3F94vJ0bTHXHtH91S8zkKL+bKh+BsGOsJM=
github.com/gaissmai/bart v0.25.0/go.mod h1:GREWQfTLRWz/c5FTOsIw+KkscuFkIV5t8Rp7Nd1Td5c=
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
@@ -78,8 +82,9 @@ github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfn
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
@@ -155,15 +160,13 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
golang.org/x/crypto v0.44.0 h1:A97SsFvM3AIwEEmTBiaxPPTYpDC47w720rdiiUvgoAU=
golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc=
golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
golang.org/x/exp v0.0.0-20230725093048-515e97ebf090 h1:Di6/M8l0O2lCLc6VVRWhgCiApHV8MnQurBnFSHsQtNY=
golang.org/x/exp v0.0.0-20230725093048-515e97ebf090/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc=
golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
@@ -182,8 +185,8 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
golang.org/x/net v0.45.0 h1:RLBg5JKixCy82FtLJpeNlVM0nrSqpCRYzVU1n8kj0tM=
golang.org/x/net v0.45.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -191,8 +194,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -209,16 +212,16 @@ golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU=
golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254=
golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q=
golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ=
golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
@@ -244,8 +247,8 @@ google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miE
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

View File

@@ -2,6 +2,7 @@ package nebula
import (
"net/netip"
"slices"
"time"
"github.com/flynn/noise"
@@ -191,17 +192,17 @@ func ixHandshakeStage1(f *Interface, addr netip.AddrPort, via *ViaSender, packet
return
}
var vpnAddrs []netip.Addr
var filteredNetworks []netip.Prefix
certName := remoteCert.Certificate.Name()
certVersion := remoteCert.Certificate.Version()
fingerprint := remoteCert.Fingerprint
issuer := remoteCert.Certificate.Issuer()
vpnNetworks := remoteCert.Certificate.Networks()
anyVpnAddrsInCommon := false
vpnAddrs := make([]netip.Addr, len(vpnNetworks))
for i, network := range vpnNetworks {
if f.myVpnAddrsTable.Contains(network.Addr()) {
f.l.WithField("vpnNetworks", vpnNetworks).WithField("udpAddr", addr).
for _, network := range remoteCert.Certificate.Networks() {
vpnAddr := network.Addr()
if f.myVpnAddrsTable.Contains(vpnAddr) {
f.l.WithField("vpnAddr", vpnAddr).WithField("udpAddr", addr).
WithField("certName", certName).
WithField("certVersion", certVersion).
WithField("fingerprint", fingerprint).
@@ -209,10 +210,24 @@ func ixHandshakeStage1(f *Interface, addr netip.AddrPort, via *ViaSender, packet
WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("Refusing to handshake with myself")
return
}
vpnAddrs[i] = network.Addr()
if f.myVpnNetworksTable.Contains(network.Addr()) {
anyVpnAddrsInCommon = true
// vpnAddrs outside our vpn networks are of no use to us, filter them out
if !f.myVpnNetworksTable.Contains(vpnAddr) {
continue
}
filteredNetworks = append(filteredNetworks, network)
vpnAddrs = append(vpnAddrs, vpnAddr)
}
if len(vpnAddrs) == 0 {
f.l.WithError(err).WithField("udpAddr", addr).
WithField("certName", certName).
WithField("certVersion", certVersion).
WithField("fingerprint", fingerprint).
WithField("issuer", issuer).
WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).Error("No usable vpn addresses from host, refusing handshake")
return
}
if addr.IsValid() {
@@ -249,30 +264,26 @@ func ixHandshakeStage1(f *Interface, addr netip.AddrPort, via *ViaSender, packet
},
}
msgRxL := f.l.WithFields(m{
"vpnAddrs": vpnAddrs,
"udpAddr": addr,
"certName": certName,
"certVersion": certVersion,
"fingerprint": fingerprint,
"issuer": issuer,
"initiatorIndex": hs.Details.InitiatorIndex,
"responderIndex": hs.Details.ResponderIndex,
"remoteIndex": h.RemoteIndex,
"handshake": m{"stage": 1, "style": "ix_psk0"},
})
if anyVpnAddrsInCommon {
msgRxL.Info("Handshake message received")
} else {
//todo warn if not lighthouse or relay?
msgRxL.Info("Handshake message received, but no vpnNetworks in common.")
}
f.l.WithField("vpnAddrs", vpnAddrs).WithField("udpAddr", addr).
WithField("certName", certName).
WithField("certVersion", certVersion).
WithField("fingerprint", fingerprint).
WithField("issuer", issuer).
WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
Info("Handshake message received")
hs.Details.ResponderIndex = myIndex
hs.Details.Cert = cs.getHandshakeBytes(ci.myCert.Version())
if hs.Details.Cert == nil {
msgRxL.WithField("myCertVersion", ci.myCert.Version()).
f.l.WithField("vpnAddrs", vpnAddrs).WithField("udpAddr", addr).
WithField("certName", certName).
WithField("certVersion", certVersion).
WithField("fingerprint", fingerprint).
WithField("issuer", issuer).
WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).
WithField("certVersion", ci.myCert.Version()).
Error("Unable to handshake with host because no certificate handshake bytes is available")
return
}
@@ -330,7 +341,7 @@ func ixHandshakeStage1(f *Interface, addr netip.AddrPort, via *ViaSender, packet
hostinfo.remotes = f.lightHouse.QueryCache(vpnAddrs)
hostinfo.SetRemote(addr)
hostinfo.buildNetworks(f.myVpnNetworksTable, remoteCert.Certificate)
hostinfo.buildNetworks(filteredNetworks, remoteCert.Certificate.UnsafeNetworks())
existing, err := f.handshakeManager.CheckAndComplete(hostinfo, 0, f)
if err != nil {
@@ -571,22 +582,31 @@ func ixHandshakeStage2(f *Interface, addr netip.AddrPort, via *ViaSender, hh *Ha
hostinfo.relayState.InsertRelayTo(via.relayHI.vpnAddrs[0])
}
correctHostResponded := false
anyVpnAddrsInCommon := false
vpnAddrs := make([]netip.Addr, len(vpnNetworks))
for i, network := range vpnNetworks {
vpnAddrs[i] = network.Addr()
if f.myVpnNetworksTable.Contains(network.Addr()) {
anyVpnAddrsInCommon = true
}
if hostinfo.vpnAddrs[0] == network.Addr() {
// todo is it more correct to see if any of hostinfo.vpnAddrs are in the cert? it should have len==1, but one day it might not?
correctHostResponded = true
var vpnAddrs []netip.Addr
var filteredNetworks []netip.Prefix
for _, network := range vpnNetworks {
// vpnAddrs outside our vpn networks are of no use to us, filter them out
vpnAddr := network.Addr()
if !f.myVpnNetworksTable.Contains(vpnAddr) {
continue
}
filteredNetworks = append(filteredNetworks, network)
vpnAddrs = append(vpnAddrs, vpnAddr)
}
if len(vpnAddrs) == 0 {
f.l.WithError(err).WithField("udpAddr", addr).
WithField("certName", certName).
WithField("certVersion", certVersion).
WithField("fingerprint", fingerprint).
WithField("issuer", issuer).
WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).Error("No usable vpn addresses from host, refusing handshake")
return true
}
// Ensure the right host responded
if !correctHostResponded {
if !slices.Contains(vpnAddrs, hostinfo.vpnAddrs[0]) {
f.l.WithField("intendedVpnAddrs", hostinfo.vpnAddrs).WithField("haveVpnNetworks", vpnNetworks).
WithField("udpAddr", addr).
WithField("certName", certName).
@@ -598,7 +618,6 @@ func ixHandshakeStage2(f *Interface, addr netip.AddrPort, via *ViaSender, hh *Ha
f.handshakeManager.DeleteHostInfo(hostinfo)
// Create a new hostinfo/handshake for the intended vpn ip
//TODO is hostinfo.vpnAddrs[0] always the address to use?
f.handshakeManager.StartHandshake(hostinfo.vpnAddrs[0], func(newHH *HandshakeHostInfo) {
// Block the current used address
newHH.hostinfo.remotes = hostinfo.remotes
@@ -625,7 +644,7 @@ func ixHandshakeStage2(f *Interface, addr netip.AddrPort, via *ViaSender, hh *Ha
ci.window.Update(f.l, 2)
duration := time.Since(hh.startTime).Nanoseconds()
msgRxL := f.l.WithField("vpnAddrs", vpnAddrs).WithField("udpAddr", addr).
f.l.WithField("vpnAddrs", vpnAddrs).WithField("udpAddr", addr).
WithField("certName", certName).
WithField("certVersion", certVersion).
WithField("fingerprint", fingerprint).
@@ -633,17 +652,12 @@ func ixHandshakeStage2(f *Interface, addr netip.AddrPort, via *ViaSender, hh *Ha
WithField("initiatorIndex", hs.Details.InitiatorIndex).WithField("responderIndex", hs.Details.ResponderIndex).
WithField("remoteIndex", h.RemoteIndex).WithField("handshake", m{"stage": 2, "style": "ix_psk0"}).
WithField("durationNs", duration).
WithField("sentCachedPackets", len(hh.packetStore))
if anyVpnAddrsInCommon {
msgRxL.Info("Handshake message received")
} else {
//todo warn if not lighthouse or relay?
msgRxL.Info("Handshake message received, but no vpnNetworks in common.")
}
WithField("sentCachedPackets", len(hh.packetStore)).
Info("Handshake message received")
// Build up the radix for the firewall if we have subnets in the cert
hostinfo.vpnAddrs = vpnAddrs
hostinfo.buildNetworks(f.myVpnNetworksTable, remoteCert.Certificate)
hostinfo.buildNetworks(filteredNetworks, remoteCert.Certificate.UnsafeNetworks())
// Complete our handshake and update metrics, this will replace any existing tunnels for the vpnAddrs here
f.handshakeManager.Complete(hostinfo, f)

View File

@@ -269,12 +269,12 @@ func (hm *HandshakeManager) handleOutbound(vpnIp netip.Addr, lighthouseTriggered
hostinfo.logger(hm.l).WithField("relays", hostinfo.remotes.relays).Info("Attempt to relay through hosts")
// Send a RelayRequest to all known Relay IP's
for _, relay := range hostinfo.remotes.relays {
// Don't relay through the host I'm trying to connect to
// Don't relay to myself
if relay == vpnIp {
continue
}
// Don't relay to myself
// Don't relay through the host I'm trying to connect to
if hm.f.myVpnAddrsTable.Contains(relay) {
continue
}

View File

@@ -212,18 +212,6 @@ func (rs *RelayState) InsertRelay(ip netip.Addr, idx uint32, r *Relay) {
rs.relayForByIdx[idx] = r
}
// NetworkType classifies how a network associated with a HostInfo relates to
// the networks in our own certificate.
type NetworkType uint8

const (
	// NetworkTypeUnknown is the zero value of NetworkType.
	NetworkTypeUnknown NetworkType = iota
	// NetworkTypeVPN is a network that overlaps one or more of the vpnNetworks in our certificate
	NetworkTypeVPN
	// NetworkTypeVPNPeer is a network that does not overlap one of our networks
	NetworkTypeVPNPeer
	// NetworkTypeUnsafe is a network from Certificate.UnsafeNetworks()
	NetworkTypeUnsafe
)
type HostInfo struct {
remote netip.AddrPort
remotes *RemoteList
@@ -237,8 +225,8 @@ type HostInfo struct {
// vpn networks but were removed because they are not usable
vpnAddrs []netip.Addr
// networks is a combination of specific vpn addresses (not prefixes!) and full unsafe networks assigned to this host.
networks *bart.Table[NetworkType]
// networks are both all vpn and unsafe networks assigned to this host
networks *bart.Lite
relayState RelayState
// HandshakePacket records the packets used to create this hostinfo
@@ -742,26 +730,20 @@ func (i *HostInfo) SetRemoteIfPreferred(hm *HostMap, newRemote netip.AddrPort) b
return false
}
// buildNetworks fills in the networks field of HostInfo. It accepts a cert.Certificate so you never ever mix the network types up.
func (i *HostInfo) buildNetworks(myVpnNetworksTable *bart.Lite, c cert.Certificate) {
if len(c.Networks()) == 1 && len(c.UnsafeNetworks()) == 0 {
if myVpnNetworksTable.Contains(c.Networks()[0].Addr()) {
return // Simple case, no BART needed
}
func (i *HostInfo) buildNetworks(networks, unsafeNetworks []netip.Prefix) {
if len(networks) == 1 && len(unsafeNetworks) == 0 {
// Simple case, no CIDRTree needed
return
}
i.networks = new(bart.Table[NetworkType])
for _, network := range c.Networks() {
i.networks = new(bart.Lite)
for _, network := range networks {
nprefix := netip.PrefixFrom(network.Addr(), network.Addr().BitLen())
if myVpnNetworksTable.Contains(network.Addr()) {
i.networks.Insert(nprefix, NetworkTypeVPN)
} else {
i.networks.Insert(nprefix, NetworkTypeVPNPeer)
}
i.networks.Insert(nprefix)
}
for _, network := range c.UnsafeNetworks() {
i.networks.Insert(network, NetworkTypeUnsafe)
for _, network := range unsafeNetworks {
i.networks.Insert(network)
}
}

198
inside.go
View File

@@ -2,18 +2,18 @@ package nebula
import (
"net/netip"
"time"
"unsafe"
"github.com/sirupsen/logrus"
"github.com/slackhq/nebula/firewall"
"github.com/slackhq/nebula/header"
"github.com/slackhq/nebula/iputil"
"github.com/slackhq/nebula/noiseutil"
"github.com/slackhq/nebula/packet"
"github.com/slackhq/nebula/overlay"
"github.com/slackhq/nebula/routing"
)
func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet, nb []byte, out *packet.Packet, q int, localCache firewall.ConntrackCache, now time.Time) {
func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet, nb, out []byte, q int, localCache *firewall.ConntrackCache) {
err := newPacket(packet, false, fwPacket)
if err != nil {
if f.l.Level >= logrus.DebugLevel {
@@ -55,7 +55,7 @@ func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet
})
if hostinfo == nil {
f.rejectInside(packet, out.Payload, q) //todo vector?
f.rejectInside(packet, out, q)
if f.l.Level >= logrus.DebugLevel {
f.l.WithField("vpnAddr", fwPacket.RemoteAddr).
WithField("fwPacket", fwPacket).
@@ -68,11 +68,12 @@ func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet
return
}
dropReason := f.firewall.Drop(*fwPacket, false, hostinfo, f.pki.GetCAPool(), localCache, now)
dropReason := f.firewall.Drop(*fwPacket, false, hostinfo, f.pki.GetCAPool(), localCache)
if dropReason == nil {
f.sendNoMetricsDelayed(header.Message, 0, hostinfo.ConnectionState, hostinfo, netip.AddrPort{}, packet, nb, out, q)
f.sendNoMetrics(header.Message, 0, hostinfo.ConnectionState, hostinfo, netip.AddrPort{}, packet, nb, out, q)
} else {
f.rejectInside(packet, out.Payload, q) //todo vector?
f.rejectInside(packet, out, q)
if f.l.Level >= logrus.DebugLevel {
hostinfo.logger(f.l).
WithField("fwPacket", fwPacket).
@@ -121,10 +122,9 @@ func (f *Interface) rejectOutside(packet []byte, ci *ConnectionState, hostinfo *
f.sendNoMetrics(header.Message, 0, ci, hostinfo, netip.AddrPort{}, out, nb, packet, q)
}
// Handshake will attempt to initiate a tunnel with the provided vpn address. This is a no-op if the tunnel is already established or being established
// it does not check if it is within our vpn networks!
// Handshake will attempt to initiate a tunnel with the provided vpn address if it is within our vpn networks. This is a no-op if the tunnel is already established or being established
func (f *Interface) Handshake(vpnAddr netip.Addr) {
f.handshakeManager.GetOrHandshake(vpnAddr, nil)
f.getOrHandshakeNoRouting(vpnAddr, nil)
}
// getOrHandshakeNoRouting returns nil if the vpnAddr is not routable.
@@ -140,6 +140,7 @@ func (f *Interface) getOrHandshakeNoRouting(vpnAddr netip.Addr, cacheCallback fu
// getOrHandshakeConsiderRouting will try to find the HostInfo to handle this packet, starting a handshake if necessary.
// If the 2nd return var is false then the hostinfo is not ready to be used in a tunnel.
func (f *Interface) getOrHandshakeConsiderRouting(fwPacket *firewall.Packet, cacheCallback func(*HandshakeHostInfo)) (*HostInfo, bool) {
destinationAddr := fwPacket.RemoteAddr
hostinfo, ready := f.getOrHandshakeNoRouting(destinationAddr, cacheCallback)
@@ -219,7 +220,7 @@ func (f *Interface) sendMessageNow(t header.MessageType, st header.MessageSubTyp
}
// check if packet is in outbound fw rules
dropReason := f.firewall.Drop(*fp, false, hostinfo, f.pki.GetCAPool(), nil, time.Now())
dropReason := f.firewall.Drop(*fp, false, hostinfo, f.pki.GetCAPool(), nil)
if dropReason != nil {
if f.l.Level >= logrus.DebugLevel {
f.l.WithField("fwPacket", fp).
@@ -232,10 +233,9 @@ func (f *Interface) sendMessageNow(t header.MessageType, st header.MessageSubTyp
f.sendNoMetrics(header.Message, st, hostinfo.ConnectionState, hostinfo, netip.AddrPort{}, p, nb, out, 0)
}
// SendMessageToVpnAddr handles real addr:port lookup and sends to the current best known address for vpnAddr.
// This function ignores myVpnNetworksTable, and will always attempt to treat the address as a vpnAddr
// SendMessageToVpnAddr handles real addr:port lookup and sends to the current best known address for vpnAddr
func (f *Interface) SendMessageToVpnAddr(t header.MessageType, st header.MessageSubType, vpnAddr netip.Addr, p, nb, out []byte) {
hostInfo, ready := f.handshakeManager.GetOrHandshake(vpnAddr, func(hh *HandshakeHostInfo) {
hostInfo, ready := f.getOrHandshakeNoRouting(vpnAddr, func(hh *HandshakeHostInfo) {
hh.cachePacket(f.l, t, st, p, f.SendMessageToHostInfo, f.cachedPacketMetrics)
})
@@ -337,9 +337,21 @@ func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType
if ci.eKey == nil {
return
}
useRelay := !remote.IsValid() && !hostinfo.remote.IsValid()
target := remote
if !target.IsValid() {
target = hostinfo.remote
}
useRelay := !target.IsValid()
fullOut := out
var pkt *overlay.Packet
if !useRelay && f.batches.Enabled() {
pkt = f.batches.newPacket()
if pkt != nil {
out = pkt.Payload()[:0]
}
}
if useRelay {
if len(out) < header.Len {
// out always has a capacity of mtu, but not always a length greater than the header.Len.
@@ -373,119 +385,85 @@ func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType
}
var err error
if len(p) > 0 && slicesOverlap(out, p) {
tmp := make([]byte, len(p))
copy(tmp, p)
p = tmp
}
out, err = ci.eKey.EncryptDanger(out, out, p, c, nb)
if noiseutil.EncryptLockNeeded {
ci.writeLock.Unlock()
}
if err != nil {
if pkt != nil {
pkt.Release()
}
hostinfo.logger(f.l).WithError(err).
WithField("udpAddr", remote).WithField("counter", c).
WithField("udpAddr", target).WithField("counter", c).
WithField("attemptedCounter", c).
Error("Failed to encrypt outgoing packet")
return
}
if remote.IsValid() {
err = f.writers[q].WriteTo(out, remote)
if err != nil {
hostinfo.logger(f.l).WithError(err).
WithField("udpAddr", remote).Error("Failed to write outgoing packet")
}
} else if hostinfo.remote.IsValid() {
err = f.writers[q].WriteTo(out, hostinfo.remote)
if err != nil {
hostinfo.logger(f.l).WithError(err).
WithField("udpAddr", remote).Error("Failed to write outgoing packet")
}
} else {
// Try to send via a relay
for _, relayIP := range hostinfo.relayState.CopyRelayIps() {
relayHostInfo, relay, err := f.hostMap.QueryVpnAddrsRelayFor(hostinfo.vpnAddrs, relayIP)
if err != nil {
hostinfo.relayState.DeleteRelay(relayIP)
hostinfo.logger(f.l).WithField("relay", relayIP).WithError(err).Info("sendNoMetrics failed to find HostInfo")
continue
if target.IsValid() {
if pkt != nil {
pkt.Len = len(out)
if f.l.Level >= logrus.DebugLevel {
f.l.WithFields(logrus.Fields{
"queue": q,
"dest": target,
"payload_len": pkt.Len,
"use_batches": true,
"remote_index": hostinfo.remoteIndexId,
}).Debug("enqueueing packet to UDP batch queue")
}
f.SendVia(relayHostInfo, relay, out, nb, fullOut[:header.Len+len(out)], true)
break
if f.tryQueuePacket(q, pkt, target) {
return
}
if f.l.Level >= logrus.DebugLevel {
f.l.WithFields(logrus.Fields{
"queue": q,
"dest": target,
}).Debug("failed to enqueue packet; falling back to immediate send")
}
f.writeImmediatePacket(q, pkt, target, hostinfo)
return
}
}
}
func (f *Interface) sendNoMetricsDelayed(t header.MessageType, st header.MessageSubType, ci *ConnectionState, hostinfo *HostInfo, remote netip.AddrPort, p, nb []byte, out *packet.Packet, q int) {
if ci.eKey == nil {
return
}
useRelay := !remote.IsValid() && !hostinfo.remote.IsValid()
fullOut := out.Payload
if useRelay {
if len(out.Payload) < header.Len {
// out always has a capacity of mtu, but not always a length greater than the header.Len.
// Grow it to make sure the next operation works.
out.Payload = out.Payload[:header.Len]
if f.tryQueueDatagram(q, out, target) {
return
}
// Save a header's worth of data at the front of the 'out' buffer.
out.Payload = out.Payload[header.Len:]
}
if noiseutil.EncryptLockNeeded {
// NOTE: for goboring AESGCMTLS we need to lock because of the nonce check
ci.writeLock.Lock()
}
c := ci.messageCounter.Add(1)
//l.WithField("trace", string(debug.Stack())).Error("out Header ", &Header{Version, t, st, 0, hostinfo.remoteIndexId, c}, p)
out.Payload = header.Encode(out.Payload, header.Version, t, st, hostinfo.remoteIndexId, c)
f.connectionManager.Out(hostinfo)
// Query our LH if we haven't since the last time we've been rebound, this will cause the remote to punch against
// all our addrs and enable a faster roaming.
if t != header.CloseTunnel && hostinfo.lastRebindCount != f.rebindCount {
//NOTE: there is an update hole if a tunnel isn't used and exactly 256 rebinds occur before the tunnel is
// finally used again. This tunnel would eventually be torn down and recreated if this action didn't help.
f.lightHouse.QueryServer(hostinfo.vpnAddrs[0])
hostinfo.lastRebindCount = f.rebindCount
if f.l.Level >= logrus.DebugLevel {
f.l.WithField("vpnAddrs", hostinfo.vpnAddrs).Debug("Lighthouse update triggered for punch due to rebind counter")
}
}
var err error
out.Payload, err = ci.eKey.EncryptDanger(out.Payload, out.Payload, p, c, nb)
if noiseutil.EncryptLockNeeded {
ci.writeLock.Unlock()
}
if err != nil {
hostinfo.logger(f.l).WithError(err).
WithField("udpAddr", remote).WithField("counter", c).
WithField("attemptedCounter", c).
Error("Failed to encrypt outgoing packet")
f.writeImmediate(q, out, target, hostinfo)
return
}
if remote.IsValid() {
err = f.writers[q].Prep(out, remote)
// fall back to relay path
if pkt != nil {
pkt.Release()
}
// Try to send via a relay
for _, relayIP := range hostinfo.relayState.CopyRelayIps() {
relayHostInfo, relay, err := f.hostMap.QueryVpnAddrsRelayFor(hostinfo.vpnAddrs, relayIP)
if err != nil {
hostinfo.logger(f.l).WithError(err).WithField("udpAddr", remote).Error("Failed to write outgoing packet")
}
} else if hostinfo.remote.IsValid() {
err = f.writers[q].Prep(out, hostinfo.remote)
if err != nil {
hostinfo.logger(f.l).WithError(err).WithField("udpAddr", remote).Error("Failed to write outgoing packet")
}
} else {
// Try to send via a relay
for _, relayIP := range hostinfo.relayState.CopyRelayIps() {
relayHostInfo, relay, err := f.hostMap.QueryVpnAddrsRelayFor(hostinfo.vpnAddrs, relayIP)
if err != nil {
hostinfo.relayState.DeleteRelay(relayIP)
hostinfo.logger(f.l).WithField("relay", relayIP).WithError(err).Info("sendNoMetrics failed to find HostInfo")
continue
}
//todo vector!!
f.SendVia(relayHostInfo, relay, out.Payload, nb, fullOut[:header.Len+len(out.Payload)], true)
break
hostinfo.relayState.DeleteRelay(relayIP)
hostinfo.logger(f.l).WithField("relay", relayIP).WithError(err).Info("sendNoMetrics failed to find HostInfo")
continue
}
f.SendVia(relayHostInfo, relay, out, nb, fullOut[:header.Len+len(out)], true)
break
}
}
// slicesOverlap reports whether the two byte slices share any backing
// memory. cipher.AEAD.Seal requires that dst and plaintext occupy
// disjoint regions, so callers use this to decide when a copy is needed.
func slicesOverlap(a, b []byte) bool {
	if len(a) == 0 || len(b) == 0 {
		// An empty slice occupies no memory and can never overlap.
		return false
	}
	aLo := uintptr(unsafe.Pointer(&a[0]))
	bLo := uintptr(unsafe.Pointer(&b[0]))
	aHi := aLo + uintptr(len(a))
	bHi := bLo + uintptr(len(b))
	// Two half-open intervals [lo, hi) intersect iff each starts before
	// the other ends.
	return bLo < aHi && aLo < bHi
}

View File

@@ -4,9 +4,11 @@ import (
"context"
"errors"
"fmt"
"io"
"net/netip"
"os"
"runtime"
"strings"
"sync/atomic"
"time"
@@ -17,12 +19,16 @@ import (
"github.com/slackhq/nebula/firewall"
"github.com/slackhq/nebula/header"
"github.com/slackhq/nebula/overlay"
"github.com/slackhq/nebula/packet"
"github.com/slackhq/nebula/udp"
)
const mtu = 9001
const batch = 1024 //todo config!
const (
mtu = 9001
defaultGSOFlushInterval = 150 * time.Microsecond
defaultBatchQueueDepthFactor = 4
defaultGSOMaxSegments = 8
maxKernelGSOSegments = 64
)
type InterfaceConfig struct {
HostMap *HostMap
@@ -37,6 +43,9 @@ type InterfaceConfig struct {
connectionManager *connectionManager
DropLocalBroadcast bool
DropMulticast bool
EnableGSO bool
EnableGRO bool
GSOMaxSegments int
routines int
MessageMetrics *MessageMetrics
version string
@@ -48,6 +57,8 @@ type InterfaceConfig struct {
reQueryWait time.Duration
ConntrackCacheTimeout time.Duration
BatchFlushInterval time.Duration
BatchQueueDepth int
l *logrus.Logger
}
@@ -85,20 +96,25 @@ type Interface struct {
version string
conntrackCacheTimeout time.Duration
batchQueueDepth int
enableGSO bool
enableGRO bool
gsoMaxSegments int
batchUDPQueueGauge metrics.Gauge
batchUDPFlushCounter metrics.Counter
batchTunQueueGauge metrics.Gauge
batchTunFlushCounter metrics.Counter
batchFlushInterval atomic.Int64
sendSem chan struct{}
writers []udp.Conn
readers []overlay.TunDev
readers []io.ReadWriteCloser
batches batchPipelines
metricHandshakes metrics.Histogram
messageMetrics *MessageMetrics
cachedPacketMetrics *cachedPacketMetrics
listenInN int
listenOutN int
listenInMetric metrics.Histogram
listenOutMetric metrics.Histogram
l *logrus.Logger
}
@@ -168,6 +184,22 @@ func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
return nil, errors.New("no connection manager")
}
if c.GSOMaxSegments <= 0 {
c.GSOMaxSegments = defaultGSOMaxSegments
}
if c.GSOMaxSegments > maxKernelGSOSegments {
c.GSOMaxSegments = maxKernelGSOSegments
}
if c.BatchQueueDepth <= 0 {
c.BatchQueueDepth = c.routines * defaultBatchQueueDepthFactor
}
if c.BatchFlushInterval < 0 {
c.BatchFlushInterval = 0
}
if c.BatchFlushInterval == 0 && c.EnableGSO {
c.BatchFlushInterval = defaultGSOFlushInterval
}
cs := c.pki.getCertState()
ifce := &Interface{
pki: c.pki,
@@ -184,7 +216,7 @@ func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
routines: c.routines,
version: c.version,
writers: make([]udp.Conn, c.routines),
readers: make([]overlay.TunDev, c.routines),
readers: make([]io.ReadWriteCloser, c.routines),
myVpnNetworks: cs.myVpnNetworks,
myVpnNetworksTable: cs.myVpnNetworksTable,
myVpnAddrs: cs.myVpnAddrs,
@@ -193,6 +225,10 @@ func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
relayManager: c.relayManager,
connectionManager: c.connectionManager,
conntrackCacheTimeout: c.ConntrackCacheTimeout,
batchQueueDepth: c.BatchQueueDepth,
enableGSO: c.EnableGSO,
enableGRO: c.EnableGRO,
gsoMaxSegments: c.GSOMaxSegments,
metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)),
messageMetrics: c.MessageMetrics,
@@ -203,12 +239,27 @@ func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
l: c.l,
}
ifce.listenInMetric = metrics.GetOrRegisterHistogram("vhost.listenIn.n", nil, metrics.NewExpDecaySample(1028, 0.015))
ifce.listenOutMetric = metrics.GetOrRegisterHistogram("vhost.listenOut.n", nil, metrics.NewExpDecaySample(1028, 0.015))
ifce.tryPromoteEvery.Store(c.tryPromoteEvery)
ifce.batchUDPQueueGauge = metrics.GetOrRegisterGauge("batch.udp.queue_depth", nil)
ifce.batchUDPFlushCounter = metrics.GetOrRegisterCounter("batch.udp.flushes", nil)
ifce.batchTunQueueGauge = metrics.GetOrRegisterGauge("batch.tun.queue_depth", nil)
ifce.batchTunFlushCounter = metrics.GetOrRegisterCounter("batch.tun.flushes", nil)
ifce.batchFlushInterval.Store(int64(c.BatchFlushInterval))
ifce.sendSem = make(chan struct{}, c.routines)
ifce.batches.init(c.Inside, c.routines, c.BatchQueueDepth, c.GSOMaxSegments)
ifce.reQueryEvery.Store(c.reQueryEvery)
ifce.reQueryWait.Store(int64(c.reQueryWait))
if c.l.Level >= logrus.DebugLevel {
c.l.WithFields(logrus.Fields{
"enableGSO": c.EnableGSO,
"enableGRO": c.EnableGRO,
"gsoMaxSegments": c.GSOMaxSegments,
"batchQueueDepth": c.BatchQueueDepth,
"batchFlush": c.BatchFlushInterval,
"batching": ifce.batches.Enabled(),
}).Debug("initialized batch pipelines")
}
ifce.connectionManager.intf = ifce
@@ -234,7 +285,7 @@ func (f *Interface) activate() {
metrics.GetOrRegisterGauge("routines", nil).Update(int64(f.routines))
// Prepare n tun queues
var reader overlay.TunDev = f.inside
var reader io.ReadWriteCloser = f.inside
for i := 0; i < f.routines; i++ {
if i > 0 {
reader, err = f.inside.NewMultiQueueReader()
@@ -257,77 +308,69 @@ func (f *Interface) run() {
go f.listenOut(i)
}
if f.l.Level >= logrus.DebugLevel {
f.l.WithField("batching", f.batches.Enabled()).Debug("starting interface run loops")
}
if f.batches.Enabled() {
for i := 0; i < f.routines; i++ {
go f.runInsideBatchWorker(i)
go f.runTunWriteQueue(i)
go f.runSendQueue(i)
}
}
// Launch n queues to read packets from tun dev
for i := 0; i < f.routines; i++ {
go f.listenIn(f.readers[i], i)
}
}
func (f *Interface) listenOut(q int) {
func (f *Interface) listenOut(i int) {
runtime.LockOSThread()
var li udp.Conn
if q > 0 {
li = f.writers[q]
if i > 0 {
li = f.writers[i]
} else {
li = f.outside
}
ctCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
lhh := f.lightHouse.NewRequestHandler()
outPackets := make([]*packet.OutPacket, batch)
for i := 0; i < batch; i++ {
outPackets[i] = packet.NewOut()
}
plaintext := make([]byte, udp.MTU)
h := &header.H{}
fwPacket := &firewall.Packet{}
nb := make([]byte, 12, 12)
toSend := make([][]byte, batch)
li.ListenOut(func(pkts []*packet.Packet) {
toSend = toSend[:0]
for i := range outPackets {
outPackets[i].Valid = false
outPackets[i].SegCounter = 0
}
f.readOutsidePacketsMany(pkts, outPackets, h, fwPacket, lhh, nb, q, ctCache.Get(f.l), time.Now())
//we opportunistically tx, but try to also send stragglers
if _, err := f.readers[q].WriteMany(outPackets, q); err != nil {
f.l.WithError(err).Error("Failed to send packets")
}
//todo I broke this
//n := len(toSend)
//if f.l.Level == logrus.DebugLevel {
// f.listenOutMetric.Update(int64(n))
//}
//f.listenOutN = n
li.ListenOut(func(fromUdpAddr netip.AddrPort, payload []byte) {
f.readOutsidePackets(fromUdpAddr, nil, plaintext[:0], payload, h, fwPacket, lhh, nb, i, ctCache.Get(f.l))
})
}
func (f *Interface) listenIn(reader overlay.TunDev, queueNum int) {
func (f *Interface) listenIn(reader io.ReadWriteCloser, i int) {
runtime.LockOSThread()
if f.batches.Enabled() {
if br, ok := reader.(overlay.BatchReader); ok {
f.listenInBatchLocked(reader, br, i)
return
}
}
f.listenInLegacyLocked(reader, i)
}
func (f *Interface) listenInLegacyLocked(reader io.ReadWriteCloser, i int) {
packet := make([]byte, mtu)
out := make([]byte, mtu)
fwPacket := &firewall.Packet{}
nb := make([]byte, 12, 12)
conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
packets := make([]*packet.VirtIOPacket, batch)
outPackets := make([]*packet.Packet, batch)
for i := 0; i < batch; i++ {
packets[i] = packet.NewVIO()
outPackets[i] = packet.New(false) //todo?
}
for {
n, err := reader.ReadMany(packets, queueNum)
//todo!!
n, err := reader.Read(packet)
if err != nil {
if errors.Is(err, os.ErrClosed) && f.closed.Load() {
return
@@ -338,21 +381,581 @@ func (f *Interface) listenIn(reader overlay.TunDev, queueNum int) {
os.Exit(2)
}
if f.l.Level == logrus.DebugLevel {
f.listenInMetric.Update(int64(n))
}
f.listenInN = n
f.consumeInsidePacket(packet[:n], fwPacket, nb, out, i, conntrackCache.Get(f.l))
}
}
now := time.Now()
for i, pkt := range packets[:n] {
outPackets[i].OutLen = -1
f.consumeInsidePacket(pkt.Payload, fwPacket, nb, outPackets[i], queueNum, conntrackCache.Get(f.l), now)
reader.RecycleRxSeg(pkt, i == (n-1), queueNum) //todo handle err?
pkt.Reset()
}
_, err = f.writers[queueNum].WriteBatch(outPackets[:n])
// listenInBatchLocked reads packets from the tun device in batches and
// enqueues them onto the per-queue rx channels consumed by
// runInsideBatchWorker. It runs with the OS thread locked (see listenIn).
// On unrecoverable read errors it exits the process, matching the legacy
// read loop's behavior.
func (f *Interface) listenInBatchLocked(raw io.ReadWriteCloser, reader overlay.BatchReader, i int) {
	pool := f.batches.Pool()
	if pool == nil {
		f.l.Warn("batch pipeline enabled without an allocated pool; falling back to single-packet reads")
		f.listenInLegacyLocked(raw, i)
		return
	}
	for {
		packets, err := reader.ReadIntoBatch(pool)
		if err != nil {
			// Expected during shutdown: the reader was closed under us.
			if errors.Is(err, os.ErrClosed) && f.closed.Load() {
				return
			}
			// Headroom/virtio problems mean this device cannot service
			// batch reads; degrade to the single-packet path instead of
			// dying. (Removed a stray duplicate log line here that
			// mislabeled this read failure as a write failure and fired
			// even on clean shutdown.)
			if isVirtioHeadroomError(err) {
				f.l.WithError(err).Warn("Batch reader fell back due to tun headroom issue")
				f.listenInLegacyLocked(raw, i)
				return
			}
			f.l.WithError(err).Error("Error while reading outbound packet batch")
			os.Exit(2)
		}
		if len(packets) == 0 {
			continue
		}
		for _, pkt := range packets {
			if pkt == nil {
				continue
			}
			// enqueueRx takes ownership on success; on a full queue we
			// must release the buffer ourselves to avoid leaking it.
			if !f.batches.enqueueRx(i, pkt) {
				pkt.Release()
			}
		}
	}
}
// runInsideBatchWorker drains the rx queue for tun reader i, pushing each
// buffered packet through the inside-path pipeline (firewall, encrypt,
// send) and returning the buffer to its pool afterwards. It exits when
// the queue is closed.
func (f *Interface) runInsideBatchWorker(i int) {
	rx := f.batches.rxQueue(i)
	if rx == nil {
		return
	}
	// Per-worker scratch state, allocated once and reused for every packet.
	scratch := make([]byte, mtu)
	fwp := &firewall.Packet{}
	nonceBuf := make([]byte, 12)
	ctTicker := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
	for p := range rx {
		if p == nil {
			continue
		}
		f.consumeInsidePacket(p.Payload(), fwp, nonceBuf, scratch, i, ctTicker.Get(f.l))
		p.Release()
	}
}
// runSendQueue is the per-writer worker that batches queued datagrams and
// flushes them to the UDP socket. Datagrams accumulate in `pending` until
// one of: the GSO segment limit is reached, the slice is full, the flush
// timer fires, or the flush interval is zero (flush immediately).
func (f *Interface) runSendQueue(i int) {
	queue := f.batches.txQueue(i)
	if queue == nil {
		if f.l.Level >= logrus.DebugLevel {
			f.l.WithField("queue", i).Debug("tx queue not initialized; batching disabled for writer")
		}
		return
	}
	writer := f.writerForIndex(i)
	if writer == nil {
		if f.l.Level >= logrus.DebugLevel {
			f.l.WithField("queue", i).Debug("no UDP writer for batch queue")
		}
		return
	}
	if f.l.Level >= logrus.DebugLevel {
		f.l.WithField("queue", i).Debug("send queue worker started")
	}
	// This worker is expected to live for the life of the interface; an
	// exit is noteworthy.
	defer func() {
		if f.l.Level >= logrus.WarnLevel {
			f.l.WithField("queue", i).Warn("send queue worker exited")
		}
	}()
	// Size the pending buffer from the pipeline hint, but never smaller
	// than the GSO segment limit so a full GSO burst fits in one batch.
	batchCap := f.batches.batchSizeHint()
	if batchCap <= 0 {
		batchCap = 1
	}
	gsoLimit := f.effectiveGSOMaxSegments()
	if gsoLimit > batchCap {
		batchCap = gsoLimit
	}
	pending := make([]queuedDatagram, 0, batchCap)
	var (
		flushTimer *time.Timer
		flushC     <-chan time.Time
	)
	// dispatch flushes everything in pending and resets the flush timer.
	// timerFired tells us whether we got here from the timer channel, in
	// which case the timer needs no Stop/drain.
	dispatch := func(reason string, timerFired bool) {
		if len(pending) == 0 {
			return
		}
		batch := pending
		f.flushAndReleaseBatch(i, writer, batch, reason)
		// Zero the entries so released packet pointers are not retained
		// by the reused backing array.
		for idx := range batch {
			batch[idx] = queuedDatagram{}
		}
		pending = pending[:0]
		if flushTimer != nil {
			if !timerFired {
				// Stop the timer and drain its channel if it already
				// fired, so a stale tick can't trigger a spurious flush.
				if !flushTimer.Stop() {
					select {
					case <-flushTimer.C:
					default:
					}
				}
			}
			flushTimer = nil
			flushC = nil
		}
	}
	// armTimer starts the flush timer if one isn't running. A
	// non-positive interval means "no coalescing": flush right away.
	armTimer := func() {
		delay := f.currentBatchFlushInterval()
		if delay <= 0 {
			dispatch("nogso", false)
			return
		}
		if flushTimer == nil {
			flushTimer = time.NewTimer(delay)
			flushC = flushTimer.C
		}
	}
	for {
		select {
		case d := <-queue:
			if d.packet == nil {
				continue
			}
			if f.l.Level >= logrus.DebugLevel {
				f.l.WithFields(logrus.Fields{
					"queue":       i,
					"payload_len": d.packet.Len,
					"dest":        d.addr,
				}).Debug("send queue received packet")
			}
			pending = append(pending, d)
			// Flush as soon as a full GSO burst is ready...
			if gsoLimit > 0 && len(pending) >= gsoLimit {
				dispatch("gso", false)
				continue
			}
			// ...or when the pending buffer is at capacity.
			if len(pending) >= cap(pending) {
				dispatch("cap", false)
				continue
			}
			armTimer()
			f.observeUDPQueueLen(i)
		case <-flushC:
			dispatch("timer", true)
		}
	}
}
// runTunWriteQueue is the per-queue worker that batches decrypted packets
// destined for the tun device and writes them with WriteBatch. Packets
// accumulate in `pending` until the slice fills or the flush timer fires.
// Each packet must have the device's required headroom before being
// written; ones that can't be fixed up are dropped.
func (f *Interface) runTunWriteQueue(i int) {
	queue := f.batches.tunQueue(i)
	if queue == nil {
		return
	}
	writer := f.batches.inside
	if writer == nil {
		return
	}
	requiredHeadroom := writer.BatchHeadroom()
	batchCap := f.batches.batchSizeHint()
	if batchCap <= 0 {
		batchCap = 1
	}
	pending := make([]*overlay.Packet, 0, batchCap)
	var (
		flushTimer *time.Timer
		flushC     <-chan time.Time
	)
	// flush writes out everything in pending and resets the flush timer.
	// timerFired indicates we arrived via the timer channel (no Stop/drain
	// needed in that case).
	flush := func(reason string, timerFired bool) {
		if len(pending) == 0 {
			return
		}
		// Re-check headroom in place and compact the batch, filtering out
		// packets that could not be given enough headroom. `valid` aliases
		// pending's backing array, which is safe because we only ever keep
		// or skip entries in order.
		valid := pending[:0]
		for idx := range pending {
			if !f.ensurePacketHeadroom(&pending[idx], requiredHeadroom, i, reason) {
				pending[idx] = nil
				continue
			}
			if pending[idx] != nil {
				valid = append(valid, pending[idx])
			}
		}
		if len(valid) > 0 {
			if _, err := writer.WriteBatch(valid); err != nil {
				f.l.WithError(err).
					WithField("queue", i).
					WithField("reason", reason).
					Warn("Failed to write tun batch")
				// Fall back to writing each packet individually so one bad
				// batch doesn't drop everything.
				for _, pkt := range valid {
					if pkt != nil {
						f.writePacketToTun(i, pkt)
					}
				}
			}
		}
		pending = pending[:0]
		if flushTimer != nil {
			if !timerFired {
				// Stop and drain so a stale tick can't cause an empty flush.
				if !flushTimer.Stop() {
					select {
					case <-flushTimer.C:
					default:
					}
				}
			}
			flushTimer = nil
			flushC = nil
		}
	}
	// armTimer starts the flush timer if not already running; a
	// non-positive interval disables timed flushing (cap-only batching).
	armTimer := func() {
		delay := f.currentBatchFlushInterval()
		if delay <= 0 {
			return
		}
		if flushTimer == nil {
			flushTimer = time.NewTimer(delay)
			flushC = flushTimer.C
		}
	}
	for {
		select {
		case pkt := <-queue:
			if pkt == nil {
				continue
			}
			if f.ensurePacketHeadroom(&pkt, requiredHeadroom, i, "queue") {
				pending = append(pending, pkt)
			}
			if len(pending) >= cap(pending) {
				flush("cap", false)
				continue
			}
			armTimer()
			f.observeTunQueueLen(i)
		case <-flushC:
			flush("timer", true)
		}
	}
}
// flushAndReleaseBatch writes a batch of queued datagrams to the UDP
// writer, then returns every packet buffer to its pool and bumps the
// flush counter by the number of datagrams handled.
func (f *Interface) flushAndReleaseBatch(index int, writer udp.Conn, batch []queuedDatagram, reason string) {
	n := len(batch)
	if n == 0 {
		return
	}
	f.flushDatagrams(index, writer, batch, reason)
	for idx := range batch {
		p := batch[idx].packet
		if p == nil {
			continue
		}
		p.Release()
		batch[idx].packet = nil
	}
	if counter := f.batchUDPFlushCounter; counter != nil {
		counter.Inc(int64(n))
	}
}
// flushDatagrams writes the queued datagrams out the given UDP writer.
// When the writer supports batched sends (udp.BatchConn), consecutive
// datagrams to the same destination are grouped into chunks of at most
// maxSeg segments so the kernel can coalesce them (GSO); otherwise each
// datagram is written individually. Packet buffers are NOT released here;
// that is the caller's job (see flushAndReleaseBatch).
func (f *Interface) flushDatagrams(index int, writer udp.Conn, batch []queuedDatagram, reason string) {
	if len(batch) == 0 {
		return
	}
	if f.l.Level >= logrus.DebugLevel {
		f.l.WithFields(logrus.Fields{
			"writer":  index,
			"reason":  reason,
			"pending": len(batch),
		}).Debug("udp batch flush summary")
	}
	maxSeg := f.effectiveGSOMaxSegments()
	if bw, ok := writer.(udp.BatchConn); ok {
		chunkCap := maxSeg
		if chunkCap <= 0 {
			chunkCap = len(batch)
		}
		chunk := make([]udp.Datagram, 0, chunkCap)
		var (
			currentAddr netip.AddrPort
			segments    int
		)
		// flushChunk sends the accumulated same-destination chunk and
		// resets the chunk state for the next destination.
		flushChunk := func() {
			if len(chunk) == 0 {
				return
			}
			if f.l.Level >= logrus.DebugLevel {
				f.l.WithFields(logrus.Fields{
					"writer":        index,
					"segments":      len(chunk),
					"dest":          chunk[0].Addr,
					"reason":        reason,
					"pending_total": len(batch),
				}).Debug("flushing UDP batch")
			}
			if err := bw.WriteBatch(chunk); err != nil {
				f.l.WithError(err).
					WithField("writer", index).
					WithField("reason", reason).
					Warn("Failed to write UDP batch")
			}
			chunk = chunk[:0]
			segments = 0
		}
		for _, item := range batch {
			if item.packet == nil || !item.addr.IsValid() {
				continue
			}
			payload := item.packet.Payload()[:item.packet.Len]
			// First datagram of a fresh chunk establishes the destination.
			if segments == 0 {
				currentAddr = item.addr
			}
			// Destination changed or the chunk hit the GSO segment cap:
			// send what we have before starting a new chunk.
			if item.addr != currentAddr || (maxSeg > 0 && segments >= maxSeg) {
				flushChunk()
				currentAddr = item.addr
			}
			chunk = append(chunk, udp.Datagram{Payload: payload, Addr: item.addr})
			segments++
		}
		flushChunk()
		return
	}
	// Non-batching writer: one WriteTo per datagram.
	for _, item := range batch {
		if item.packet == nil || !item.addr.IsValid() {
			continue
		}
		if f.l.Level >= logrus.DebugLevel {
			f.l.WithFields(logrus.Fields{
				"writer":   index,
				"reason":   reason,
				"dest":     item.addr,
				"segments": 1,
			}).Debug("flushing UDP batch")
		}
		if err := writer.WriteTo(item.packet.Payload()[:item.packet.Len], item.addr); err != nil {
			f.l.WithError(err).
				WithField("writer", index).
				WithField("udpAddr", item.addr).
				WithField("reason", reason).
				Warn("Failed to write UDP packet")
		}
	}
}
// tryQueueDatagram copies buf into a pooled packet and hands it to the
// batch sender for queue q. It returns false — without taking ownership
// of buf — when batching is disabled, addr is invalid, no pooled packet
// is available, the payload does not fit, or the queue is full.
func (f *Interface) tryQueueDatagram(q int, buf []byte, addr netip.AddrPort) bool {
	if !f.batches.Enabled() || !addr.IsValid() {
		return false
	}
	p := f.batches.newPacket()
	if p == nil {
		return false
	}
	dst := p.Payload()
	if len(dst) < len(buf) {
		p.Release()
		return false
	}
	p.Len = copy(dst, buf)
	if !f.batches.enqueueTx(q, p, addr) {
		p.Release()
		return false
	}
	f.observeUDPQueueLen(q)
	return true
}
// writerForIndex returns the UDP writer for queue i, or nil when i is
// out of range.
func (f *Interface) writerForIndex(i int) udp.Conn {
	if 0 <= i && i < len(f.writers) {
		return f.writers[i]
	}
	return nil
}
// writeImmediate sends buf to addr on queue q's UDP writer right away,
// bypassing the batch queues. Failures are logged, never returned.
func (f *Interface) writeImmediate(q int, buf []byte, addr netip.AddrPort, hostinfo *HostInfo) {
	w := f.writerForIndex(q)
	if w == nil {
		f.l.WithField("udpAddr", addr).
			WithField("writer", q).
			Error("Failed to write outgoing packet: no writer available")
		return
	}
	err := w.WriteTo(buf, addr)
	if err == nil {
		return
	}
	hostinfo.logger(f.l).
		WithError(err).
		WithField("udpAddr", addr).
		Error("Failed to write outgoing packet")
}
// tryQueuePacket attempts to hand pkt to the batch sender for queue q.
// It reports true when ownership of pkt transferred to the queue; on
// false the caller still owns (and must release) pkt.
func (f *Interface) tryQueuePacket(q int, pkt *overlay.Packet, addr netip.AddrPort) bool {
	if pkt == nil || !addr.IsValid() || !f.batches.Enabled() {
		return false
	}
	if !f.batches.enqueueTx(q, pkt, addr) {
		return false
	}
	f.observeUDPQueueLen(q)
	return true
}
// writeImmediatePacket sends pkt to addr directly on queue q's UDP
// writer, bypassing the batch queues, and always releases pkt before
// returning. Failures are logged, never returned.
func (f *Interface) writeImmediatePacket(q int, pkt *overlay.Packet, addr netip.AddrPort, hostinfo *HostInfo) {
	if pkt == nil {
		return
	}
	defer pkt.Release()
	w := f.writerForIndex(q)
	if w == nil {
		f.l.WithField("udpAddr", addr).
			WithField("writer", q).
			Error("Failed to write outgoing packet: no writer available")
		return
	}
	if err := w.WriteTo(pkt.Payload()[:pkt.Len], addr); err != nil {
		hostinfo.logger(f.l).
			WithError(err).
			WithField("udpAddr", addr).
			Error("Failed to write outgoing packet")
	}
}
// writePacketToTun writes a single decrypted packet to the tun reader for
// queue q, preferring the batch-write interface when the device supports
// it. Used as the per-packet fallback when a WriteBatch of many packets
// fails (see runTunWriteQueue).
func (f *Interface) writePacketToTun(q int, pkt *overlay.Packet) {
	if pkt == nil {
		return
	}
	writer := f.readers[q]
	if writer == nil {
		pkt.Release()
		return
	}
	if bw, ok := writer.(interface {
		WriteBatch([]*overlay.Packet) (int, error)
	}); ok {
		if _, err := bw.WriteBatch([]*overlay.Packet{pkt}); err != nil {
			f.l.WithError(err).WithField("queue", q).Warn("Failed to write tun packet via batch writer")
			pkt.Release()
		}
		// NOTE(review): on WriteBatch success pkt is NOT released here —
		// presumably WriteBatch takes ownership of the buffers it is
		// handed; confirm against the overlay batch-writer contract.
		return
	}
	if _, err := writer.Write(pkt.Payload()[:pkt.Len]); err != nil {
		f.l.WithError(err).Error("Failed to write to tun")
	}
	// Plain Write copies the payload, so the buffer is always returned
	// to the pool on this path, success or failure.
	pkt.Release()
}
// clonePacketWithHeadroom returns a packet carrying the same payload as
// pkt but with at least `required` bytes of headroom before it. On any
// path that returns a new packet, the original pkt is released; callers
// must use only the returned value.
func (f *Interface) clonePacketWithHeadroom(pkt *overlay.Packet, required int) *overlay.Packet {
	if pkt == nil {
		return nil
	}
	payload := pkt.Payload()[:pkt.Len]
	if len(payload) == 0 && required <= 0 {
		// Nothing to copy and no headroom demanded: reuse pkt as-is.
		return pkt
	}
	// Prefer a pooled buffer to avoid an allocation.
	pool := f.batches.Pool()
	if pool != nil {
		if clone := pool.Get(); clone != nil {
			if len(clone.Payload()) >= len(payload) {
				// NOTE(review): this path does not check clone.Offset
				// against `required` — it assumes pool packets are
				// allocated with the full standard headroom; confirm
				// against the pool's packet layout.
				clone.Len = copy(clone.Payload(), payload)
				pkt.Release()
				return clone
			}
			clone.Release()
		}
	}
	// Fall back to a fresh allocation sized for payload plus headroom.
	if required < 0 {
		required = 0
	}
	buf := make([]byte, required+len(payload))
	n := copy(buf[required:], payload)
	pkt.Release()
	return &overlay.Packet{
		Buf:    buf,
		Offset: required,
		Len:    n,
	}
}
// observeUDPQueueLen records the current depth of UDP tx queue i on the
// batch gauge, when metrics are enabled.
func (f *Interface) observeUDPQueueLen(i int) {
	g := f.batchUDPQueueGauge
	if g == nil {
		return
	}
	g.Update(int64(f.batches.txQueueLen(i)))
}
// observeTunQueueLen records the current depth of tun write queue i on
// the batch gauge, when metrics are enabled.
func (f *Interface) observeTunQueueLen(i int) {
	g := f.batchTunQueueGauge
	if g == nil {
		return
	}
	g.Update(int64(f.batches.tunQueueLen(i)))
}
// currentBatchFlushInterval reports the configured flush coalescing
// delay; a zero or negative stored value means "flush immediately".
func (f *Interface) currentBatchFlushInterval() time.Duration {
	stored := f.batchFlushInterval.Load()
	if stored <= 0 {
		return 0
	}
	return time.Duration(stored)
}
// ensurePacketHeadroom guarantees that *pkt carries at least `required`
// bytes of headroom before its payload, cloning it into a roomier buffer
// when necessary (in which case *pkt is replaced). It returns false when
// the packet is nil or no suitable clone could be produced; the drop is
// logged with the queue and reason.
func (f *Interface) ensurePacketHeadroom(pkt **overlay.Packet, required int, queue int, reason string) bool {
	cur := *pkt
	if cur == nil {
		return false
	}
	if required <= 0 || cur.Offset >= required {
		// Already has enough room in front of the payload.
		return true
	}
	replacement := f.clonePacketWithHeadroom(cur, required)
	if replacement == nil {
		f.l.WithFields(logrus.Fields{
			"queue":  queue,
			"reason": reason,
		}).Warn("dropping packet lacking tun headroom")
		return false
	}
	*pkt = replacement
	return true
}
func isVirtioHeadroomError(err error) bool {
if err == nil {
return false
}
msg := err.Error()
return strings.Contains(msg, "headroom") || strings.Contains(msg, "virtio")
}
// effectiveGSOMaxSegments returns the number of segments a single GSO batch
// may carry: 1 when GSO is disabled, otherwise the configured value defaulted
// and clamped to the kernel maximum.
func (f *Interface) effectiveGSOMaxSegments() int {
	if !f.enableGSO {
		return 1
	}
	// Renamed from `max` to avoid shadowing the Go 1.21 builtin.
	segs := f.gsoMaxSegments
	if segs <= 0 {
		segs = defaultGSOMaxSegments
	}
	if segs > maxKernelGSOSegments {
		segs = maxKernelGSOSegments
	}
	return segs
}
// udpOffloadConfigurator is implemented by UDP writers that can have their
// GSO/GRO offload settings changed at runtime (see applyOffloadConfig).
type udpOffloadConfigurator interface {
	ConfigureOffload(enableGSO, enableGRO bool, maxSegments int)
}
// applyOffloadConfig stores the GSO/GRO settings on the interface and fans
// them out to every UDP writer that supports runtime reconfiguration.
// maxSegments is defaulted when non-positive and clamped to the kernel limit.
func (f *Interface) applyOffloadConfig(enableGSO, enableGRO bool, maxSegments int) {
	segs := maxSegments
	if segs <= 0 {
		segs = defaultGSOMaxSegments
	}
	if segs > maxKernelGSOSegments {
		segs = maxKernelGSOSegments
	}
	f.enableGSO = enableGSO
	f.enableGRO = enableGRO
	f.gsoMaxSegments = segs
	for _, w := range f.writers {
		cfg, ok := w.(udpOffloadConfigurator)
		if !ok {
			continue
		}
		cfg.ConfigureOffload(enableGSO, enableGRO, segs)
	}
}
@@ -459,6 +1062,42 @@ func (f *Interface) reloadMisc(c *config.C) {
f.reQueryWait.Store(int64(n))
f.l.Info("timers.requery_wait_duration has changed")
}
if c.HasChanged("listen.gso_flush_timeout") {
d := c.GetDuration("listen.gso_flush_timeout", defaultGSOFlushInterval)
if d < 0 {
d = 0
}
f.batchFlushInterval.Store(int64(d))
f.l.WithField("duration", d).Info("listen.gso_flush_timeout has changed")
} else if c.HasChanged("batch.flush_interval") {
d := c.GetDuration("batch.flush_interval", defaultGSOFlushInterval)
if d < 0 {
d = 0
}
f.batchFlushInterval.Store(int64(d))
f.l.WithField("duration", d).Warn("batch.flush_interval is deprecated; use listen.gso_flush_timeout")
}
if c.HasChanged("batch.queue_depth") {
n := c.GetInt("batch.queue_depth", f.batchQueueDepth)
if n != f.batchQueueDepth {
f.batchQueueDepth = n
f.l.Warn("batch.queue_depth changes require a restart to take effect")
}
}
if c.HasChanged("listen.enable_gso") || c.HasChanged("listen.enable_gro") || c.HasChanged("listen.gso_max_segments") {
enableGSO := c.GetBool("listen.enable_gso", f.enableGSO)
enableGRO := c.GetBool("listen.enable_gro", f.enableGRO)
maxSeg := c.GetInt("listen.gso_max_segments", f.gsoMaxSegments)
f.applyOffloadConfig(enableGSO, enableGRO, maxSeg)
f.l.WithFields(logrus.Fields{
"enableGSO": enableGSO,
"enableGRO": enableGRO,
"gsoMaxSegments": maxSeg,
}).Info("listen GSO/GRO configuration updated")
}
}
func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
@@ -491,11 +1130,6 @@ func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
} else {
certMaxVersion.Update(int64(certState.v1Cert.Version()))
}
if f.l.Level != logrus.DebugLevel {
f.listenInMetric.Update(int64(f.listenInN))
f.listenOutMetric.Update(int64(f.listenOutN))
}
}
}
}

View File

@@ -360,8 +360,7 @@ func (lh *LightHouse) parseLighthouses(c *config.C) ([]netip.Addr, error) {
}
if !lh.myVpnNetworksTable.Contains(addr) {
lh.l.WithFields(m{"vpnAddr": addr, "networks": lh.myVpnNetworks}).
Warn("lighthouse host is not within our networks, lighthouse functionality will work but layer 3 network traffic to the lighthouse will not")
return nil, util.NewContextualError("lighthouse host is not in our networks, invalid", m{"vpnAddr": addr, "networks": lh.myVpnNetworks}, nil)
}
out[i] = addr
}
@@ -432,8 +431,7 @@ func (lh *LightHouse) loadStaticMap(c *config.C, staticList map[netip.Addr]struc
}
if !lh.myVpnNetworksTable.Contains(vpnAddr) {
lh.l.WithFields(m{"vpnAddr": vpnAddr, "networks": lh.myVpnNetworks, "entry": i + 1}).
Warn("static_host_map key is not within our networks, layer 3 network traffic to this host will not work")
return util.NewContextualError("static_host_map key is not in our network, invalid", m{"vpnAddr": vpnAddr, "networks": lh.myVpnNetworks, "entry": i + 1}, nil)
}
vals, ok := v.([]any)
@@ -1339,19 +1337,12 @@ func (lhh *LightHouseHandler) handleHostPunchNotification(n *NebulaMeta, fromVpn
}
}
remoteAllowList := lhh.lh.GetRemoteAllowList()
for _, a := range n.Details.V4AddrPorts {
b := protoV4AddrPortToNetAddrPort(a)
if remoteAllowList.Allow(detailsVpnAddr, b.Addr()) {
punch(b, detailsVpnAddr)
}
punch(protoV4AddrPortToNetAddrPort(a), detailsVpnAddr)
}
for _, a := range n.Details.V6AddrPorts {
b := protoV6AddrPortToNetAddrPort(a)
if remoteAllowList.Allow(detailsVpnAddr, b.Addr()) {
punch(b, detailsVpnAddr)
}
punch(protoV6AddrPortToNetAddrPort(a), detailsVpnAddr)
}
// This sends a nebula test packet to the host trying to contact us. In the case

View File

@@ -14,7 +14,7 @@ import (
"github.com/slackhq/nebula/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.yaml.in/yaml/v3"
"gopkg.in/yaml.v3"
)
func TestOldIPv4Only(t *testing.T) {

43
main.go
View File

@@ -5,6 +5,7 @@ import (
"fmt"
"net"
"net/netip"
"runtime"
"time"
"github.com/sirupsen/logrus"
@@ -13,7 +14,7 @@ import (
"github.com/slackhq/nebula/sshd"
"github.com/slackhq/nebula/udp"
"github.com/slackhq/nebula/util"
"go.yaml.in/yaml/v3"
"gopkg.in/yaml.v3"
)
type m = map[string]any
@@ -75,8 +76,7 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
if c.GetBool("sshd.enabled", false) {
sshStart, err = configSSH(l, ssh, c)
if err != nil {
l.WithError(err).Warn("Failed to configure sshd, ssh debugging will not be available")
sshStart = nil
return nil, util.ContextualizeIfNeeded("Error while configuring the sshd", err)
}
}
@@ -144,6 +144,20 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
// set up our UDP listener
udpConns := make([]udp.Conn, routines)
port := c.GetInt("listen.port", 0)
enableGSO := c.GetBool("listen.enable_gso", true)
enableGRO := c.GetBool("listen.enable_gro", true)
gsoMaxSegments := c.GetInt("listen.gso_max_segments", defaultGSOMaxSegments)
if gsoMaxSegments <= 0 {
gsoMaxSegments = defaultGSOMaxSegments
}
if gsoMaxSegments > maxKernelGSOSegments {
gsoMaxSegments = maxKernelGSOSegments
}
gsoFlushTimeout := c.GetDuration("listen.gso_flush_timeout", defaultGSOFlushInterval)
if gsoFlushTimeout < 0 {
gsoFlushTimeout = 0
}
batchQueueDepth := c.GetInt("batch.queue_depth", 0)
if !configTest {
rawListenHost := c.GetString("listen.host", "0.0.0.0")
@@ -163,13 +177,28 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
listenHost = ips[0].Unmap()
}
useWGDefault := runtime.GOOS == "linux"
useWG := c.GetBool("listen.use_wireguard_stack", useWGDefault)
var mkListener func(*logrus.Logger, netip.Addr, int, bool, int, int) (udp.Conn, error)
if useWG {
mkListener = udp.NewWireguardListener
} else {
mkListener = udp.NewListener
}
for i := 0; i < routines; i++ {
l.Infof("listening on %v", netip.AddrPortFrom(listenHost, uint16(port)))
udpServer, err := udp.NewListener(l, listenHost, port, routines > 1, c.GetInt("listen.batch", 64))
udpServer, err := mkListener(l, listenHost, port, routines > 1, c.GetInt("listen.batch", 64), i)
if err != nil {
return nil, util.NewContextualError("Failed to open udp listener", m{"queue": i}, err)
}
//todo set bpf on zeroth socket
udpServer.ReloadConfig(c)
if cfg, ok := udpServer.(interface {
ConfigureOffload(bool, bool, int)
}); ok {
cfg.ConfigureOffload(enableGSO, enableGRO, gsoMaxSegments)
}
udpConns[i] = udpServer
// If port is dynamic, discover it before the next pass through the for loop
@@ -237,12 +266,17 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
reQueryWait: c.GetDuration("timers.requery_wait_duration", defaultReQueryWait),
DropLocalBroadcast: c.GetBool("tun.drop_local_broadcast", false),
DropMulticast: c.GetBool("tun.drop_multicast", false),
EnableGSO: enableGSO,
EnableGRO: enableGRO,
GSOMaxSegments: gsoMaxSegments,
routines: routines,
MessageMetrics: messageMetrics,
version: buildVersion,
relayManager: NewRelayManager(ctx, l, hostMap, c),
punchy: punchy,
ConntrackCacheTimeout: conntrackCacheTimeout,
BatchFlushInterval: gsoFlushTimeout,
BatchQueueDepth: batchQueueDepth,
l: l,
}
@@ -254,6 +288,7 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
}
ifce.writers = udpConns
ifce.applyOffloadConfig(enableGSO, enableGRO, gsoMaxSegments)
lightHouse.ifce = ifce
ifce.RegisterConfigChangeCallbacks(c)

View File

@@ -7,12 +7,12 @@ import (
"time"
"github.com/google/gopacket/layers"
"github.com/slackhq/nebula/packet"
"golang.org/x/net/ipv6"
"github.com/sirupsen/logrus"
"github.com/slackhq/nebula/firewall"
"github.com/slackhq/nebula/header"
"github.com/slackhq/nebula/overlay"
"golang.org/x/net/ipv4"
)
@@ -20,7 +20,7 @@ const (
minFwPacketLen = 4
)
func (f *Interface) readOutsidePackets(ip netip.AddrPort, via *ViaSender, out []byte, packet []byte, h *header.H, fwPacket *firewall.Packet, lhf *LightHouseHandler, nb []byte, q int, localCache firewall.ConntrackCache, now time.Time) {
func (f *Interface) readOutsidePackets(ip netip.AddrPort, via *ViaSender, out []byte, packet []byte, h *header.H, fwPacket *firewall.Packet, lhf *LightHouseHandler, nb []byte, q int, localCache *firewall.ConntrackCache) {
err := h.Parse(packet)
if err != nil {
// Hole punch packets are 0 or 1 byte big, so lets ignore printing those errors
@@ -62,7 +62,7 @@ func (f *Interface) readOutsidePackets(ip netip.AddrPort, via *ViaSender, out []
switch h.Subtype {
case header.MessageNone:
if !f.decryptToTun(hostinfo, h.MessageCounter, out, packet, fwPacket, nb, q, localCache, now) {
if !f.decryptToTun(hostinfo, h.MessageCounter, out, packet, fwPacket, nb, q, localCache, ip, h.RemoteIndex) {
return
}
case header.MessageRelay:
@@ -97,7 +97,7 @@ func (f *Interface) readOutsidePackets(ip netip.AddrPort, via *ViaSender, out []
case TerminalType:
// If I am the target of this relay, process the unwrapped packet
// From this recursive point, all these variables are 'burned'. We shouldn't rely on them again.
f.readOutsidePackets(netip.AddrPort{}, &ViaSender{relayHI: hostinfo, remoteIdx: relay.RemoteIndex, relay: relay}, out[:0], signedPayload, h, fwPacket, lhf, nb, q, localCache, now)
f.readOutsidePackets(netip.AddrPort{}, &ViaSender{relayHI: hostinfo, remoteIdx: relay.RemoteIndex, relay: relay}, out[:0], signedPayload, h, fwPacket, lhf, nb, q, localCache)
return
case ForwardingType:
// Find the target HostInfo relay object
@@ -217,217 +217,6 @@ func (f *Interface) readOutsidePackets(ip netip.AddrPort, via *ViaSender, out []
f.connectionManager.In(hostinfo)
}
func (f *Interface) readOutsidePacketsMany(packets []*packet.Packet, out []*packet.OutPacket, h *header.H, fwPacket *firewall.Packet, lhf *LightHouseHandler, nb []byte, q int, localCache firewall.ConntrackCache, now time.Time) {
for i, pkt := range packets {
out[i].Scratch = out[i].Scratch[:0]
ip := pkt.AddrPort()
//l.Error("in packet ", header, packet[HeaderLen:])
if ip.IsValid() {
if f.myVpnNetworksTable.Contains(ip.Addr()) {
if f.l.Level >= logrus.DebugLevel {
f.l.WithField("udpAddr", ip).Debug("Refusing to process double encrypted packet")
}
return
}
}
//todo per-segment!
for segment := range pkt.Segments() {
err := h.Parse(segment)
if err != nil {
// Hole punch packets are 0 or 1 byte big, so lets ignore printing those errors
if len(segment) > 1 {
f.l.WithField("packet", pkt).Infof("Error while parsing inbound packet from %s: %s", ip, err)
}
return
}
var hostinfo *HostInfo
// verify if we've seen this index before, otherwise respond to the handshake initiation
if h.Type == header.Message && h.Subtype == header.MessageRelay {
hostinfo = f.hostMap.QueryRelayIndex(h.RemoteIndex)
} else {
hostinfo = f.hostMap.QueryIndex(h.RemoteIndex)
}
var ci *ConnectionState
if hostinfo != nil {
ci = hostinfo.ConnectionState
}
switch h.Type {
case header.Message:
// TODO handleEncrypted sends directly to addr on error. Handle this in the tunneling case.
if !f.handleEncrypted(ci, ip, h) {
return
}
switch h.Subtype {
case header.MessageNone:
if !f.decryptToTunDelayWrite(hostinfo, h.MessageCounter, out[i], pkt, segment, fwPacket, nb, q, localCache, now) {
return
}
case header.MessageRelay:
// The entire body is sent as AD, not encrypted.
// The packet consists of a 16-byte parsed Nebula header, Associated Data-protected payload, and a trailing 16-byte AEAD signature value.
// The packet is guaranteed to be at least 16 bytes at this point, b/c it got past the h.Parse() call above. If it's
// otherwise malformed (meaning, there is no trailing 16 byte AEAD value), then this will result in at worst a 0-length slice
// which will gracefully fail in the DecryptDanger call.
signedPayload := segment[:len(segment)-hostinfo.ConnectionState.dKey.Overhead()]
signatureValue := segment[len(segment)-hostinfo.ConnectionState.dKey.Overhead():]
out[i].Scratch, err = hostinfo.ConnectionState.dKey.DecryptDanger(out[i].Scratch, signedPayload, signatureValue, h.MessageCounter, nb)
if err != nil {
return
}
// Successfully validated the thing. Get rid of the Relay header.
signedPayload = signedPayload[header.Len:]
// Pull the Roaming parts up here, and return in all call paths.
f.handleHostRoaming(hostinfo, ip)
// Track usage of both the HostInfo and the Relay for the received & authenticated packet
f.connectionManager.In(hostinfo)
f.connectionManager.RelayUsed(h.RemoteIndex)
relay, ok := hostinfo.relayState.QueryRelayForByIdx(h.RemoteIndex)
if !ok {
// The only way this happens is if hostmap has an index to the correct HostInfo, but the HostInfo is missing
// its internal mapping. This should never happen.
hostinfo.logger(f.l).WithFields(logrus.Fields{"vpnAddrs": hostinfo.vpnAddrs, "remoteIndex": h.RemoteIndex}).Error("HostInfo missing remote relay index")
return
}
switch relay.Type {
case TerminalType:
// If I am the target of this relay, process the unwrapped packet
// From this recursive point, all these variables are 'burned'. We shouldn't rely on them again.
f.readOutsidePackets(netip.AddrPort{}, &ViaSender{relayHI: hostinfo, remoteIdx: relay.RemoteIndex, relay: relay}, out[i].Scratch[:0], signedPayload, h, fwPacket, lhf, nb, q, localCache, now)
return
case ForwardingType:
// Find the target HostInfo relay object
targetHI, targetRelay, err := f.hostMap.QueryVpnAddrsRelayFor(hostinfo.vpnAddrs, relay.PeerAddr)
if err != nil {
hostinfo.logger(f.l).WithField("relayTo", relay.PeerAddr).WithError(err).WithField("hostinfo.vpnAddrs", hostinfo.vpnAddrs).Info("Failed to find target host info by ip")
return
}
// If that relay is Established, forward the payload through it
if targetRelay.State == Established {
switch targetRelay.Type {
case ForwardingType:
// Forward this packet through the relay tunnel
// Find the target HostInfo
f.SendVia(targetHI, targetRelay, signedPayload, nb, out[i].Scratch, false)
return
case TerminalType:
hostinfo.logger(f.l).Error("Unexpected Relay Type of Terminal")
}
} else {
hostinfo.logger(f.l).WithFields(logrus.Fields{"relayTo": relay.PeerAddr, "relayFrom": hostinfo.vpnAddrs[0], "targetRelayState": targetRelay.State}).Info("Unexpected target relay state")
return
}
}
}
case header.LightHouse:
f.messageMetrics.Rx(h.Type, h.Subtype, 1)
if !f.handleEncrypted(ci, ip, h) {
return
}
d, err := f.decrypt(hostinfo, h.MessageCounter, out[i].Scratch, segment, h, nb)
if err != nil {
hostinfo.logger(f.l).WithError(err).WithField("udpAddr", ip).
WithField("packet", segment).
Error("Failed to decrypt lighthouse packet")
return
}
lhf.HandleRequest(ip, hostinfo.vpnAddrs, d, f)
// Fallthrough to the bottom to record incoming traffic
case header.Test:
f.messageMetrics.Rx(h.Type, h.Subtype, 1)
if !f.handleEncrypted(ci, ip, h) {
return
}
d, err := f.decrypt(hostinfo, h.MessageCounter, out[i].Scratch, segment, h, nb)
if err != nil {
hostinfo.logger(f.l).WithError(err).WithField("udpAddr", ip).
WithField("packet", segment).
Error("Failed to decrypt test packet")
return
}
if h.Subtype == header.TestRequest {
// This testRequest might be from TryPromoteBest, so we should roam
// to the new IP address before responding
f.handleHostRoaming(hostinfo, ip)
f.send(header.Test, header.TestReply, ci, hostinfo, d, nb, out[i].Scratch)
}
// Fallthrough to the bottom to record incoming traffic
// Non encrypted messages below here, they should not fall through to avoid tracking incoming traffic since they
// are unauthenticated
case header.Handshake:
f.messageMetrics.Rx(h.Type, h.Subtype, 1)
f.handshakeManager.HandleIncoming(ip, nil, segment, h)
return
case header.RecvError:
f.messageMetrics.Rx(h.Type, h.Subtype, 1)
f.handleRecvError(ip, h)
return
case header.CloseTunnel:
f.messageMetrics.Rx(h.Type, h.Subtype, 1)
if !f.handleEncrypted(ci, ip, h) {
return
}
hostinfo.logger(f.l).WithField("udpAddr", ip).
Info("Close tunnel received, tearing down.")
f.closeTunnel(hostinfo)
return
case header.Control:
if !f.handleEncrypted(ci, ip, h) {
return
}
d, err := f.decrypt(hostinfo, h.MessageCounter, out[i].Scratch, segment, h, nb)
if err != nil {
hostinfo.logger(f.l).WithError(err).WithField("udpAddr", ip).
WithField("packet", segment).
Error("Failed to decrypt Control packet")
return
}
f.relayManager.HandleControlMsg(hostinfo, d, f)
default:
f.messageMetrics.Rx(h.Type, h.Subtype, 1)
hostinfo.logger(f.l).Debugf("Unexpected packet received from %s", ip)
return
}
f.handleHostRoaming(hostinfo, ip)
f.connectionManager.In(hostinfo)
}
_, err := f.readers[q].WriteOne(out[i], false, q)
if err != nil {
f.l.WithError(err).Error("Failed to write packet")
}
}
}
// closeTunnel closes a tunnel locally, it does not send a closeTunnel packet to the remote
func (f *Interface) closeTunnel(hostInfo *HostInfo) {
final := f.hostMap.DeleteHostInfo(hostInfo)
@@ -677,78 +466,55 @@ func (f *Interface) decrypt(hostinfo *HostInfo, mc uint64, out []byte, packet []
return out, nil
}
func (f *Interface) decryptToTunDelayWrite(hostinfo *HostInfo, messageCounter uint64, out *packet.OutPacket, pkt *packet.Packet, inSegment []byte, fwPacket *firewall.Packet, nb []byte, q int, localCache firewall.ConntrackCache, now time.Time) bool {
var err error
func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out []byte, packet []byte, fwPacket *firewall.Packet, nb []byte, q int, localCache *firewall.ConntrackCache, addr netip.AddrPort, recvIndex uint32) bool {
var (
err error
pkt *overlay.Packet
)
seg, err := f.readers[q].AllocSeg(out, q)
if err != nil {
f.l.WithError(err).Errorln("decryptToTunDelayWrite: failed to allocate segment")
return false
}
out.SegmentPayloads[seg] = out.SegmentPayloads[seg][:0]
out.SegmentPayloads[seg], err = hostinfo.ConnectionState.dKey.DecryptDanger(out.SegmentPayloads[seg], inSegment[:header.Len], inSegment[header.Len:], messageCounter, nb)
if err != nil {
hostinfo.logger(f.l).WithError(err).Error("Failed to decrypt packet")
return false
}
err = newPacket(out.SegmentPayloads[seg], true, fwPacket)
if err != nil {
hostinfo.logger(f.l).WithError(err).WithField("packet", out).
Warnf("Error while validating inbound packet")
return false
}
if !hostinfo.ConnectionState.window.Update(f.l, messageCounter) {
hostinfo.logger(f.l).WithField("fwPacket", fwPacket).
Debugln("dropping out of window packet")
return false
}
dropReason := f.firewall.Drop(*fwPacket, true, hostinfo, f.pki.GetCAPool(), localCache, now)
if dropReason != nil {
// NOTE: We give `packet` as the `out` here since we already decrypted from it and we don't need it anymore
// This gives us a buffer to build the reject packet in
f.rejectOutside(out.SegmentPayloads[seg], hostinfo.ConnectionState, hostinfo, nb, inSegment, q)
if f.l.Level >= logrus.DebugLevel {
hostinfo.logger(f.l).WithField("fwPacket", fwPacket).
WithField("reason", dropReason).
Debugln("dropping inbound packet")
if f.batches.tunQueue(q) != nil {
pkt = f.batches.newPacket()
if pkt != nil {
out = pkt.Payload()[:0]
}
return false
}
f.connectionManager.In(hostinfo)
pkt.OutLen += len(inSegment)
out.Segments[seg] = out.Segments[seg][:len(out.SegmentHeaders[seg])+len(out.SegmentPayloads[seg])]
return true
}
func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out []byte, packet []byte, fwPacket *firewall.Packet, nb []byte, q int, localCache firewall.ConntrackCache, now time.Time) bool {
var err error
out, err = hostinfo.ConnectionState.dKey.DecryptDanger(out, packet[:header.Len], packet[header.Len:], messageCounter, nb)
if err != nil {
if pkt != nil {
pkt.Release()
}
hostinfo.logger(f.l).WithError(err).Error("Failed to decrypt packet")
if addr.IsValid() {
f.maybeSendRecvError(addr, recvIndex)
}
return false
}
err = newPacket(out, true, fwPacket)
if err != nil {
if pkt != nil {
pkt.Release()
}
hostinfo.logger(f.l).WithError(err).WithField("packet", out).
Warnf("Error while validating inbound packet")
return false
}
if !hostinfo.ConnectionState.window.Update(f.l, messageCounter) {
if pkt != nil {
pkt.Release()
}
hostinfo.logger(f.l).WithField("fwPacket", fwPacket).
Debugln("dropping out of window packet")
return false
}
dropReason := f.firewall.Drop(*fwPacket, true, hostinfo, f.pki.GetCAPool(), localCache, now)
dropReason := f.firewall.Drop(*fwPacket, true, hostinfo, f.pki.GetCAPool(), localCache)
if dropReason != nil {
if pkt != nil {
pkt.Release()
}
// NOTE: We give `packet` as the `out` here since we already decrypted from it and we don't need it anymore
// This gives us a buffer to build the reject packet in
f.rejectOutside(out, hostinfo.ConnectionState, hostinfo, nb, packet, q)
@@ -761,8 +527,17 @@ func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out
}
f.connectionManager.In(hostinfo)
_, err = f.readers[q].Write(out)
if err != nil {
if pkt != nil {
pkt.Len = len(out)
if f.batches.enqueueTun(q, pkt) {
f.observeTunQueueLen(q)
return true
}
f.writePacketToTun(q, pkt)
return true
}
if _, err = f.readers[q].Write(out); err != nil {
f.l.WithError(err).Error("Failed to write to tun")
}
return true

View File

@@ -1,16 +1,99 @@
package overlay
import (
"io"
"net/netip"
"sync"
"github.com/slackhq/nebula/routing"
)
type Device interface {
TunDev
io.ReadWriteCloser
Activate() error
Networks() []netip.Prefix
Name() string
RoutesFor(netip.Addr) routing.Gateways
NewMultiQueueReader() (TunDev, error)
NewMultiQueueReader() (io.ReadWriteCloser, error)
}
// Packet represents a single packet buffer with optional headroom to carry
// metadata (for example virtio-net headers).
type Packet struct {
	Buf     []byte // backing storage, including any reserved headroom before the payload
	Offset  int    // index into Buf where the payload begins
	Len     int    // payload length in bytes, starting at Offset
	release func() // returns the packet to its pool; nil when the packet is not pooled
}
// Payload returns the in-use portion of the buffer: Len bytes starting at Offset.
func (p *Packet) Payload() []byte {
	return p.Buf[p.Offset : p.Offset+p.Len]
}
// Reset clears the packet's bookkeeping while keeping the backing buffer for
// reuse. Note that Offset is zeroed here; PacketPool.Get re-establishes the
// pool's headroom offset when the packet is handed out again.
func (p *Packet) Reset() {
	p.Len = 0
	p.Offset = 0
	p.release = nil
}
// Release returns the packet to its owning pool, if it has one. Subsequent
// calls are no-ops until the packet is handed out again.
func (p *Packet) Release() {
	rel := p.release
	if rel == nil {
		return
	}
	p.release = nil
	rel()
}
// Capacity reports how many payload bytes fit in the buffer after Offset.
func (p *Packet) Capacity() int {
	return len(p.Buf) - p.Offset
}
// PacketPool manages reusable buffers with headroom.
type PacketPool struct {
	headroom int       // bytes reserved before the payload in every packet
	blksz    int       // total buffer size: headroom + payload capacity
	pool     sync.Pool // recycled *Packet values
}
// NewPacketPool builds a pool whose packets reserve `headroom` bytes in front
// of a `payload`-byte payload region.
func NewPacketPool(headroom, payload int) *PacketPool {
	pool := &PacketPool{
		headroom: headroom,
		blksz:    headroom + payload,
	}
	pool.pool.New = func() any {
		return &Packet{
			Buf:    make([]byte, pool.blksz),
			Offset: headroom,
		}
	}
	return pool
}
// Get returns a packet with the pool's headroom applied and zero length.
// The release hook must be re-armed on every Get because Reset clears it
// when the packet goes back into the pool.
func (p *PacketPool) Get() *Packet {
	pkt := p.pool.Get().(*Packet)
	pkt.Offset = p.headroom
	pkt.Len = 0
	pkt.release = func() { p.put(pkt) }
	return pkt
}
// put resets a packet and returns it to the pool; called via Packet.Release.
func (p *PacketPool) put(pkt *Packet) {
	pkt.Reset()
	p.pool.Put(pkt)
}
// BatchReader allows reading multiple packets into a shared pool with
// preallocated headroom (e.g. virtio-net headers). Returned packets are
// owned by the caller, which is responsible for releasing them.
type BatchReader interface {
	ReadIntoBatch(pool *PacketPool) ([]*Packet, error)
}
// BatchWriter writes a slice of packets that carry their own metadata.
// It returns the number of packets written along with any error.
type BatchWriter interface {
	WriteBatch(packets []*Packet) (int, error)
}
// BatchCapableDevice describes a device that can efficiently read and write
// batches of packets with virtio headroom.
//
// NOTE(review): the accessor semantics below are inferred from their names —
// confirm against the implementations: BatchHeadroom (headroom bytes per
// packet), BatchPayloadCap (payload capacity per packet), BatchSize (packets
// per batch).
type BatchCapableDevice interface {
	Device
	BatchReader
	BatchWriter
	BatchHeadroom() int
	BatchPayloadCap() int
	BatchSize() int
}

View File

@@ -1,91 +0,0 @@
package eventfd
import (
"encoding/binary"
"syscall"
"golang.org/x/sys/unix"
)
// EventFD wraps a Linux eventfd descriptor used to signal another thread
// waiting in epoll.
type EventFD struct {
	fd  int     // eventfd descriptor; 0 means "not initialized"
	buf [8]byte // scratch buffer for the 8-byte counter written by Kick
}
// New creates a non-blocking eventfd with an initial counter of zero.
func New() (EventFD, error) {
	fd, err := unix.Eventfd(0, unix.EFD_NONBLOCK)
	if err != nil {
		return EventFD{}, err
	}
	// buf starts at its zero value; no explicit initialization needed.
	return EventFD{fd: fd}, nil
}
// Kick adds 1 to the eventfd counter, waking any waiter blocked on it.
func (e *EventFD) Kick() error {
	// eventfd(2) expects an 8-byte integer in host byte order; little-endian
	// matches the platforms this code targets. NOTE(review): revisit if
	// big-endian support is ever needed.
	binary.LittleEndian.PutUint64(e.buf[:], 1)
	_, err := syscall.Write(int(e.fd), e.buf[:])
	return err
}
// Close releases the eventfd descriptor. It is a no-op on a zero-value
// EventFD and is safe to call more than once.
func (e *EventFD) Close() error {
	if e.fd == 0 {
		return nil
	}
	err := unix.Close(e.fd)
	// Forget the descriptor so a second Close cannot release an fd number
	// the kernel may have already reused for something else.
	e.fd = 0
	return err
}
// FD returns the raw eventfd descriptor, e.g. for registration with epoll.
func (e *EventFD) FD() int {
	return e.fd
}
// Epoll wraps a Linux epoll instance used to block on eventfd activity.
type Epoll struct {
	fd     int                  // epoll descriptor; 0 means "not initialized"
	buf    [8]byte              // scratch buffer used by Clear to drain the eventfd counter
	events []syscall.EpollEvent // ready-event storage for EpollWait (capacity 1)
}
// NewEpoll creates an epoll instance sized to report a single ready event.
func NewEpoll() (Epoll, error) {
	fd, err := unix.EpollCreate1(0)
	if err != nil {
		return Epoll{}, err
	}
	ep := Epoll{
		fd:     fd,
		events: make([]syscall.EpollEvent, 1),
	}
	// buf starts at its zero value; no explicit initialization needed.
	return ep, nil
}
// AddEvent registers fdToAdd with the epoll instance for readability (EPOLLIN).
func (ep *Epoll) AddEvent(fdToAdd int) error {
	event := syscall.EpollEvent{
		Events: syscall.EPOLLIN,
		Fd:     int32(fdToAdd),
	}
	return syscall.EpollCtl(ep.fd, syscall.EPOLL_CTL_ADD, fdToAdd, &event)
}
// Block waits indefinitely for a registered fd to become ready and returns
// the number of ready events. An EINTR interruption is reported as (0, nil),
// so callers should treat a zero count as "retry", not as readiness.
func (ep *Epoll) Block() (int, error) {
	n, err := syscall.EpollWait(ep.fd, ep.events, -1)
	if err != nil {
		//goland:noinspection GoDirectComparisonOfErrors
		if err == syscall.EINTR {
			// Interrupted by a signal; not an error, but nothing is ready.
			return 0, nil
		}
		return -1, err
	}
	return n, nil
}
// Clear drains the counter of the fd reported in events[0], resetting the
// eventfd so it can signal again. NOTE(review): this is only meaningful
// immediately after Block returned a count >= 1; otherwise events[0] holds
// stale data — confirm callers respect that ordering.
func (ep *Epoll) Clear() error {
	_, err := syscall.Read(int(ep.events[0].Fd), ep.buf[:])
	return err
}
// Close releases the epoll descriptor. It is a no-op on a zero-value Epoll
// and is safe to call more than once.
func (ep *Epoll) Close() error {
	if ep.fd == 0 {
		return nil
	}
	err := unix.Close(ep.fd)
	// Forget the descriptor so a second Close cannot release an fd number
	// the kernel may have already reused for something else.
	ep.fd = 0
	return err
}

View File

@@ -2,29 +2,16 @@ package overlay
import (
"fmt"
"io"
"net"
"net/netip"
"github.com/sirupsen/logrus"
"github.com/slackhq/nebula/config"
"github.com/slackhq/nebula/packet"
"github.com/slackhq/nebula/util"
)
const DefaultMTU = 1300
type TunDev interface {
io.WriteCloser
ReadMany(x []*packet.VirtIOPacket, q int) (int, error)
//todo this interface sux
AllocSeg(pkt *packet.OutPacket, q int) (int, error)
WriteOne(x *packet.OutPacket, kick bool, q int) (int, error)
WriteMany(x []*packet.OutPacket, q int) (int, error)
RecycleRxSeg(pkt *packet.VirtIOPacket, kick bool, q int) error
}
// TODO: We may be able to remove routines
type DeviceFactory func(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, routines int) (Device, error)
@@ -39,11 +26,11 @@ func NewDeviceFromConfig(c *config.C, l *logrus.Logger, vpnNetworks []netip.Pref
}
}
//func NewFdDeviceFromConfig(fd *int) DeviceFactory {
// return func(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, routines int) (Device, error) {
// return newTunFromFd(c, l, *fd, vpnNetworks)
// }
//}
func NewFdDeviceFromConfig(fd *int) DeviceFactory {
return func(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, routines int) (Device, error) {
return newTunFromFd(c, l, *fd, vpnNetworks)
}
}
func getAllRoutesFromConfig(c *config.C, vpnNetworks []netip.Prefix, initial bool) (bool, []Route, error) {
if !initial && !c.HasChanged("tun.routes") && !c.HasChanged("tun.unsafe_routes") {

View File

@@ -9,8 +9,6 @@ import (
"github.com/rcrowley/go-metrics"
"github.com/sirupsen/logrus"
"github.com/slackhq/nebula/iputil"
"github.com/slackhq/nebula/overlay/virtqueue"
"github.com/slackhq/nebula/packet"
"github.com/slackhq/nebula/routing"
)
@@ -24,10 +22,6 @@ type disabledTun struct {
l *logrus.Logger
}
func (*disabledTun) RecycleRxSeg(pkt *packet.VirtIOPacket, kick bool, q int) error {
return nil
}
func newDisabledTun(vpnNetworks []netip.Prefix, queueLen int, metricsEnabled bool, l *logrus.Logger) *disabledTun {
tun := &disabledTun{
vpnNetworks: vpnNetworks,
@@ -46,10 +40,6 @@ func newDisabledTun(vpnNetworks []netip.Prefix, queueLen int, metricsEnabled boo
return tun
}
func (*disabledTun) GetQueues() []*virtqueue.SplitQueue {
return nil
}
func (*disabledTun) Activate() error {
return nil
}
@@ -115,23 +105,7 @@ func (t *disabledTun) Write(b []byte) (int, error) {
return len(b), nil
}
func (t *disabledTun) AllocSeg(pkt *packet.OutPacket, q int) (int, error) {
return 0, fmt.Errorf("tun_disabled: AllocSeg not implemented")
}
func (t *disabledTun) WriteOne(x *packet.OutPacket, kick bool, q int) (int, error) {
return 0, fmt.Errorf("tun_disabled: WriteOne not implemented")
}
func (t *disabledTun) WriteMany(x []*packet.OutPacket, q int) (int, error) {
return 0, fmt.Errorf("tun_disabled: WriteMany not implemented")
}
func (t *disabledTun) ReadMany(b []*packet.VirtIOPacket, _ int) (int, error) {
return t.Read(b[0].Payload)
}
func (t *disabledTun) NewMultiQueueReader() (TunDev, error) {
func (t *disabledTun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
return t, nil
}

View File

@@ -5,9 +5,11 @@ package overlay
import (
"fmt"
"io"
"net"
"net/netip"
"os"
"runtime"
"strings"
"sync/atomic"
"time"
@@ -16,19 +18,16 @@ import (
"github.com/gaissmai/bart"
"github.com/sirupsen/logrus"
"github.com/slackhq/nebula/config"
"github.com/slackhq/nebula/overlay/vhostnet"
"github.com/slackhq/nebula/packet"
"github.com/slackhq/nebula/routing"
"github.com/slackhq/nebula/util"
"github.com/slackhq/nebula/util/virtio"
wgtun "github.com/slackhq/nebula/wgstack/tun"
"github.com/vishvananda/netlink"
"golang.org/x/sys/unix"
)
type tun struct {
file *os.File
io.ReadWriteCloser
fd int
vdev []*vhostnet.Device
Device string
vpnNetworks []netip.Prefix
MaxMTU int
@@ -36,6 +35,7 @@ type tun struct {
TXQueueLen int
deviceIndex int
ioctlFd uintptr
wgDevice wgtun.Device
Routes atomic.Pointer[[]Route]
routeTree atomic.Pointer[bart.Table[routing.Gateways]]
@@ -43,8 +43,7 @@ type tun struct {
useSystemRoutes bool
useSystemRoutesBufferSize int
isV6 bool
l *logrus.Logger
l *logrus.Logger
}
func (t *tun) Networks() []netip.Prefix {
@@ -72,7 +71,9 @@ type ifreqQLEN struct {
func newTunFromFd(c *config.C, l *logrus.Logger, deviceFd int, vpnNetworks []netip.Prefix) (*tun, error) {
file := os.NewFile(uintptr(deviceFd), "/dev/net/tun")
t, err := newTunGeneric(c, l, file, vpnNetworks)
useWGDefault := runtime.GOOS == "linux"
useWG := c.GetBool("tun.use_wireguard_stack", c.GetBool("listen.use_wireguard_stack", useWGDefault))
t, err := newTunGeneric(c, l, file, vpnNetworks, useWG)
if err != nil {
return nil, err
}
@@ -106,7 +107,7 @@ func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, multiqueu
}
var req ifReq
req.Flags = uint16(unix.IFF_TUN | unix.IFF_NO_PI | unix.IFF_TUN_EXCL | unix.IFF_VNET_HDR | unix.IFF_NAPI)
req.Flags = uint16(unix.IFF_TUN | unix.IFF_NO_PI)
if multiqueue {
req.Flags |= unix.IFF_MULTI_QUEUE
}
@@ -116,56 +117,57 @@ func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, multiqueu
}
name := strings.Trim(string(req.Name[:]), "\x00")
if err = unix.SetNonblock(fd, true); err != nil {
_ = unix.Close(fd)
return nil, fmt.Errorf("make file descriptor non-blocking: %w", err)
}
file := os.NewFile(uintptr(fd), "/dev/net/tun")
err = unix.IoctlSetPointerInt(fd, unix.TUNSETVNETHDRSZ, virtio.NetHdrSize)
if err != nil {
return nil, fmt.Errorf("set vnethdr size: %w", err)
}
flags := 0
//flags = //unix.TUN_F_CSUM //| unix.TUN_F_TSO4 | unix.TUN_F_USO4 | unix.TUN_F_TSO6 | unix.TUN_F_USO6
err = unix.IoctlSetInt(fd, unix.TUNSETOFFLOAD, flags)
if err != nil {
return nil, fmt.Errorf("set offloads: %w", err)
}
t, err := newTunGeneric(c, l, file, vpnNetworks)
useWGDefault := runtime.GOOS == "linux"
useWG := c.GetBool("tun.use_wireguard_stack", c.GetBool("listen.use_wireguard_stack", useWGDefault))
t, err := newTunGeneric(c, l, file, vpnNetworks, useWG)
if err != nil {
return nil, err
}
t.fd = fd
t.Device = name
vdev, err := vhostnet.NewDevice(
vhostnet.WithBackendFD(fd),
vhostnet.WithQueueSize(8192), //todo config
)
if err != nil {
return nil, err
}
t.vdev = []*vhostnet.Device{vdev}
return t, nil
}
func newTunGeneric(c *config.C, l *logrus.Logger, file *os.File, vpnNetworks []netip.Prefix) (*tun, error) {
func newTunGeneric(c *config.C, l *logrus.Logger, file *os.File, vpnNetworks []netip.Prefix, useWireguard bool) (*tun, error) {
var (
rw io.ReadWriteCloser = file
fd = int(file.Fd())
wgDev wgtun.Device
)
if useWireguard {
dev, err := wgtun.CreateTUNFromFile(file, c.GetInt("tun.mtu", DefaultMTU))
if err != nil {
return nil, fmt.Errorf("failed to initialize wireguard tun device: %w", err)
}
wgDev = dev
rw = newWireguardTunIO(dev, c.GetInt("tun.mtu", DefaultMTU))
fd = int(dev.File().Fd())
}
t := &tun{
file: file,
fd: int(file.Fd()),
ReadWriteCloser: rw,
fd: fd,
vpnNetworks: vpnNetworks,
TXQueueLen: c.GetInt("tun.tx_queue", 500),
useSystemRoutes: c.GetBool("tun.use_system_route_table", false),
useSystemRoutesBufferSize: c.GetInt("tun.use_system_route_table_buffer_size", 0),
l: l,
}
if len(vpnNetworks) != 0 {
t.isV6 = vpnNetworks[0].Addr().Is6() //todo what about multi-IP?
if wgDev != nil {
t.wgDevice = wgDev
}
if wgDev != nil {
// replace ioctl fd with device file descriptor to keep route management working
file = wgDev.File()
t.fd = int(file.Fd())
t.ioctlFd = file.Fd()
}
if t.ioctlFd == 0 {
t.ioctlFd = file.Fd()
}
err := t.reload(c, true)
@@ -250,7 +252,7 @@ func (t *tun) reload(c *config.C, initial bool) error {
return nil
}
func (t *tun) NewMultiQueueReader() (TunDev, error) {
func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
if err != nil {
return nil, err
@@ -263,17 +265,9 @@ func (t *tun) NewMultiQueueReader() (TunDev, error) {
return nil, err
}
vdev, err := vhostnet.NewDevice(
vhostnet.WithBackendFD(fd),
vhostnet.WithQueueSize(8192), //todo config
)
if err != nil {
return nil, err
}
file := os.NewFile(uintptr(fd), "/dev/net/tun")
t.vdev = append(t.vdev, vdev)
return t, nil
return file, nil
}
func (t *tun) RoutesFor(ip netip.Addr) routing.Gateways {
@@ -281,6 +275,29 @@ func (t *tun) RoutesFor(ip netip.Addr) routing.Gateways {
return r
}
func (t *tun) Write(b []byte) (int, error) {
var nn int
maximum := len(b)
for {
n, err := unix.Write(t.fd, b[nn:maximum])
if n > 0 {
nn += n
}
if nn == len(b) {
return nn, err
}
if err != nil {
return nn, err
}
if n == 0 {
return nn, io.ErrUnexpectedEOF
}
}
}
func (t *tun) deviceBytes() (o [16]byte) {
for i, c := range t.Device {
o[i] = byte(c)
@@ -693,14 +710,16 @@ func (t *tun) Close() error {
close(t.routeChan)
}
for _, v := range t.vdev {
if v != nil {
_ = v.Close()
}
if t.ReadWriteCloser != nil {
_ = t.ReadWriteCloser.Close()
}
if t.file != nil {
_ = t.file.Close()
if t.wgDevice != nil {
_ = t.wgDevice.Close()
if t.ioctlFd > 0 {
// underlying fd already closed by the device
t.ioctlFd = 0
}
}
if t.ioctlFd > 0 {
@@ -709,65 +728,3 @@ func (t *tun) Close() error {
return nil
}
func (t *tun) ReadMany(p []*packet.VirtIOPacket, q int) (int, error) {
n, err := t.vdev[q].ReceivePackets(p) //we are TXing
if err != nil {
return 0, err
}
return n, nil
}
func (t *tun) Write(b []byte) (int, error) {
maximum := len(b) //we are RXing
//todo garbagey
out := packet.NewOut()
x, err := t.AllocSeg(out, 0)
if err != nil {
return 0, err
}
copy(out.SegmentPayloads[x], b)
err = t.vdev[0].TransmitPacket(out, true)
if err != nil {
t.l.WithError(err).Error("Transmitting packet")
return 0, err
}
return maximum, nil
}
func (t *tun) AllocSeg(pkt *packet.OutPacket, q int) (int, error) {
idx, buf, err := t.vdev[q].GetPacketForTx()
if err != nil {
return 0, err
}
x := pkt.UseSegment(idx, buf, t.isV6)
return x, nil
}
func (t *tun) WriteOne(x *packet.OutPacket, kick bool, q int) (int, error) {
if err := t.vdev[q].TransmitPacket(x, kick); err != nil {
t.l.WithError(err).Error("Transmitting packet")
return 0, err
}
return 1, nil
}
func (t *tun) WriteMany(x []*packet.OutPacket, q int) (int, error) {
maximum := len(x) //we are RXing
if maximum == 0 {
return 0, nil
}
err := t.vdev[q].TransmitPackets(x)
if err != nil {
t.l.WithError(err).Error("Transmitting packet")
return 0, err
}
return maximum, nil
}
func (t *tun) RecycleRxSeg(pkt *packet.VirtIOPacket, kick bool, q int) error {
return t.vdev[q].ReceiveQueue.OfferDescriptorChains(pkt.Chains, kick)
}

View File

@@ -0,0 +1,56 @@
//go:build linux && !android && !e2e_testing
package overlay
import "fmt"
func (t *tun) batchIO() (*wireguardTunIO, bool) {
io, ok := t.ReadWriteCloser.(*wireguardTunIO)
return io, ok
}
func (t *tun) ReadIntoBatch(pool *PacketPool) ([]*Packet, error) {
io, ok := t.batchIO()
if !ok {
return nil, fmt.Errorf("wireguard batch I/O not enabled")
}
return io.ReadIntoBatch(pool)
}
func (t *tun) WriteBatch(packets []*Packet) (int, error) {
io, ok := t.batchIO()
if ok {
return io.WriteBatch(packets)
}
for _, pkt := range packets {
if pkt == nil {
continue
}
if _, err := t.Write(pkt.Payload()[:pkt.Len]); err != nil {
return 0, err
}
pkt.Release()
}
return len(packets), nil
}
func (t *tun) BatchHeadroom() int {
if io, ok := t.batchIO(); ok {
return io.BatchHeadroom()
}
return 0
}
func (t *tun) BatchPayloadCap() int {
if io, ok := t.batchIO(); ok {
return io.BatchPayloadCap()
}
return 0
}
func (t *tun) BatchSize() int {
if io, ok := t.batchIO(); ok {
return io.BatchSize()
}
return 1
}

View File

@@ -1,13 +1,11 @@
package overlay
import (
"fmt"
"io"
"net/netip"
"github.com/sirupsen/logrus"
"github.com/slackhq/nebula/config"
"github.com/slackhq/nebula/packet"
"github.com/slackhq/nebula/routing"
)
@@ -38,10 +36,6 @@ type UserDevice struct {
inboundWriter *io.PipeWriter
}
func (d *UserDevice) RecycleRxSeg(pkt *packet.VirtIOPacket, kick bool, q int) error {
return nil
}
func (d *UserDevice) Activate() error {
return nil
}
@@ -52,7 +46,7 @@ func (d *UserDevice) RoutesFor(ip netip.Addr) routing.Gateways {
return routing.Gateways{routing.NewGateway(ip, 1)}
}
func (d *UserDevice) NewMultiQueueReader() (TunDev, error) {
func (d *UserDevice) NewMultiQueueReader() (io.ReadWriteCloser, error) {
return d, nil
}
@@ -71,19 +65,3 @@ func (d *UserDevice) Close() error {
d.outboundWriter.Close()
return nil
}
func (d *UserDevice) ReadMany(b []*packet.VirtIOPacket, _ int) (int, error) {
return d.Read(b[0].Payload)
}
func (d *UserDevice) AllocSeg(pkt *packet.OutPacket, q int) (int, error) {
return 0, fmt.Errorf("user: AllocSeg not implemented")
}
func (d *UserDevice) WriteOne(x *packet.OutPacket, kick bool, q int) (int, error) {
return 0, fmt.Errorf("user: WriteOne not implemented")
}
func (d *UserDevice) WriteMany(x []*packet.OutPacket, q int) (int, error) {
return 0, fmt.Errorf("user: WriteMany not implemented")
}

View File

@@ -1,23 +0,0 @@
Significant portions of this code are derived from https://pkg.go.dev/github.com/hetznercloud/virtio-go
MIT License
Copyright (c) 2025 Hetzner Cloud GmbH
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,4 +0,0 @@
// Package vhost implements the basic ioctl requests needed to interact with the
// kernel-level virtio server that provides accelerated virtio devices for
// networking and more.
package vhost

View File

@@ -1,218 +0,0 @@
package vhost
import (
"fmt"
"unsafe"
"github.com/slackhq/nebula/overlay/virtqueue"
"github.com/slackhq/nebula/util/virtio"
"golang.org/x/sys/unix"
)
const (
// vhostIoctlGetFeatures can be used to retrieve the features supported by
// the vhost implementation in the kernel.
//
// Response payload: [virtio.Feature]
// Kernel name: VHOST_GET_FEATURES
vhostIoctlGetFeatures = 0x8008af00
// vhostIoctlSetFeatures can be used to communicate the features supported
// by this virtio implementation to the kernel.
//
// Request payload: [virtio.Feature]
// Kernel name: VHOST_SET_FEATURES
vhostIoctlSetFeatures = 0x4008af00
// vhostIoctlSetOwner can be used to set the current process as the
// exclusive owner of a control file descriptor.
//
// Request payload: none
// Kernel name: VHOST_SET_OWNER
vhostIoctlSetOwner = 0x0000af01
// vhostIoctlSetMemoryLayout can be used to set up or modify the memory
// layout which describes the IOTLB mappings in the kernel.
//
// Request payload: [MemoryLayout] with custom serialization
// Kernel name: VHOST_SET_MEM_TABLE
vhostIoctlSetMemoryLayout = 0x4008af03
// vhostIoctlSetQueueSize can be used to set the size of the virtqueue.
//
// Request payload: [QueueState]
// Kernel name: VHOST_SET_VRING_NUM
vhostIoctlSetQueueSize = 0x4008af10
// vhostIoctlSetQueueAddress can be used to set the addresses of the
// different parts of the virtqueue.
//
// Request payload: [QueueAddresses]
// Kernel name: VHOST_SET_VRING_ADDR
vhostIoctlSetQueueAddress = 0x4028af11
// vhostIoctlSetAvailableRingBase can be used to set the index of the next
// available ring entry the device will process.
//
// Request payload: [QueueState]
// Kernel name: VHOST_SET_VRING_BASE
vhostIoctlSetAvailableRingBase = 0x4008af12
// vhostIoctlSetQueueKickEventFD can be used to set the event file
// descriptor to signal the device when descriptor chains were added to the
// available ring.
//
// Request payload: [QueueFile]
// Kernel name: VHOST_SET_VRING_KICK
vhostIoctlSetQueueKickEventFD = 0x4008af20
// vhostIoctlSetQueueCallEventFD can be used to set the event file
// descriptor that gets signaled by the device when descriptor chains have
// been used by it.
//
// Request payload: [QueueFile]
// Kernel name: VHOST_SET_VRING_CALL
vhostIoctlSetQueueCallEventFD = 0x4008af21
)
// QueueState is an ioctl request payload that can hold a queue index and any
// 32-bit number.
//
// Kernel name: vhost_vring_state
type QueueState struct {
// QueueIndex is the index of the virtqueue.
QueueIndex uint32
// Num is any 32-bit number, depending on the request.
Num uint32
}
// QueueAddresses is an ioctl request payload that can hold the addresses of the
// different parts of a virtqueue.
//
// Kernel name: vhost_vring_addr
type QueueAddresses struct {
// QueueIndex is the index of the virtqueue.
QueueIndex uint32
// Flags that are not used in this implementation.
Flags uint32
// DescriptorTableAddress is the address of the descriptor table in user
// space memory. It must be 16-byte aligned.
DescriptorTableAddress uintptr
// UsedRingAddress is the address of the used ring in user space memory. It
// must be 4-byte aligned.
UsedRingAddress uintptr
// AvailableRingAddress is the address of the available ring in user space
// memory. It must be 2-byte aligned.
AvailableRingAddress uintptr
// LogAddress is used for an optional logging support, not supported by this
// implementation.
LogAddress uintptr
}
// QueueFile is an ioctl request payload that can hold a queue index and a file
// descriptor.
//
// Kernel name: vhost_vring_file
type QueueFile struct {
// QueueIndex is the index of the virtqueue.
QueueIndex uint32
// FD is the file descriptor of the file. Pass -1 to unbind from a file.
FD int32
}
// IoctlPtr is a copy of the similarly named unexported function from the Go
// unix package. This is needed to do custom ioctl requests not supported by the
// standard library.
func IoctlPtr(fd int, req uint, arg unsafe.Pointer) error {
_, _, err := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(arg))
if err != 0 {
return fmt.Errorf("ioctl request %d: %w", req, err)
}
return nil
}
// GetFeatures requests the supported feature bits from the virtio device
// associated with the given control file descriptor.
func GetFeatures(controlFD int) (virtio.Feature, error) {
var features virtio.Feature
if err := IoctlPtr(controlFD, vhostIoctlGetFeatures, unsafe.Pointer(&features)); err != nil {
return 0, fmt.Errorf("get features: %w", err)
}
return features, nil
}
// SetFeatures communicates the feature bits supported by this implementation
// to the virtio device associated with the given control file descriptor.
func SetFeatures(controlFD int, features virtio.Feature) error {
if err := IoctlPtr(controlFD, vhostIoctlSetFeatures, unsafe.Pointer(&features)); err != nil {
return fmt.Errorf("set features: %w", err)
}
return nil
}
// OwnControlFD sets the current process as the exclusive owner for the
// given control file descriptor. This must be called before interacting with
// the control file descriptor in any other way.
func OwnControlFD(controlFD int) error {
if err := IoctlPtr(controlFD, vhostIoctlSetOwner, unsafe.Pointer(nil)); err != nil {
return fmt.Errorf("set control file descriptor owner: %w", err)
}
return nil
}
// SetMemoryLayout sets up or modifies the memory layout for the kernel-level
// virtio device associated with the given control file descriptor.
func SetMemoryLayout(controlFD int, layout MemoryLayout) error {
payload := layout.serializePayload()
if err := IoctlPtr(controlFD, vhostIoctlSetMemoryLayout, unsafe.Pointer(&payload[0])); err != nil {
return fmt.Errorf("set memory layout: %w", err)
}
return nil
}
// RegisterQueue registers a virtio queue with the kernel-level virtio server.
// The virtqueue will be linked to the given control file descriptor and will
// have the given index. The kernel will use this queue until the control file
// descriptor is closed.
func RegisterQueue(controlFD int, queueIndex uint32, queue *virtqueue.SplitQueue) error {
if err := IoctlPtr(controlFD, vhostIoctlSetQueueSize, unsafe.Pointer(&QueueState{
QueueIndex: queueIndex,
Num: uint32(queue.Size()),
})); err != nil {
return fmt.Errorf("set queue size: %w", err)
}
if err := IoctlPtr(controlFD, vhostIoctlSetQueueAddress, unsafe.Pointer(&QueueAddresses{
QueueIndex: queueIndex,
Flags: 0,
DescriptorTableAddress: queue.DescriptorTable().Address(),
UsedRingAddress: queue.UsedRing().Address(),
AvailableRingAddress: queue.AvailableRing().Address(),
LogAddress: 0,
})); err != nil {
return fmt.Errorf("set queue addresses: %w", err)
}
if err := IoctlPtr(controlFD, vhostIoctlSetAvailableRingBase, unsafe.Pointer(&QueueState{
QueueIndex: queueIndex,
Num: 0,
})); err != nil {
return fmt.Errorf("set available ring base: %w", err)
}
if err := IoctlPtr(controlFD, vhostIoctlSetQueueKickEventFD, unsafe.Pointer(&QueueFile{
QueueIndex: queueIndex,
FD: int32(queue.KickEventFD()),
})); err != nil {
return fmt.Errorf("set kick event file descriptor: %w", err)
}
if err := IoctlPtr(controlFD, vhostIoctlSetQueueCallEventFD, unsafe.Pointer(&QueueFile{
QueueIndex: queueIndex,
FD: int32(queue.CallEventFD()),
})); err != nil {
return fmt.Errorf("set call event file descriptor: %w", err)
}
return nil
}

View File

@@ -1,21 +0,0 @@
package vhost_test
import (
"testing"
"unsafe"
"github.com/slackhq/nebula/overlay/vhost"
"github.com/stretchr/testify/assert"
)
func TestQueueState_Size(t *testing.T) {
assert.EqualValues(t, 8, unsafe.Sizeof(vhost.QueueState{}))
}
func TestQueueAddresses_Size(t *testing.T) {
assert.EqualValues(t, 40, unsafe.Sizeof(vhost.QueueAddresses{}))
}
func TestQueueFile_Size(t *testing.T) {
assert.EqualValues(t, 8, unsafe.Sizeof(vhost.QueueFile{}))
}

View File

@@ -1,73 +0,0 @@
package vhost
import (
"encoding/binary"
"fmt"
"unsafe"
"github.com/slackhq/nebula/overlay/virtqueue"
)
// MemoryRegion describes a region of userspace memory which is being made
// accessible to a vhost device.
//
// Kernel name: vhost_memory_region
type MemoryRegion struct {
// GuestPhysicalAddress is the physical address of the memory region within
// the guest, when virtualization is used. When no virtualization is used,
// this should be the same as UserspaceAddress.
GuestPhysicalAddress uintptr
// Size is the size of the memory region.
Size uint64
// UserspaceAddress is the virtual address in the userspace of the host
// where the memory region can be found.
UserspaceAddress uintptr
// Padding and room for flags. Currently unused.
_ uint64
}
// MemoryLayout is a list of [MemoryRegion]s.
type MemoryLayout []MemoryRegion
// NewMemoryLayoutForQueues returns a new [MemoryLayout] that describes the
// memory pages used by the descriptor tables of the given queues.
func NewMemoryLayoutForQueues(queues []*virtqueue.SplitQueue) MemoryLayout {
regions := make([]MemoryRegion, 0)
for _, queue := range queues {
for address, size := range queue.DescriptorTable().BufferAddresses() {
regions = append(regions, MemoryRegion{
// There is no virtualization in play here, so the guest address
// is the same as in the host's userspace.
GuestPhysicalAddress: address,
Size: uint64(size),
UserspaceAddress: address,
})
}
}
return regions
}
// serializePayload serializes the list of memory regions into a format that is
// compatible to the vhost_memory kernel struct. The returned byte slice can be
// used as a payload for the vhostIoctlSetMemoryLayout ioctl.
func (regions MemoryLayout) serializePayload() []byte {
regionCount := len(regions)
regionSize := int(unsafe.Sizeof(MemoryRegion{}))
payload := make([]byte, 8+regionCount*regionSize)
// The first 32 bits contain the number of memory regions. The following 32
// bits are padding.
binary.LittleEndian.PutUint32(payload[0:4], uint32(regionCount))
if regionCount > 0 {
// The underlying byte array of the slice should already have the correct
// format, so just copy that.
copied := copy(payload[8:], unsafe.Slice((*byte)(unsafe.Pointer(&regions[0])), regionCount*regionSize))
if copied != regionCount*regionSize {
panic(fmt.Sprintf("copied only %d bytes of the memory regions, but expected %d",
copied, regionCount*regionSize))
}
}
return payload
}

View File

@@ -1,42 +0,0 @@
package vhost
import (
"testing"
"unsafe"
"github.com/stretchr/testify/assert"
)
func TestMemoryRegion_Size(t *testing.T) {
assert.EqualValues(t, 32, unsafe.Sizeof(MemoryRegion{}))
}
func TestMemoryLayout_SerializePayload(t *testing.T) {
layout := MemoryLayout([]MemoryRegion{
{
GuestPhysicalAddress: 42,
Size: 100,
UserspaceAddress: 142,
}, {
GuestPhysicalAddress: 99,
Size: 100,
UserspaceAddress: 99,
},
})
payload := layout.serializePayload()
assert.Equal(t, []byte{
0x02, 0x00, 0x00, 0x00, // nregions
0x00, 0x00, 0x00, 0x00, // padding
// region 0
0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // guest_phys_addr
0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // memory_size
0x8e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // userspace_addr
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // flags_padding
// region 1
0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // guest_phys_addr
0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // memory_size
0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // userspace_addr
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // flags_padding
}, payload)
}

View File

@@ -1,23 +0,0 @@
Significant portions of this code are derived from https://pkg.go.dev/github.com/hetznercloud/virtio-go
MIT License
Copyright (c) 2025 Hetzner Cloud GmbH
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,427 +0,0 @@
package vhostnet
import (
"context"
"errors"
"fmt"
"os"
"runtime"
"github.com/slackhq/nebula/overlay/vhost"
"github.com/slackhq/nebula/overlay/virtqueue"
"github.com/slackhq/nebula/packet"
"github.com/slackhq/nebula/util/virtio"
"golang.org/x/sys/unix"
)
// ErrDeviceClosed is returned when the [Device] is closed while operations are
// still running.
var ErrDeviceClosed = errors.New("device was closed")
// The indexes for the receive and transmit queues.
const (
receiveQueueIndex = 0
transmitQueueIndex = 1
)
// Device represents a vhost networking device within the kernel-level virtio
// implementation and provides methods to interact with it.
type Device struct {
initialized bool
controlFD int
fullTable bool
ReceiveQueue *virtqueue.SplitQueue
TransmitQueue *virtqueue.SplitQueue
}
// NewDevice initializes a new vhost networking device within the
// kernel-level virtio implementation, sets up the virtqueues and returns a
// [Device] instance that can be used to communicate with that vhost device.
//
// There are multiple options that can be passed to this constructor to
// influence device creation:
// - [WithQueueSize]
// - [WithBackendFD]
// - [WithBackendDevice]
//
// Remember to call [Device.Close] after use to free up resources.
func NewDevice(options ...Option) (*Device, error) {
var err error
opts := optionDefaults
opts.apply(options)
if err = opts.validate(); err != nil {
return nil, fmt.Errorf("invalid options: %w", err)
}
dev := Device{
controlFD: -1,
}
// Clean up a partially initialized device when something fails.
defer func() {
if err != nil {
_ = dev.Close()
}
}()
// Retrieve a new control file descriptor. This will be used to configure
// the vhost networking device in the kernel.
dev.controlFD, err = unix.Open("/dev/vhost-net", os.O_RDWR, 0666)
if err != nil {
return nil, fmt.Errorf("get control file descriptor: %w", err)
}
if err = vhost.OwnControlFD(dev.controlFD); err != nil {
return nil, fmt.Errorf("own control file descriptor: %w", err)
}
// Advertise the supported features. This isn't much for now.
// TODO: Add feature options and implement proper feature negotiation.
getFeatures, err := vhost.GetFeatures(dev.controlFD) //0x1033D008000 but why
if err != nil {
return nil, fmt.Errorf("get features: %w", err)
}
if getFeatures == 0 {
}
//const funky = virtio.Feature(1 << 27)
//features := virtio.FeatureVersion1 | funky // | todo virtio.FeatureNetMergeRXBuffers
features := virtio.FeatureVersion1 | virtio.FeatureNetMergeRXBuffers
if err = vhost.SetFeatures(dev.controlFD, features); err != nil {
return nil, fmt.Errorf("set features: %w", err)
}
itemSize := os.Getpagesize() * 4 //todo config
// Initialize and register the queues needed for the networking device.
if dev.ReceiveQueue, err = createQueue(dev.controlFD, receiveQueueIndex, opts.queueSize, itemSize); err != nil {
return nil, fmt.Errorf("create receive queue: %w", err)
}
if dev.TransmitQueue, err = createQueue(dev.controlFD, transmitQueueIndex, opts.queueSize, itemSize); err != nil {
return nil, fmt.Errorf("create transmit queue: %w", err)
}
// Set up memory mappings for all buffers used by the queues. This has to
// happen before a backend for the queues can be registered.
memoryLayout := vhost.NewMemoryLayoutForQueues(
[]*virtqueue.SplitQueue{dev.ReceiveQueue, dev.TransmitQueue},
)
if err = vhost.SetMemoryLayout(dev.controlFD, memoryLayout); err != nil {
return nil, fmt.Errorf("setup memory layout: %w", err)
}
// Set the queue backends. This activates the queues within the kernel.
if err = SetQueueBackend(dev.controlFD, receiveQueueIndex, opts.backendFD); err != nil {
return nil, fmt.Errorf("set receive queue backend: %w", err)
}
if err = SetQueueBackend(dev.controlFD, transmitQueueIndex, opts.backendFD); err != nil {
return nil, fmt.Errorf("set transmit queue backend: %w", err)
}
// Fully populate the receive queue with available buffers which the device
// can write new packets into.
if err = dev.refillReceiveQueue(); err != nil {
return nil, fmt.Errorf("refill receive queue: %w", err)
}
if err = dev.refillTransmitQueue(); err != nil {
return nil, fmt.Errorf("refill receive queue: %w", err)
}
dev.initialized = true
// Make sure to clean up even when the device gets garbage collected without
// Close being called first.
devPtr := &dev
runtime.SetFinalizer(devPtr, (*Device).Close)
return devPtr, nil
}
// refillReceiveQueue offers as many new device-writable buffers to the device
// as the queue can fit. The device will then use these to write received
// packets.
func (dev *Device) refillReceiveQueue() error {
for {
_, err := dev.ReceiveQueue.OfferInDescriptorChains()
if err != nil {
if errors.Is(err, virtqueue.ErrNotEnoughFreeDescriptors) {
// Queue is full, job is done.
return nil
}
return fmt.Errorf("offer descriptor chain: %w", err)
}
}
}
func (dev *Device) refillTransmitQueue() error {
//for {
// desc, err := dev.TransmitQueue.DescriptorTable().CreateDescriptorForOutputs()
// if err != nil {
// if errors.Is(err, virtqueue.ErrNotEnoughFreeDescriptors) {
// // Queue is full, job is done.
// return nil
// }
// return fmt.Errorf("offer descriptor chain: %w", err)
// } else {
// dev.TransmitQueue.UsedRing().InitOfferSingle(desc, 0)
// }
//}
return nil
}
// Close cleans up the vhost networking device within the kernel and releases
// all resources used for it.
// The implementation will try to release as many resources as possible and
// collect potential errors before returning them.
func (dev *Device) Close() error {
dev.initialized = false
// Closing the control file descriptor will unregister all queues from the
// kernel.
if dev.controlFD >= 0 {
if err := unix.Close(dev.controlFD); err != nil {
// Return an error and do not continue, because the memory used for
// the queues should not be released before they were unregistered
// from the kernel.
return fmt.Errorf("close control file descriptor: %w", err)
}
dev.controlFD = -1
}
var errs []error
if dev.ReceiveQueue != nil {
if err := dev.ReceiveQueue.Close(); err == nil {
dev.ReceiveQueue = nil
} else {
errs = append(errs, fmt.Errorf("close receive queue: %w", err))
}
}
if dev.TransmitQueue != nil {
if err := dev.TransmitQueue.Close(); err == nil {
dev.TransmitQueue = nil
} else {
errs = append(errs, fmt.Errorf("close transmit queue: %w", err))
}
}
if len(errs) == 0 {
// Everything was cleaned up. No need to run the finalizer anymore.
runtime.SetFinalizer(dev, nil)
}
return errors.Join(errs...)
}
// ensureInitialized is used as a guard to prevent methods to be called on an
// uninitialized instance.
func (dev *Device) ensureInitialized() {
if !dev.initialized {
panic("device is not initialized")
}
}
// createQueue creates a new virtqueue and registers it with the vhost device
// using the given index.
func createQueue(controlFD int, queueIndex int, queueSize int, itemSize int) (*virtqueue.SplitQueue, error) {
var (
queue *virtqueue.SplitQueue
err error
)
if queue, err = virtqueue.NewSplitQueue(queueSize, itemSize); err != nil {
return nil, fmt.Errorf("create virtqueue: %w", err)
}
if err = vhost.RegisterQueue(controlFD, uint32(queueIndex), queue); err != nil {
return nil, fmt.Errorf("register virtqueue with index %d: %w", queueIndex, err)
}
return queue, nil
}
// truncateBuffers returns a new list of buffers whose combined length matches
// exactly the specified length. When the specified length exceeds the length of
// the buffers, this is an error. When it is smaller, the buffer list will be
// truncated accordingly.
func truncateBuffers(buffers [][]byte, length int) (out [][]byte) {
for _, buffer := range buffers {
if length < len(buffer) {
out = append(out, buffer[:length])
return
}
out = append(out, buffer)
length -= len(buffer)
}
if length > 0 {
panic("length exceeds the combined length of all buffers")
}
return
}
func (dev *Device) GetPacketForTx() (uint16, []byte, error) {
var err error
var idx uint16
if !dev.fullTable {
idx, err = dev.TransmitQueue.DescriptorTable().CreateDescriptorForOutputs()
if err == virtqueue.ErrNotEnoughFreeDescriptors {
dev.fullTable = true
idx, err = dev.TransmitQueue.TakeSingle(context.TODO())
}
} else {
idx, err = dev.TransmitQueue.TakeSingle(context.TODO())
}
if err != nil {
return 0, nil, fmt.Errorf("transmit queue: %w", err)
}
buf, err := dev.TransmitQueue.GetDescriptorItem(idx)
if err != nil {
return 0, nil, fmt.Errorf("get descriptor chain: %w", err)
}
return idx, buf, nil
}
func (dev *Device) TransmitPacket(pkt *packet.OutPacket, kick bool) error {
if len(pkt.SegmentIDs) == 0 {
return nil
}
for idx := range pkt.SegmentIDs {
segmentID := pkt.SegmentIDs[idx]
dev.TransmitQueue.SetDescSize(segmentID, len(pkt.Segments[idx]))
}
err := dev.TransmitQueue.OfferDescriptorChains(pkt.SegmentIDs, false)
if err != nil {
return fmt.Errorf("offer descriptor chains: %w", err)
}
pkt.Reset()
if kick {
if err := dev.TransmitQueue.Kick(); err != nil {
return err
}
}
return nil
}
func (dev *Device) TransmitPackets(pkts []*packet.OutPacket) error {
if len(pkts) == 0 {
return nil
}
for i := range pkts {
if err := dev.TransmitPacket(pkts[i], false); err != nil {
return err
}
}
if err := dev.TransmitQueue.Kick(); err != nil {
return err
}
return nil
}
// TODO: Make above methods cancelable by taking a context.Context argument?
// TODO: Implement zero-copy variants to transmit and receive packets?
// processChains processes as many chains as needed to create one packet. The number of processed chains is returned.
func (dev *Device) processChains(pkt *packet.VirtIOPacket, chains []virtqueue.UsedElement) (int, error) {
//read first element to see how many descriptors we need:
pkt.Reset()
err := dev.ReceiveQueue.GetDescriptorInbuffers(uint16(chains[0].DescriptorIndex), &pkt.ChainRefs)
if err != nil {
return 0, fmt.Errorf("get descriptor chain: %w", err)
}
if len(pkt.ChainRefs) == 0 {
return 1, nil
}
// The specification requires that the first descriptor chain starts
// with a virtio-net header. It is not clear, whether it is also
// required to be fully contained in the first buffer of that
// descriptor chain, but it is reasonable to assume that this is
// always the case.
// The decode method already does the buffer length check.
if err = pkt.Header.Decode(pkt.ChainRefs[0][0:]); err != nil {
// The device misbehaved. There is no way we can gracefully
// recover from this, because we don't know how many of the
// following descriptor chains belong to this packet.
return 0, fmt.Errorf("decode vnethdr: %w", err)
}
//we have the header now: what do we need to do?
if int(pkt.Header.NumBuffers) > len(chains) {
return 0, fmt.Errorf("number of buffers is greater than number of chains %d", len(chains))
}
if int(pkt.Header.NumBuffers) != 1 {
return 0, fmt.Errorf("too smol-brain to handle more than one chain right now: %d chains", len(chains))
}
if chains[0].Length > 16000 {
//todo!
return 1, fmt.Errorf("too big packet length: %d", chains[0].Length)
}
//shift the buffer out of out:
pkt.Payload = pkt.ChainRefs[0][virtio.NetHdrSize:chains[0].Length]
pkt.Chains = append(pkt.Chains, uint16(chains[0].DescriptorIndex))
return 1, nil
//cursor := n - virtio.NetHdrSize
//
//if uint32(n) >= chains[0].Length && pkt.Header.NumBuffers == 1 {
// pkt.Payload = pkt.Payload[:chains[0].Length-virtio.NetHdrSize]
// return 1, nil
//}
//
//i := 1
//// we used chain 0 already
//for i = 1; i < len(chains); i++ {
// n, err = dev.ReceiveQueue.GetDescriptorChainContents(uint16(chains[i].DescriptorIndex), pkt.Payload[cursor:], int(chains[i].Length))
// if err != nil {
// // When this fails we may miss to free some descriptor chains. We
// // could try to mitigate this by deferring the freeing somehow, but
// // it's not worth the hassle. When this method fails, the queue will
// // be in a broken state anyway.
// return i, fmt.Errorf("get descriptor chain: %w", err)
// }
// cursor += n
//}
////todo this has to be wrong
//pkt.Payload = pkt.Payload[:cursor]
//return i, nil
}
// ReceivePackets blocks until the receive queue reports used descriptor
// chains, converts them into packets stored in out, and returns the number of
// packets written.
//
// NOTE(review): when processChains fails mid-loop this returns (0, err) even
// though earlier slots of out were already populated, and none of the consumed
// chains are recycled; the recycle call below is commented out — confirm the
// queue is torn down on error, otherwise descriptors leak.
func (dev *Device) ReceivePackets(out []*packet.VirtIOPacket) (int, error) {
	//todo optimize?
	var chains []virtqueue.UsedElement
	var err error
	//if len(dev.extraRx) == 0 {
	// Block until at least one chain is used, capped to the capacity of out.
	chains, err = dev.ReceiveQueue.BlockAndGetHeadsCapped(context.TODO(), len(out))
	if err != nil {
		return 0, err
	}
	if len(chains) == 0 {
		return 0, nil
	}
	//} else {
	//	chains = dev.extraRx
	//}
	numPackets := 0
	chainsIdx := 0
	// One packet may consume one or more descriptor chains (see processChains),
	// so the two counters advance independently.
	for numPackets = 0; chainsIdx < len(chains); numPackets++ {
		if numPackets >= len(out) {
			// Out of output slots; the remaining chains are dropped (and,
			// per the note above, currently not recycled).
			return numPackets, fmt.Errorf("dropping %d packets, no room", len(chains)-numPackets)
		}
		numChains, err := dev.processChains(out[numPackets], chains[chainsIdx:])
		if err != nil {
			return 0, err
		}
		chainsIdx += numChains
	}
	// Now that we have copied all buffers, we can recycle the used descriptor chains
	//if err = dev.ReceiveQueue.OfferDescriptorChains(chains); err != nil {
	//	return 0, err
	//}
	return numPackets, nil
}

View File

@@ -1,3 +0,0 @@
// Package vhostnet implements methods to initialize vhost networking devices
// within the kernel-level virtio implementation and communicate with them.
package vhostnet

View File

@@ -1,31 +0,0 @@
package vhostnet
import (
"fmt"
"unsafe"
"github.com/slackhq/nebula/overlay/vhost"
)
const (
	// vhostNetIoctlSetBackend can be used to attach a virtqueue to a RAW socket
	// or TAP device.
	//
	// Request payload: [vhost.QueueFile]
	// Kernel name: VHOST_NET_SET_BACKEND
	// NOTE(review): value looks like _IOW(0xAF, 0x30, struct vhost_vring_file)
	// — confirm against <linux/vhost.h>.
	vhostNetIoctlSetBackend = 0x4008af30
)
// SetQueueBackend attaches a virtqueue of the vhost networking device
// described by controlFD to the given backend file descriptor.
// The backend file descriptor can either be a RAW socket or a TAP device.
// Passing -1 detaches the queue instead.
func SetQueueBackend(controlFD int, queueIndex uint32, backendFD int) error {
	// Build the ioctl payload up front so the pointer passed to the kernel is
	// easy to see.
	payload := vhost.QueueFile{
		QueueIndex: queueIndex,
		FD:         int32(backendFD),
	}
	err := vhost.IoctlPtr(controlFD, vhostNetIoctlSetBackend, unsafe.Pointer(&payload))
	if err != nil {
		return fmt.Errorf("set queue backend file descriptor: %w", err)
	}
	return nil
}

View File

@@ -1,69 +0,0 @@
package vhostnet
import (
"errors"
"github.com/slackhq/nebula/overlay/virtqueue"
)
// optionValues holds the resolved configuration produced by applying a list
// of [Option] values; see optionDefaults for the initial state.
type optionValues struct {
	queueSize int // size of the TX/RX queues; -1 means unset (required)
	backendFD int // backend file descriptor; -1 means unset (required)
}
// apply runs every option in order, mutating o in place.
func (o *optionValues) apply(options []Option) {
	for i := range options {
		options[i](o)
	}
}
// validate reports an error when a required option is missing or when the
// configured queue size is rejected by the virtqueue package.
func (o *optionValues) validate() error {
	if o.queueSize == -1 {
		return errors.New("queue size is required")
	}
	err := virtqueue.CheckQueueSize(o.queueSize)
	if err != nil {
		return err
	}
	if o.backendFD == -1 {
		return errors.New("backend file descriptor is required")
	}
	return nil
}
// optionDefaults is the starting state before options are applied; the -1
// sentinels mark the required fields as unset (see validate).
var optionDefaults = optionValues{
	// Required.
	queueSize: -1,
	// Required.
	backendFD: -1,
}

// Option can be passed to [NewDevice] to influence device creation.
type Option func(*optionValues)
// WithQueueSize returns an [Option] that sets the size of the TX and RX queues
// that are to be created for the device, i.e. the number of entries/buffers
// each queue can hold (which also affects memory consumption).
// This is required and must be an integer from 1 to 32768 that is also a power
// of 2.
func WithQueueSize(queueSize int) Option {
	return func(o *optionValues) {
		o.queueSize = queueSize
	}
}
// WithBackendFD returns an [Option] that sets the file descriptor of the
// backend used for the queues of the device. The device will write and read
// packets to/from that backend. The file descriptor can either be of a RAW
// socket or TUN/TAP device.
// Either this or [WithBackendDevice] is required.
func WithBackendFD(backendFD int) Option {
	return func(o *optionValues) {
		o.backendFD = backendFD
	}
}
//// WithBackendDevice returns an [Option] that sets the given TAP device as the
//// backend that will be used for the queues of the device. The device will
//// write and read packets to/from that backend. The TAP device should have been
//// created with the [tuntap.WithVirtioNetHdr] option enabled.
//// Either this or [WithBackendFD] is required.
//func WithBackendDevice(dev *tuntap.Device) Option {
// return func(o *optionValues) { o.backendFD = int(dev.File().Fd()) }
//}

View File

@@ -1,23 +0,0 @@
Significant portions of this code are derived from https://pkg.go.dev/github.com/hetznercloud/virtio-go
MIT License
Copyright (c) 2025 Hetzner Cloud GmbH
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,140 +0,0 @@
package virtqueue
import (
"fmt"
"unsafe"
)
// availableRingFlag is a flag that describes an [AvailableRing].
type availableRingFlag uint16

const (
	// availableRingFlagNoInterrupt is used by the guest to advise the host to
	// not interrupt it when consuming a buffer. It's unreliable, so it's simply
	// an optimization. (Bit 0; VIRTQ_AVAIL_F_NO_INTERRUPT in the virtio spec —
	// TODO confirm.)
	availableRingFlagNoInterrupt availableRingFlag = 1 << iota
)
// availableRingSize is the number of bytes needed to store an [AvailableRing]
// with the given queue size in memory: two bytes each for the flags, ring
// index and trailing used-event fields, plus two bytes per ring entry.
func availableRingSize(queueSize int) int {
	const fixedOverhead = 6 // flags (2) + ring index (2) + used event (2)
	return fixedOverhead + queueSize*2
}
// availableRingAlignment is the minimum alignment of an [AvailableRing]
// in memory (in bytes), as required by the virtio spec.
const availableRingAlignment = 2
// AvailableRing is used by the driver to offer descriptor chains to the device.
// Each ring entry refers to the head of a descriptor chain. It is only written
// to by the driver and read by the device.
//
// Because the size of the ring depends on the queue size, we cannot define a
// Go struct with a static size that maps to the memory of the ring. Instead,
// this struct only contains pointers to the corresponding memory areas.
type AvailableRing struct {
	// initialized is set by newAvailableRing; Address panics when it is false.
	initialized bool
	// flags that describe this ring.
	flags *availableRingFlag
	// ringIndex indicates where the driver would put the next entry into the
	// ring (modulo the queue size).
	ringIndex *uint16
	// ring references buffers using the index of the head of the descriptor
	// chain in the [DescriptorTable]. It wraps around at queue size.
	ring []uint16
	// usedEvent is not used by this implementation, but we reserve it anyway to
	// avoid issues in case a device may try to access it, contrary to the
	// virtio specification.
	usedEvent *uint16
}
// newAvailableRing maps an [AvailableRing] onto the given memory. The length
// of mem must be exactly availableRingSize(queueSize) bytes; anything else
// panics, as it indicates a bug in the queue layout code.
func newAvailableRing(queueSize int, mem []byte) *AvailableRing {
	want := availableRingSize(queueSize)
	if len(mem) != want {
		panic(fmt.Sprintf("memory size (%v) does not match required size "+
			"for available ring: %v", len(mem), want))
	}
	// Layout: flags (2 bytes), ring index (2), ring entries (2*queueSize),
	// used event (2, at the very end).
	ring := AvailableRing{
		initialized: true,
		flags:       (*availableRingFlag)(unsafe.Pointer(&mem[0])),
		ringIndex:   (*uint16)(unsafe.Pointer(&mem[2])),
		ring:        unsafe.Slice((*uint16)(unsafe.Pointer(&mem[4])), queueSize),
		usedEvent:   (*uint16)(unsafe.Pointer(&mem[want-2])),
	}
	return &ring
}
// Address returns the pointer to the beginning of the ring in memory.
// Do not modify the memory directly to not interfere with this implementation.
func (r *AvailableRing) Address() uintptr {
	if r.initialized {
		return uintptr(unsafe.Pointer(r.flags))
	}
	panic("available ring is not initialized")
}
// offerElements publishes the chain head of every used element to the ring and
// then advances the ring index so the device will process the new chains.
// Must be called with the queue lock held.
func (r *AvailableRing) offerElements(chains []UsedElement) {
	base := *r.ringIndex
	for i, elem := range chains {
		// The 16-bit index arithmetic is allowed to wrap: the ring length is a
		// power of two below 2^16, so the modulo result stays correct across
		// overflow.
		slot := int(base+uint16(i)) % len(r.ring)
		r.ring[slot] = elem.GetHead()
	}
	// Publish all new entries at once by bumping the index past them.
	*r.ringIndex = base + uint16(len(chains))
}
// offer publishes the given descriptor chain heads to the ring and advances
// the ring index accordingly. Must be called with the queue lock held.
func (r *AvailableRing) offer(chains []uint16) {
	base := *r.ringIndex
	for i, head := range chains {
		// The 16-bit index arithmetic is allowed to wrap: the ring length is a
		// power of two below 2^16, so the modulo result stays correct across
		// overflow.
		slot := int(base+uint16(i)) % len(r.ring)
		r.ring[slot] = head
	}
	// Publish all new entries at once by bumping the index past them.
	*r.ringIndex = base + uint16(len(chains))
}
// offerSingle publishes one descriptor chain head to the ring and advances the
// ring index by one. Must be called with the queue lock held.
func (r *AvailableRing) offerSingle(head uint16) {
	// 16-bit overflow of the index is fine: the ring length is a power of two
	// below 2^16, so the modulo keeps pointing at the right slot.
	slot := int(*r.ringIndex) % len(r.ring)
	r.ring[slot] = head
	*r.ringIndex++
}

View File

@@ -1,71 +0,0 @@
package virtqueue
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestAvailableRing_MemoryLayout verifies that the ring's field pointers map
// onto the expected little-endian byte layout: flags, ring index, ring
// entries, then the reserved used-event word.
func TestAvailableRing_MemoryLayout(t *testing.T) {
	const queueSize = 2
	memory := make([]byte, availableRingSize(queueSize))
	r := newAvailableRing(queueSize, memory)
	*r.flags = 0x01ff
	*r.ringIndex = 1
	r.ring[0] = 0x1234
	r.ring[1] = 0x5678
	assert.Equal(t, []byte{
		0xff, 0x01, // flags
		0x01, 0x00, // ring index
		0x34, 0x12, // ring[0]
		0x78, 0x56, // ring[1]
		0x00, 0x00, // used event (untouched)
	}, memory)
}
// TestAvailableRing_Offer verifies offer's slot placement and index
// arithmetic, including wrap-around of the ring array and overflow of the
// 16-bit ring index.
func TestAvailableRing_Offer(t *testing.T) {
	const queueSize = 8
	chainHeads := []uint16{42, 33, 69}
	tests := []struct {
		name              string
		startRingIndex    uint16
		expectedRingIndex uint16
		expectedRing      []uint16
	}{
		{
			name:              "no overflow",
			startRingIndex:    0,
			expectedRingIndex: 3,
			expectedRing:      []uint16{42, 33, 69, 0, 0, 0, 0, 0},
		},
		{
			// Slots wrap modulo the queue size.
			name:              "ring overflow",
			startRingIndex:    6,
			expectedRingIndex: 9,
			expectedRing:      []uint16{69, 0, 0, 0, 0, 0, 42, 33},
		},
		{
			// The 16-bit ring index itself may wrap past 65535.
			name:              "index overflow",
			startRingIndex:    65535,
			expectedRingIndex: 2,
			expectedRing:      []uint16{33, 69, 0, 0, 0, 0, 0, 42},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			memory := make([]byte, availableRingSize(queueSize))
			r := newAvailableRing(queueSize, memory)
			*r.ringIndex = tt.startRingIndex
			r.offer(chainHeads)
			assert.Equal(t, tt.expectedRingIndex, *r.ringIndex)
			assert.Equal(t, tt.expectedRing, r.ring)
		})
	}
}

View File

@@ -1,43 +0,0 @@
package virtqueue
// descriptorFlag is a flag that describes a [Descriptor].
type descriptorFlag uint16

const (
	// descriptorFlagHasNext marks a descriptor chain as continuing via the next
	// field. (Bit 0.)
	descriptorFlagHasNext descriptorFlag = 1 << iota
	// descriptorFlagWritable marks a buffer as device write-only (otherwise
	// device read-only). (Bit 1.)
	descriptorFlagWritable
	// descriptorFlagIndirect means the buffer contains a list of buffer
	// descriptors to provide an additional layer of indirection.
	// Only allowed when the [virtio.FeatureIndirectDescriptors] feature was
	// negotiated. (Bit 2.)
	descriptorFlagIndirect
)

// descriptorSize is the number of bytes needed to store a [Descriptor] in
// memory (asserted by TestDescriptor_Size).
const descriptorSize = 16

// Descriptor describes (a part of) a buffer which is either read-only for the
// device or write-only for the device (depending on [descriptorFlagWritable]).
// Multiple descriptors can be chained to produce a "descriptor chain" that can
// contain both device-readable and device-writable buffers. Device-readable
// descriptors always come first in a chain. A single, large buffer may be
// split up by chaining multiple similar descriptors that reference different
// memory pages. This is required, because buffers may exceed a single page size
// and the memory accessed by the device is expected to be continuous.
type Descriptor struct {
	// address is the address to the continuous memory holding the data for this
	// descriptor.
	address uintptr
	// length is the amount of bytes stored at address.
	length uint32
	// flags that describe this descriptor.
	flags descriptorFlag
	// next contains the index of the next descriptor continuing this descriptor
	// chain when the [descriptorFlagHasNext] flag is set.
	next uint16
}

View File

@@ -1,12 +0,0 @@
package virtqueue
import (
"testing"
"unsafe"
"github.com/stretchr/testify/assert"
)
// TestDescriptor_Size guards the memory layout: the Go struct must occupy
// exactly descriptorSize (16) bytes, as the device reads it directly.
func TestDescriptor_Size(t *testing.T) {
	assert.EqualValues(t, descriptorSize, unsafe.Sizeof(Descriptor{}))
}

View File

@@ -1,641 +0,0 @@
package virtqueue
import (
"errors"
"fmt"
"math"
"unsafe"
"golang.org/x/sys/unix"
)
var (
	// ErrDescriptorChainEmpty is returned when a descriptor chain would contain
	// no buffers, which is not allowed.
	ErrDescriptorChainEmpty = errors.New("empty descriptor chains are not allowed")
	// ErrNotEnoughFreeDescriptors is returned when the free descriptors are
	// exhausted, meaning that the queue is full. Callers may retry after the
	// device has returned some chains.
	ErrNotEnoughFreeDescriptors = errors.New("not enough free descriptors, queue is full")
	// ErrInvalidDescriptorChain is returned when a descriptor chain is not
	// valid for a given operation.
	ErrInvalidDescriptorChain = errors.New("invalid descriptor chain")
)

// noFreeHead is used to mark when all descriptors are in use and we have no
// free chain. This value is impossible to occur as an index naturally, because
// it exceeds the maximum queue size.
const noFreeHead = uint16(math.MaxUint16)
// descriptorTableSize is the number of bytes needed to store a
// [DescriptorTable] with the given queue size in memory (16 bytes per
// descriptor, see descriptorSize).
func descriptorTableSize(queueSize int) int {
	return descriptorSize * queueSize
}

// descriptorTableAlignment is the minimum alignment of a [DescriptorTable]
// in memory (in bytes), as required by the virtio spec.
const descriptorTableAlignment = 16
// DescriptorTable is a table that holds [Descriptor]s, addressed via their
// index in the slice.
type DescriptorTable struct {
	descriptors []Descriptor
	// freeHeadIndex is the index of the head of the descriptor chain which
	// contains all currently unused descriptors. When all descriptors are in
	// use, this has the special value of noFreeHead.
	freeHeadIndex uint16
	// freeNum tracks the number of descriptors which are currently not in use.
	freeNum uint16
	// bufferBase is the start address of the single mmap'ed region that backs
	// all descriptor buffers (set by initializeDescriptors).
	bufferBase uintptr
	// bufferSize is the total byte size of that mmap'ed region.
	bufferSize int
	// itemSize is the fixed buffer size reserved per descriptor.
	itemSize int
}
// newDescriptorTable maps a descriptor table onto the given memory. The length
// of mem must be exactly descriptorTableSize(queueSize) bytes; anything else
// panics, as it indicates a bug in the queue layout code.
//
// Before this descriptor table can be used, [initialize] must be called.
func newDescriptorTable(queueSize int, mem []byte, itemSize int) *DescriptorTable {
	want := descriptorTableSize(queueSize)
	if len(mem) != want {
		panic(fmt.Sprintf("memory size (%v) does not match required size "+
			"for descriptor table: %v", len(mem), want))
	}
	dt := DescriptorTable{
		descriptors: unsafe.Slice((*Descriptor)(unsafe.Pointer(&mem[0])), queueSize),
		// Nothing is free until initializeDescriptors has run.
		freeHeadIndex: noFreeHead,
		freeNum:       0,
		itemSize:      itemSize, //todo configurable? needs to be page-aligned
	}
	return &dt
}
// Address returns the pointer to the beginning of the descriptor table in
// memory. Do not modify the memory directly to not interfere with this
// implementation.
func (dt *DescriptorTable) Address() uintptr {
	if dt.descriptors == nil {
		panic("descriptor table is not initialized")
	}
	first := &dt.descriptors[0]
	return uintptr(unsafe.Pointer(first))
}
// Size returns the total byte size of the mmap'ed region backing the
// descriptor buffers. Panics when the table was not initialized.
func (dt *DescriptorTable) Size() uintptr {
	if dt.descriptors != nil {
		return uintptr(dt.bufferSize)
	}
	panic("descriptor table is not initialized")
}
// BufferAddresses returns a map of pointer->size for all allocations used by
// the table. With the single-region allocation scheme this is exactly one
// entry.
func (dt *DescriptorTable) BufferAddresses() map[uintptr]int {
	if dt.descriptors == nil {
		panic("descriptor table is not initialized")
	}
	addrs := make(map[uintptr]int, 1)
	addrs[dt.bufferBase] = dt.bufferSize
	return addrs
}
// initializeDescriptors allocates buffers with the size of a full memory page
// for each descriptor in the table. While this may be a bit wasteful, it makes
// dealing with descriptors way easier. Without this preallocation, we would
// have to allocate and free memory on demand, increasing complexity.
//
// All descriptors will be marked as free and will form a free chain. The
// addresses of all descriptors will be populated while their length remains
// zero.
func (dt *DescriptorTable) initializeDescriptors() error {
	numDescriptors := len(dt.descriptors)
	// Allocate ONE large region for all buffers; each descriptor gets an
	// itemSize-sized slice of it, addressed by offset.
	totalSize := dt.itemSize * numDescriptors
	basePtr, err := unix.MmapPtr(-1, 0, nil, uintptr(totalSize),
		unix.PROT_READ|unix.PROT_WRITE,
		unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
	if err != nil {
		return fmt.Errorf("allocate buffer memory for descriptors: %w", err)
	}

	// Store the base for cleanup later (see releaseBuffers).
	dt.bufferBase = uintptr(basePtr)
	dt.bufferSize = totalSize

	for i := range dt.descriptors {
		dt.descriptors[i] = Descriptor{
			address: dt.bufferBase + uintptr(i*dt.itemSize),
			length:  0,
			// All descriptors should form a free chain that loops around.
			flags: descriptorFlagHasNext,
			next:  uint16((i + 1) % len(dt.descriptors)),
		}
	}

	// All descriptors are free to use now.
	dt.freeHeadIndex = 0
	dt.freeNum = uint16(len(dt.descriptors))

	return nil
}
// releaseBuffers releases the single mmap'ed region that backs all descriptor
// buffers of this table and clears every descriptor's (now dangling) address.
// The descriptor table should no longer be used after calling this.
//
// Returns an error when unmapping the region fails; in that case the region
// may be leaked, but the table is still marked unusable.
func (dt *DescriptorTable) releaseBuffers() error {
	for i := range dt.descriptors {
		descriptor := &dt.descriptors[i]
		descriptor.address = 0
	}

	// As a safety measure, make sure no descriptors can be used anymore.
	dt.freeHeadIndex = noFreeHead
	dt.freeNum = 0

	if dt.bufferBase != 0 {
		// BUG FIX: unmap BEFORE clearing bufferBase. The previous version
		// zeroed the field first and then passed address 0 to munmap, which
		// silently leaked the whole buffer region.
		// The pointer points to memory not managed by Go, so this conversion
		// is safe. See https://github.com/golang/go/issues/58625
		//goland:noinspection GoVetUnsafePointer
		if err := unix.MunmapPtr(unsafe.Pointer(dt.bufferBase), uintptr(dt.bufferSize)); err != nil {
			return fmt.Errorf("release buffer memory: %w", err)
		}
		dt.bufferBase = 0
		dt.bufferSize = 0
	}
	return nil
}
// createDescriptorChain creates a new descriptor chain within the descriptor
// table which contains a number of device-readable buffers (out buffers) and
// device-writable buffers (in buffers).
//
// All buffers in the outBuffers slice will be concatenated by chaining
// descriptors, one for each buffer in the slice. The size of the single buffers
// must not exceed the size of a memory page (see [os.Getpagesize]).
// When numInBuffers is greater than zero, the given number of device-writable
// descriptors will be appended to the end of the chain, each referencing a
// whole memory page.
//
// The index of the head of the new descriptor chain will be returned. Callers
// should make sure to free the descriptor chain using [freeDescriptorChain]
// after it was used by the device.
//
// When there are not enough free descriptors to hold the given number of
// buffers, an [ErrNotEnoughFreeDescriptors] will be returned. In this case, the
// caller should try again after some descriptor chains were used by the device
// and returned back into the free chain.
func (dt *DescriptorTable) createDescriptorChain(outBuffers [][]byte, numInBuffers int) (uint16, error) {
	// Calculate the number of descriptors needed to build the chain.
	numDesc := uint16(len(outBuffers) + numInBuffers)

	// Descriptor chains must always contain at least one descriptor.
	if numDesc < 1 {
		return 0, ErrDescriptorChainEmpty
	}

	// Do we still have enough free descriptors?
	if numDesc > dt.freeNum {
		return 0, ErrNotEnoughFreeDescriptors
	}

	// Above validation ensured that there is at least one free descriptor, so
	// the free descriptor chain head should be valid.
	if dt.freeHeadIndex == noFreeHead {
		panic("free descriptor chain head is unset but there should be free descriptors")
	}

	// To avoid having to iterate over the whole table to find the descriptor
	// pointing to the head just to replace the free head, we instead always
	// create descriptor chains from the descriptors coming after the head.
	// This way we only have to touch the head as a last resort, when all other
	// descriptors are already used.
	head := dt.descriptors[dt.freeHeadIndex].next
	next := head
	tail := head

	// First the device-readable buffers: copy each caller buffer into the
	// descriptor's preallocated memory.
	for i, buffer := range outBuffers {
		desc := &dt.descriptors[next]
		checkUnusedDescriptorLength(next, desc)
		if len(buffer) > dt.itemSize {
			// The caller should already prevent that from happening.
			panic(fmt.Sprintf("out buffer %d has size %d which exceeds desc length %d", i, len(buffer), dt.itemSize))
		}

		// Copy the buffer to the memory referenced by the descriptor.
		// The descriptor address points to memory not managed by Go, so this
		// conversion is safe. See https://github.com/golang/go/issues/58625
		//goland:noinspection GoVetUnsafePointer
		copy(unsafe.Slice((*byte)(unsafe.Pointer(desc.address)), dt.itemSize), buffer)
		desc.length = uint32(len(buffer))

		// Clear the flags in case there were any others set.
		desc.flags = descriptorFlagHasNext

		tail = next
		next = desc.next
	}

	// Then the device-writable buffers, which always span a full item.
	for range numInBuffers {
		desc := &dt.descriptors[next]
		checkUnusedDescriptorLength(next, desc)

		// Give the device the maximum available number of bytes to write into.
		desc.length = uint32(dt.itemSize)

		// Mark the descriptor as device-writable.
		desc.flags = descriptorFlagHasNext | descriptorFlagWritable

		tail = next
		next = desc.next
	}

	// The last descriptor should end the chain.
	tailDesc := &dt.descriptors[tail]
	tailDesc.flags &= ^descriptorFlagHasNext
	tailDesc.next = 0 // Not necessary to clear this, it's just for looks.

	dt.freeNum -= numDesc
	if dt.freeNum == 0 {
		// The last descriptor in the chain should be the free chain head
		// itself.
		if tail != dt.freeHeadIndex {
			panic("descriptor chain takes up all free descriptors but does not end with the free chain head")
		}

		// When this new chain takes up all remaining descriptors, we no longer
		// have a free chain.
		dt.freeHeadIndex = noFreeHead
	} else {
		// We took some descriptors out of the free chain, so make sure to close
		// the circle again.
		dt.descriptors[dt.freeHeadIndex].next = next
	}

	return head, nil
}
// CreateDescriptorForOutputs takes a single descriptor out of the free chain
// and prepares it as a standalone device-readable buffer spanning the full
// itemSize. It returns the index of that descriptor.
// Returns [ErrNotEnoughFreeDescriptors] when the table is exhausted.
func (dt *DescriptorTable) CreateDescriptorForOutputs() (uint16, error) {
	//todo just fill the damn table
	// Do we still have enough free descriptors?
	if 1 > dt.freeNum {
		return 0, ErrNotEnoughFreeDescriptors
	}

	// Above validation ensured that there is at least one free descriptor, so
	// the free descriptor chain head should be valid.
	if dt.freeHeadIndex == noFreeHead {
		panic("free descriptor chain head is unset but there should be free descriptors")
	}

	// To avoid having to iterate over the whole table to find the descriptor
	// pointing to the head just to replace the free head, we instead always
	// create descriptor chains from the descriptors coming after the head.
	// This way we only have to touch the head as a last resort, when all other
	// descriptors are already used.
	head := dt.descriptors[dt.freeHeadIndex].next
	desc := &dt.descriptors[head]
	next := desc.next
	checkUnusedDescriptorLength(head, desc)

	// Give the device the maximum available number of bytes to write into.
	desc.length = uint32(dt.itemSize)
	// No flags: a zero flags field marks the buffer as device-readable and
	// ends the (single-descriptor) chain.
	desc.flags = 0
	desc.next = 0 // Not necessary to clear this, it's just for looks.

	dt.freeNum -= 1
	if dt.freeNum == 0 {
		// The last descriptor in the chain should be the free chain head
		// itself.
		if next != dt.freeHeadIndex {
			panic("descriptor chain takes up all free descriptors but does not end with the free chain head")
		}

		// When this new chain takes up all remaining descriptors, we no longer
		// have a free chain.
		dt.freeHeadIndex = noFreeHead
	} else {
		// We took some descriptors out of the free chain, so make sure to close
		// the circle again.
		dt.descriptors[dt.freeHeadIndex].next = next
	}

	return head, nil
}
// createDescriptorForInputs takes a single descriptor out of the free chain
// and prepares it as a standalone device-writable buffer spanning the full
// itemSize. It returns the index of that descriptor.
// Returns [ErrNotEnoughFreeDescriptors] when the table is exhausted.
func (dt *DescriptorTable) createDescriptorForInputs() (uint16, error) {
	// Do we still have enough free descriptors?
	if 1 > dt.freeNum {
		return 0, ErrNotEnoughFreeDescriptors
	}

	// Above validation ensured that there is at least one free descriptor, so
	// the free descriptor chain head should be valid.
	if dt.freeHeadIndex == noFreeHead {
		panic("free descriptor chain head is unset but there should be free descriptors")
	}

	// To avoid having to iterate over the whole table to find the descriptor
	// pointing to the head just to replace the free head, we instead always
	// create descriptor chains from the descriptors coming after the head.
	// This way we only have to touch the head as a last resort, when all other
	// descriptors are already used.
	head := dt.descriptors[dt.freeHeadIndex].next
	desc := &dt.descriptors[head]
	next := desc.next
	checkUnusedDescriptorLength(head, desc)

	// Give the device the maximum available number of bytes to write into.
	desc.length = uint32(dt.itemSize)
	// Device-writable, single-descriptor chain (no next flag).
	desc.flags = descriptorFlagWritable
	desc.next = 0 // Not necessary to clear this, it's just for looks.

	dt.freeNum -= 1
	if dt.freeNum == 0 {
		// The last descriptor in the chain should be the free chain head
		// itself.
		if next != dt.freeHeadIndex {
			panic("descriptor chain takes up all free descriptors but does not end with the free chain head")
		}

		// When this new chain takes up all remaining descriptors, we no longer
		// have a free chain.
		dt.freeHeadIndex = noFreeHead
	} else {
		// We took some descriptors out of the free chain, so make sure to close
		// the circle again.
		dt.descriptors[dt.freeHeadIndex].next = next
	}

	return head, nil
}
// TODO: Implement a zero-copy variant of createDescriptorChain?
// getDescriptorChain returns the device-readable buffers (out buffers) and
// device-writable buffers (in buffers) of the descriptor chain that starts with
// the given head index. The descriptor chain must have been created using
// [createDescriptorChain] and must not have been freed yet (meaning that the
// head index must not be contained in the free chain).
//
// Be careful to only access the returned buffer slices when the device has not
// yet or is no longer using them. They must not be accessed after
// [freeDescriptorChain] has been called.
func (dt *DescriptorTable) getDescriptorChain(head uint16) (outBuffers, inBuffers [][]byte, err error) {
	// BUG FIX: the bounds check must be >=, not >; head == len(dt.descriptors)
	// would panic on the slice access below.
	if int(head) >= len(dt.descriptors) {
		return nil, nil, fmt.Errorf("%w: index out of range", ErrInvalidDescriptorChain)
	}

	// Iterate over the chain. The iteration is limited to the queue size to
	// avoid ending up in an endless loop when things go very wrong.
	next := head
	for range len(dt.descriptors) {
		if next == dt.freeHeadIndex {
			return nil, nil, fmt.Errorf("%w: must not be part of the free chain", ErrInvalidDescriptorChain)
		}

		desc := &dt.descriptors[next]

		// The descriptor address points to memory not managed by Go, so this
		// conversion is safe. See https://github.com/golang/go/issues/58625
		//goland:noinspection GoVetUnsafePointer
		bs := unsafe.Slice((*byte)(unsafe.Pointer(desc.address)), desc.length)
		if desc.flags&descriptorFlagWritable == 0 {
			outBuffers = append(outBuffers, bs)
		} else {
			inBuffers = append(inBuffers, bs)
		}

		// Is this the tail of the chain?
		if desc.flags&descriptorFlagHasNext == 0 {
			break
		}

		// Detect loops.
		if desc.next == head {
			return nil, nil, fmt.Errorf("%w: contains a loop", ErrInvalidDescriptorChain)
		}

		next = desc.next
	}

	return
}
// getDescriptorItem returns the buffer of the single descriptor at the given
// index, sized to the descriptor's current length. It performs no chain or
// free-list validation beyond the bounds check.
func (dt *DescriptorTable) getDescriptorItem(head uint16) ([]byte, error) {
	// BUG FIX: the bounds check must be >=, not >; head == len(dt.descriptors)
	// would panic on the index below.
	if int(head) >= len(dt.descriptors) {
		return nil, fmt.Errorf("%w: index out of range", ErrInvalidDescriptorChain)
	}
	desc := &dt.descriptors[head] //todo this is a pretty nasty hack with no checks
	// The descriptor address points to memory not managed by Go, so this
	// conversion is safe. See https://github.com/golang/go/issues/58625
	//goland:noinspection GoVetUnsafePointer
	bs := unsafe.Slice((*byte)(unsafe.Pointer(desc.address)), desc.length)
	return bs, nil
}
// getDescriptorInbuffers appends the device-writable buffers of the chain
// starting at head to inBuffers. The chain must consist exclusively of
// device-writable descriptors; encountering a device-readable one is an error.
func (dt *DescriptorTable) getDescriptorInbuffers(head uint16, inBuffers *[][]byte) error {
	// BUG FIX: the bounds check must be >=, not >; head == len(dt.descriptors)
	// would panic on the slice access below.
	if int(head) >= len(dt.descriptors) {
		return fmt.Errorf("%w: index out of range", ErrInvalidDescriptorChain)
	}

	// Iterate over the chain. The iteration is limited to the queue size to
	// avoid ending up in an endless loop when things go very wrong.
	next := head
	for range len(dt.descriptors) {
		if next == dt.freeHeadIndex {
			return fmt.Errorf("%w: must not be part of the free chain", ErrInvalidDescriptorChain)
		}

		desc := &dt.descriptors[next]

		// The descriptor address points to memory not managed by Go, so this
		// conversion is safe. See https://github.com/golang/go/issues/58625
		//goland:noinspection GoVetUnsafePointer
		bs := unsafe.Slice((*byte)(unsafe.Pointer(desc.address)), desc.length)
		if desc.flags&descriptorFlagWritable == 0 {
			return fmt.Errorf("there should not be an outbuffer in %d", head)
		} else {
			*inBuffers = append(*inBuffers, bs)
		}

		// Is this the tail of the chain?
		if desc.flags&descriptorFlagHasNext == 0 {
			break
		}

		// Detect loops.
		if desc.next == head {
			return fmt.Errorf("%w: contains a loop", ErrInvalidDescriptorChain)
		}

		next = desc.next
	}
	return nil
}
// getDescriptorChainContents copies the contents of the device-writable
// descriptor chain starting at head into out and returns the number of bytes
// copied. When maxLen is greater than zero, at most maxLen bytes are copied.
// out must have enough capacity for the resulting length.
func (dt *DescriptorTable) getDescriptorChainContents(head uint16, out []byte, maxLen int) (int, error) {
	// BUG FIX: the bounds check must be >=, not >; head == len(dt.descriptors)
	// would panic on the index accesses below.
	if int(head) >= len(dt.descriptors) {
		return 0, fmt.Errorf("%w: index out of range", ErrInvalidDescriptorChain)
	}

	// First pass: walk the chain to determine its total length while
	// validating it. The iteration is limited to the queue size to avoid
	// ending up in an endless loop when things go very wrong.
	length := 0
	next := head
	for range len(dt.descriptors) {
		if next == dt.freeHeadIndex {
			return 0, fmt.Errorf("%w: must not be part of the free chain", ErrInvalidDescriptorChain)
		}
		desc := &dt.descriptors[next]
		if desc.flags&descriptorFlagWritable == 0 {
			return 0, fmt.Errorf("receive queue contains device-readable buffer")
		}
		length += int(desc.length)
		// Is this the tail of the chain?
		if desc.flags&descriptorFlagHasNext == 0 {
			break
		}
		// Detect loops.
		if desc.next == head {
			return 0, fmt.Errorf("%w: contains a loop", ErrInvalidDescriptorChain)
		}
		next = desc.next
	}

	// Honor the caller-provided limit (resolves the previous todo).
	if maxLen > 0 {
		length = min(maxLen, length)
	}

	// Second pass: copy the chain contents into out. BUG FIX: restart from
	// the chain head — the previous implementation resumed from the tail left
	// over by the first pass, which mis-copied every multi-descriptor chain
	// and then hit the consistency panic below.
	out = out[:length]
	copied := 0
	next = head
	for range len(dt.descriptors) {
		desc := &dt.descriptors[next]
		// The descriptor address points to memory not managed by Go, so this
		// conversion is safe. See https://github.com/golang/go/issues/58625
		//goland:noinspection GoVetUnsafePointer
		bs := unsafe.Slice((*byte)(unsafe.Pointer(desc.address)), min(uint32(length-copied), desc.length))
		copied += copy(out[copied:], bs)
		// Is this the tail of the chain?
		if desc.flags&descriptorFlagHasNext == 0 {
			break
		}
		// The first pass already validated the chain; no need to detect loops.
		next = desc.next
	}
	if copied != length {
		panic(fmt.Sprintf("expected to copy %d bytes but only copied %d bytes", length, copied))
	}
	return length, nil
}
// freeDescriptorChain can be used to free a descriptor chain when it is no
// longer in use. The descriptor chain that starts with the given index will be
// put back into the free chain, so the descriptors can be used for later calls
// of [createDescriptorChain].
// The descriptor chain must have been created using [createDescriptorChain] and
// must not have been freed yet (meaning that the head index must not be
// contained in the free chain).
func (dt *DescriptorTable) freeDescriptorChain(head uint16) error {
	// BUG FIX: the bounds check must be >=, not >; head == len(dt.descriptors)
	// would panic on the index accesses below.
	if int(head) >= len(dt.descriptors) {
		return fmt.Errorf("%w: index out of range", ErrInvalidDescriptorChain)
	}

	// Iterate over the chain. The iteration is limited to the queue size to
	// avoid ending up in an endless loop when things go very wrong.
	next := head
	var tailDesc *Descriptor
	var chainLen uint16
	for range len(dt.descriptors) {
		if next == dt.freeHeadIndex {
			return fmt.Errorf("%w: must not be part of the free chain", ErrInvalidDescriptorChain)
		}

		desc := &dt.descriptors[next]
		chainLen++

		// Set the length of all unused descriptors back to zero.
		desc.length = 0

		// Unset all flags except the next flag.
		desc.flags &= descriptorFlagHasNext

		// Is this the tail of the chain?
		if desc.flags&descriptorFlagHasNext == 0 {
			tailDesc = desc
			break
		}

		// Detect loops.
		if desc.next == head {
			return fmt.Errorf("%w: contains a loop", ErrInvalidDescriptorChain)
		}

		next = desc.next
	}
	if tailDesc == nil {
		// A descriptor chain longer than the queue size but without loops
		// should be impossible.
		panic(fmt.Sprintf("could not find a tail for descriptor chain starting at %d", head))
	}

	// The tail descriptor does not have the next flag set, but when it comes
	// back into the free chain, it should have.
	tailDesc.flags = descriptorFlagHasNext

	if dt.freeHeadIndex == noFreeHead {
		// The whole free chain was used up, so we turn this returned descriptor
		// chain into the new free chain by completing the circle and using its
		// head.
		tailDesc.next = head
		dt.freeHeadIndex = head
	} else {
		// Attach the returned chain at the beginning of the free chain but
		// right after the free chain head.
		freeHeadDesc := &dt.descriptors[dt.freeHeadIndex]
		tailDesc.next = freeHeadDesc.next
		freeHeadDesc.next = head
	}
	dt.freeNum += chainLen

	return nil
}
// checkUnusedDescriptorLength asserts that the length of an unused descriptor
// is zero, as it should be.
// This is not a requirement by the virtio spec but rather a thing we do to
// notice when our algorithm goes sideways.
func checkUnusedDescriptorLength(index uint16, desc *Descriptor) {
	if desc.length == 0 {
		return
	}
	panic(fmt.Sprintf("descriptor %d should be unused but has a non-zero length", index))
}

View File

@@ -1,407 +0,0 @@
package virtqueue
import (
"os"
"testing"
"unsafe"
"github.com/stretchr/testify/assert"
)
func TestDescriptorTable_InitializeDescriptors(t *testing.T) {
const queueSize = 32
dt := DescriptorTable{
descriptors: make([]Descriptor, queueSize),
}
assert.NoError(t, dt.initializeDescriptors())
t.Cleanup(func() {
assert.NoError(t, dt.releaseBuffers())
})
for i, descriptor := range dt.descriptors {
assert.NotZero(t, descriptor.address)
assert.Zero(t, descriptor.length)
assert.EqualValues(t, descriptorFlagHasNext, descriptor.flags)
assert.EqualValues(t, (i+1)%queueSize, descriptor.next)
}
}
func TestDescriptorTable_DescriptorChains(t *testing.T) {
// Use a very short queue size to not make this test overly verbose.
const queueSize = 8
pageSize := os.Getpagesize() * 2
// Initialize descriptor table.
dt := DescriptorTable{
descriptors: make([]Descriptor, queueSize),
}
assert.NoError(t, dt.initializeDescriptors())
t.Cleanup(func() {
assert.NoError(t, dt.releaseBuffers())
})
// Some utilities for easier checking if the descriptor table looks as
// expected.
type desc struct {
buffer []byte
flags descriptorFlag
next uint16
}
assertDescriptorTable := func(expected [queueSize]desc) {
for i := 0; i < queueSize; i++ {
actualDesc := &dt.descriptors[i]
expectedDesc := &expected[i]
assert.Equal(t, uint32(len(expectedDesc.buffer)), actualDesc.length)
if len(expectedDesc.buffer) > 0 {
//goland:noinspection GoVetUnsafePointer
assert.EqualValues(t,
unsafe.Slice((*byte)(unsafe.Pointer(actualDesc.address)), actualDesc.length),
expectedDesc.buffer)
}
assert.Equal(t, expectedDesc.flags, actualDesc.flags)
if expectedDesc.flags&descriptorFlagHasNext != 0 {
assert.Equal(t, expectedDesc.next, actualDesc.next)
}
}
}
// Initial state: All descriptors are in the free chain.
assert.Equal(t, uint16(0), dt.freeHeadIndex)
assert.Equal(t, uint16(8), dt.freeNum)
assertDescriptorTable([queueSize]desc{
{
// Free head.
flags: descriptorFlagHasNext,
next: 1,
},
{
flags: descriptorFlagHasNext,
next: 2,
},
{
flags: descriptorFlagHasNext,
next: 3,
},
{
flags: descriptorFlagHasNext,
next: 4,
},
{
flags: descriptorFlagHasNext,
next: 5,
},
{
flags: descriptorFlagHasNext,
next: 6,
},
{
flags: descriptorFlagHasNext,
next: 7,
},
{
flags: descriptorFlagHasNext,
next: 0,
},
})
// Create the first chain.
firstChain, err := dt.createDescriptorChain([][]byte{
makeTestBuffer(t, 26),
makeTestBuffer(t, 256),
}, 1)
assert.NoError(t, err)
assert.Equal(t, uint16(1), firstChain)
// Now there should be a new chain next to the free chain.
assert.Equal(t, uint16(0), dt.freeHeadIndex)
assert.Equal(t, uint16(5), dt.freeNum)
assertDescriptorTable([queueSize]desc{
{
// Free head.
flags: descriptorFlagHasNext,
next: 4,
},
{
// Head of first chain.
buffer: makeTestBuffer(t, 26),
flags: descriptorFlagHasNext,
next: 2,
},
{
buffer: makeTestBuffer(t, 256),
flags: descriptorFlagHasNext,
next: 3,
},
{
// Tail of first chain.
buffer: make([]byte, pageSize),
flags: descriptorFlagWritable,
},
{
flags: descriptorFlagHasNext,
next: 5,
},
{
flags: descriptorFlagHasNext,
next: 6,
},
{
flags: descriptorFlagHasNext,
next: 7,
},
{
flags: descriptorFlagHasNext,
next: 0,
},
})
// Create a second chain with only a single in buffer.
secondChain, err := dt.createDescriptorChain(nil, 1)
assert.NoError(t, err)
assert.Equal(t, uint16(4), secondChain)
// Now there should be two chains next to the free chain.
assert.Equal(t, uint16(0), dt.freeHeadIndex)
assert.Equal(t, uint16(4), dt.freeNum)
assertDescriptorTable([queueSize]desc{
{
// Free head.
flags: descriptorFlagHasNext,
next: 5,
},
{
// Head of the first chain.
buffer: makeTestBuffer(t, 26),
flags: descriptorFlagHasNext,
next: 2,
},
{
buffer: makeTestBuffer(t, 256),
flags: descriptorFlagHasNext,
next: 3,
},
{
// Tail of the first chain.
buffer: make([]byte, pageSize),
flags: descriptorFlagWritable,
},
{
// Head and tail of the second chain.
buffer: make([]byte, pageSize),
flags: descriptorFlagWritable,
},
{
flags: descriptorFlagHasNext,
next: 6,
},
{
flags: descriptorFlagHasNext,
next: 7,
},
{
flags: descriptorFlagHasNext,
next: 0,
},
})
// Create a third chain taking up all remaining descriptors.
thirdChain, err := dt.createDescriptorChain([][]byte{
makeTestBuffer(t, 42),
makeTestBuffer(t, 96),
makeTestBuffer(t, 33),
makeTestBuffer(t, 222),
}, 0)
assert.NoError(t, err)
assert.Equal(t, uint16(5), thirdChain)
// Now there should be three chains and no free chain.
assert.Equal(t, noFreeHead, dt.freeHeadIndex)
assert.Equal(t, uint16(0), dt.freeNum)
assertDescriptorTable([queueSize]desc{
{
// Tail of the third chain.
buffer: makeTestBuffer(t, 222),
},
{
// Head of the first chain.
buffer: makeTestBuffer(t, 26),
flags: descriptorFlagHasNext,
next: 2,
},
{
buffer: makeTestBuffer(t, 256),
flags: descriptorFlagHasNext,
next: 3,
},
{
// Tail of the first chain.
buffer: make([]byte, pageSize),
flags: descriptorFlagWritable,
},
{
// Head and tail of the second chain.
buffer: make([]byte, pageSize),
flags: descriptorFlagWritable,
},
{
// Head of the third chain.
buffer: makeTestBuffer(t, 42),
flags: descriptorFlagHasNext,
next: 6,
},
{
buffer: makeTestBuffer(t, 96),
flags: descriptorFlagHasNext,
next: 7,
},
{
buffer: makeTestBuffer(t, 33),
flags: descriptorFlagHasNext,
next: 0,
},
})
// Free the third chain.
assert.NoError(t, dt.freeDescriptorChain(thirdChain))
// Now there should be two chains and a free chain again.
assert.Equal(t, uint16(5), dt.freeHeadIndex)
assert.Equal(t, uint16(4), dt.freeNum)
assertDescriptorTable([queueSize]desc{
{
flags: descriptorFlagHasNext,
next: 5,
},
{
// Head of the first chain.
buffer: makeTestBuffer(t, 26),
flags: descriptorFlagHasNext,
next: 2,
},
{
buffer: makeTestBuffer(t, 256),
flags: descriptorFlagHasNext,
next: 3,
},
{
// Tail of the first chain.
buffer: make([]byte, pageSize),
flags: descriptorFlagWritable,
},
{
// Head and tail of the second chain.
buffer: make([]byte, pageSize),
flags: descriptorFlagWritable,
},
{
// Free head.
flags: descriptorFlagHasNext,
next: 6,
},
{
flags: descriptorFlagHasNext,
next: 7,
},
{
flags: descriptorFlagHasNext,
next: 0,
},
})
// Free the first chain.
assert.NoError(t, dt.freeDescriptorChain(firstChain))
// Now there should be only a single chain next to the free chain.
assert.Equal(t, uint16(5), dt.freeHeadIndex)
assert.Equal(t, uint16(7), dt.freeNum)
assertDescriptorTable([queueSize]desc{
{
flags: descriptorFlagHasNext,
next: 5,
},
{
flags: descriptorFlagHasNext,
next: 2,
},
{
flags: descriptorFlagHasNext,
next: 3,
},
{
flags: descriptorFlagHasNext,
next: 6,
},
{
// Head and tail of the second chain.
buffer: make([]byte, pageSize),
flags: descriptorFlagWritable,
},
{
// Free head.
flags: descriptorFlagHasNext,
next: 1,
},
{
flags: descriptorFlagHasNext,
next: 7,
},
{
flags: descriptorFlagHasNext,
next: 0,
},
})
// Free the second chain.
assert.NoError(t, dt.freeDescriptorChain(secondChain))
// Now all descriptors should be in the free chain again.
assert.Equal(t, uint16(5), dt.freeHeadIndex)
assert.Equal(t, uint16(8), dt.freeNum)
assertDescriptorTable([queueSize]desc{
{
flags: descriptorFlagHasNext,
next: 5,
},
{
flags: descriptorFlagHasNext,
next: 2,
},
{
flags: descriptorFlagHasNext,
next: 3,
},
{
flags: descriptorFlagHasNext,
next: 6,
},
{
flags: descriptorFlagHasNext,
next: 1,
},
{
// Free head.
flags: descriptorFlagHasNext,
next: 4,
},
{
flags: descriptorFlagHasNext,
next: 7,
},
{
flags: descriptorFlagHasNext,
next: 0,
},
})
}
// makeTestBuffer returns a buffer of the given length whose bytes count down
// from length to 1, so buffers of different lengths have distinct, easily
// recognizable contents.
func makeTestBuffer(t *testing.T, length int) []byte {
	t.Helper()
	buf := make([]byte, length)
	for i := range buf {
		buf[i] = byte(length - i)
	}
	return buf
}

View File

@@ -1,7 +0,0 @@
// Package virtqueue implements the driver-side for a virtio queue as described
// in the specification:
// https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html#x1-270006
// This package does not make assumptions about the device that consumes the
// queue. It rather just allocates the queue structures in memory and provides
// methods to interact with it.
package virtqueue

View File

@@ -1,45 +0,0 @@
package virtqueue
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gvisor.dev/gvisor/pkg/eventfd"
)
// Tests how an eventfd and a waiting goroutine can be gracefully closed.
// Extends the eventfd test suite:
// https://github.com/google/gvisor/blob/0799336d64be65eb97d330606c30162dc3440cab/pkg/eventfd/eventfd_test.go
func TestEventFD_CancelWait(t *testing.T) {
	efd, err := eventfd.Create()
	require.NoError(t, err)
	t.Cleanup(func() {
		assert.NoError(t, efd.Close())
	})

	// The previous version used a plain `var stop bool` written by the test
	// goroutine and read by the worker goroutine without synchronization — a
	// data race that fails under -race. Closing a channel gives the same
	// "stop after next wakeup" semantics with a proper happens-before edge.
	stop := make(chan struct{})
	done := make(chan struct{})
	go func() {
		defer close(done)
		for {
			select {
			case <-stop:
				return
			default:
			}
			_ = efd.Wait()
		}
	}()

	// The goroutine must still be blocked in Wait after a grace period.
	select {
	case <-done:
		t.Fatalf("goroutine ended early")
	case <-time.After(500 * time.Millisecond):
	}

	// Request the stop first, then wake the goroutine out of its blocking
	// Wait so it can observe the request.
	close(stop)
	assert.NoError(t, efd.Notify())

	select {
	case <-done:
	case <-time.After(5 * time.Second):
		t.Error("goroutine did not end")
	}
}

View File

@@ -1,33 +0,0 @@
package virtqueue
import (
"errors"
"fmt"
)
// ErrQueueSizeInvalid is returned when a queue size is invalid.
var ErrQueueSizeInvalid = errors.New("queue size is invalid")

// CheckQueueSize checks if the given value would be a valid size for a
// virtqueue and returns an [ErrQueueSizeInvalid], if not.
func CheckQueueSize(queueSize int) error {
	switch {
	case queueSize <= 0:
		return fmt.Errorf("%w: %d is too small", ErrQueueSizeInvalid, queueSize)
	case queueSize&(queueSize-1) != 0:
		// The queue size must always be a power of 2. This ensures that ring
		// indexes wrap correctly when the 16-bit integers overflow.
		return fmt.Errorf("%w: %d is not a power of 2", ErrQueueSizeInvalid, queueSize)
	case queueSize > 32768:
		// The largest power of 2 that fits into a 16-bit integer is 32768.
		// 2 * 32768 would be 65536 which no longer fits.
		return fmt.Errorf("%w: %d is larger than the maximum possible queue size 32768",
			ErrQueueSizeInvalid, queueSize)
	default:
		return nil
	}
}

View File

@@ -1,59 +0,0 @@
package virtqueue
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestCheckQueueSize(t *testing.T) {
tests := []struct {
name string
queueSize int
containsErr string
}{
{
name: "negative",
queueSize: -1,
containsErr: "too small",
},
{
name: "zero",
queueSize: 0,
containsErr: "too small",
},
{
name: "not a power of 2",
queueSize: 24,
containsErr: "not a power of 2",
},
{
name: "too large",
queueSize: 65536,
containsErr: "larger than the maximum",
},
{
name: "valid 1",
queueSize: 1,
},
{
name: "valid 256",
queueSize: 256,
},
{
name: "valid 32768",
queueSize: 32768,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := CheckQueueSize(tt.queueSize)
if tt.containsErr != "" {
assert.ErrorContains(t, err, tt.containsErr)
} else {
assert.NoError(t, err)
}
})
}
}

View File

@@ -1,530 +0,0 @@
package virtqueue
import (
"context"
"errors"
"fmt"
"os"
"syscall"
"github.com/slackhq/nebula/overlay/eventfd"
"golang.org/x/sys/unix"
)
// SplitQueue is a virtqueue that consists of several parts, where each part is
// writeable by either the driver or the device, but not both.
type SplitQueue struct {
	// size is the size of the queue, i.e. the number of descriptors it holds.
	size int
	// buf is the underlying memory used for the queue. All three queue parts
	// (descriptor table, available ring, used ring) live inside this mapping.
	buf []byte
	descriptorTable *DescriptorTable
	availableRing *AvailableRing
	usedRing *UsedRing
	// kickEventFD is used to signal the device when descriptor chains were
	// added to the available ring.
	kickEventFD eventfd.EventFD
	// callEventFD is used by the device to signal when it has used descriptor
	// chains and put them in the used ring.
	callEventFD eventfd.EventFD
	// stop is used by [SplitQueue.Close] to cancel the goroutine that handles
	// used buffer notifications. It blocks until the goroutine ended.
	stop func() error
	// itemSize is the per-descriptor buffer size in bytes; NewSplitQueue
	// rejects values that are not a multiple of the system page size.
	itemSize int
	// epoll watches callEventFD so consumers can block until the device has
	// placed entries in the used ring.
	epoll eventfd.Epoll
	// more records how many used-ring entries were left untaken by the last
	// capped take; see [SplitQueue.BlockAndGetHeadsCapped].
	more int
}
// NewSplitQueue allocates a new [SplitQueue] in memory. The given queue size
// specifies the number of entries/buffers the queue can hold. This also affects
// the memory consumption.
// itemSize is the buffer size used for each descriptor and must be a multiple
// of the system page size (see [os.Getpagesize]).
func NewSplitQueue(queueSize int, itemSize int) (_ *SplitQueue, err error) {
	if err = CheckQueueSize(queueSize); err != nil {
		return nil, err
	}
	if itemSize%os.Getpagesize() != 0 {
		// Fixed message: the value being validated here is the item size, not
		// the queue size as the previous message claimed.
		return nil, errors.New("item size must be a multiple of os.Getpagesize()")
	}
	sq := SplitQueue{
		size:     queueSize,
		itemSize: itemSize,
	}
	// Clean up a partially initialized queue when something fails.
	defer func() {
		if err != nil {
			_ = sq.Close()
		}
	}()
	// There are multiple ways for how the memory for the virtqueue could be
	// allocated. We could use Go native structs with arrays inside them, but
	// this wouldn't allow us to make the queue size configurable. And including
	// a slice in the Go structs wouldn't work, because this would just put the
	// Go slice descriptor into the memory region which the virtio device will
	// not understand.
	// Additionally, Go does not allow us to ensure a correct alignment of the
	// parts of the virtqueue, as it is required by the virtio specification.
	//
	// To resolve this, let's just allocate the memory manually by allocating
	// one or more memory pages, depending on the queue size. Making the
	// virtqueue start at the beginning of a page is not strictly necessary, as
	// the virtio specification does not require it to be continuous in the
	// physical memory of the host (e.g. the vhost implementation in the kernel
	// always uses copy_from_user to access it), but this makes it very easy to
	// guarantee the alignment. Also, it is not required for the virtqueue parts
	// to be in the same memory region, as we pass separate pointers to them to
	// the device, but this design just makes things easier to implement.
	//
	// One added benefit of allocating the memory manually is, that we have full
	// control over its lifetime and don't risk the garbage collector to collect
	// our valuable structures while the device still works with them.

	// The descriptor table is at the start of the page, so alignment is not an
	// issue here.
	descriptorTableStart := 0
	descriptorTableEnd := descriptorTableStart + descriptorTableSize(queueSize)
	availableRingStart := align(descriptorTableEnd, availableRingAlignment)
	availableRingEnd := availableRingStart + availableRingSize(queueSize)
	usedRingStart := align(availableRingEnd, usedRingAlignment)
	usedRingEnd := usedRingStart + usedRingSize(queueSize)
	sq.buf, err = unix.Mmap(-1, 0, usedRingEnd,
		unix.PROT_READ|unix.PROT_WRITE,
		unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
	if err != nil {
		return nil, fmt.Errorf("allocate virtqueue buffer: %w", err)
	}
	sq.descriptorTable = newDescriptorTable(queueSize, sq.buf[descriptorTableStart:descriptorTableEnd], sq.itemSize)
	sq.availableRing = newAvailableRing(queueSize, sq.buf[availableRingStart:availableRingEnd])
	sq.usedRing = newUsedRing(queueSize, sq.buf[usedRingStart:usedRingEnd])
	sq.kickEventFD, err = eventfd.New()
	if err != nil {
		return nil, fmt.Errorf("create kick event file descriptor: %w", err)
	}
	sq.callEventFD, err = eventfd.New()
	if err != nil {
		return nil, fmt.Errorf("create call event file descriptor: %w", err)
	}
	if err = sq.descriptorTable.initializeDescriptors(); err != nil {
		return nil, fmt.Errorf("initialize descriptors: %w", err)
	}
	sq.epoll, err = eventfd.NewEpoll()
	if err != nil {
		return nil, err
	}
	err = sq.epoll.AddEvent(sq.callEventFD.FD())
	if err != nil {
		return nil, err
	}
	// Consume used buffer notifications in the background.
	sq.stop = sq.startConsumeUsedRing()
	return &sq, nil
}
// Size returns the size of this queue, which is the number of entries/buffers
// this queue can hold.
func (sq *SplitQueue) Size() int {
	return sq.size
}

// DescriptorTable returns the [DescriptorTable] behind this queue.
// Mutating it directly bypasses the queue's bookkeeping; prefer the
// [SplitQueue] methods where possible.
func (sq *SplitQueue) DescriptorTable() *DescriptorTable {
	return sq.descriptorTable
}

// AvailableRing returns the [AvailableRing] behind this queue.
// Mutating it directly bypasses the queue's bookkeeping; prefer the
// [SplitQueue] methods where possible.
func (sq *SplitQueue) AvailableRing() *AvailableRing {
	return sq.availableRing
}

// UsedRing returns the [UsedRing] behind this queue.
// Mutating it directly bypasses the queue's bookkeeping; prefer the
// [SplitQueue] methods where possible.
func (sq *SplitQueue) UsedRing() *UsedRing {
	return sq.usedRing
}

// KickEventFD returns the kick event file descriptor behind this queue.
// The returned file descriptor should be used with great care to not interfere
// with this implementation.
func (sq *SplitQueue) KickEventFD() int {
	return sq.kickEventFD.FD()
}

// CallEventFD returns the call event file descriptor behind this queue.
// The returned file descriptor should be used with great care to not interfere
// with this implementation.
func (sq *SplitQueue) CallEventFD() int {
	return sq.callEventFD.FD()
}
// startConsumeUsedRing returns a stop function that is stored as sq.stop and
// invoked from [SplitQueue.Close].
// NOTE(review): despite its name, this no longer starts a goroutine — it only
// builds the cancellation helper. The original "todo rename" still applies.
func (sq *SplitQueue) startConsumeUsedRing() func() error {
	return func() error {
		// The goroutine blocks until it receives a signal on the event file
		// descriptor, so it will never notice the context being canceled.
		// To resolve this, we can just produce a fake-signal ourselves to wake
		// it up.
		if err := sq.callEventFD.Kick(); err != nil {
			return fmt.Errorf("wake up goroutine: %w", err)
		}
		return nil
	}
}
// BlockAndGetHeads waits for the device to signal that it has used descriptor
// chains and returns all [UsedElement]s it could take from the used ring.
// It returns ctx.Err() once the context is canceled.
// NOTE(review): epoll.Block itself is not interrupted by ctx — cancellation is
// only observed after a wakeup. Confirm shutdown paths kick the call event fd
// (see the stop func built in startConsumeUsedRing).
func (sq *SplitQueue) BlockAndGetHeads(ctx context.Context) ([]UsedElement, error) {
	var n int
	var err error
	for ctx.Err() == nil {
		// Wait for a signal from the device.
		if n, err = sq.epoll.Block(); err != nil {
			return nil, fmt.Errorf("wait: %w", err)
		}
		if n > 0 {
			// take(-1) drains as much as it can; stillNeedToTake reports how
			// many entries remain in the ring.
			stillNeedToTake, out := sq.usedRing.take(-1)
			sq.more = stillNeedToTake
			if stillNeedToTake == 0 {
				// Ring drained — reset epoll readiness. The error is
				// deliberately dropped here (the original "???" marker);
				// confirm Clear failures are truly ignorable.
				_ = sq.epoll.Clear() //???
			}
			return out, nil
		}
	}
	return nil, ctx.Err()
}

// TakeSingle returns the head index of a single used descriptor chain,
// blocking on the call event fd when the used ring is currently empty.
// It returns ctx.Err() once the context is canceled.
func (sq *SplitQueue) TakeSingle(ctx context.Context) (uint16, error) {
	var n int
	var err error
	for ctx.Err() == nil {
		// Fast path: an entry may already be available without blocking.
		out, ok := sq.usedRing.takeOne()
		if ok {
			return out, nil
		}
		// Wait for a signal from the device.
		if n, err = sq.epoll.Block(); err != nil {
			return 0, fmt.Errorf("wait: %w", err)
		}
		if n > 0 {
			out, ok = sq.usedRing.takeOne()
			if ok {
				// NOTE(review): Clear is only called on the success path and
				// its error is dropped ("???" in the original) — verify this
				// cannot leave a stale readiness state.
				_ = sq.epoll.Clear() //???
				return out, nil
			} else {
				// Spurious wakeup: signaled but nothing to take; retry.
				continue //???
			}
		}
	}
	return 0, ctx.Err()
}

// BlockAndGetHeadsCapped behaves like [SplitQueue.BlockAndGetHeads] but takes
// at most maxToTake entries, remembering the remainder in sq.more so the next
// call can return leftovers without blocking.
func (sq *SplitQueue) BlockAndGetHeadsCapped(ctx context.Context, maxToTake int) ([]UsedElement, error) {
	var n int
	var err error
	for ctx.Err() == nil {
		//we have leftovers in the fridge
		if sq.more > 0 {
			stillNeedToTake, out := sq.usedRing.take(maxToTake)
			sq.more = stillNeedToTake
			return out, nil
		}
		//look inside the fridge
		stillNeedToTake, out := sq.usedRing.take(maxToTake)
		if len(out) > 0 {
			sq.more = stillNeedToTake
			return out, nil
		}
		//fridge is empty I guess
		// Wait for a signal from the device.
		if n, err = sq.epoll.Block(); err != nil {
			return nil, fmt.Errorf("wait: %w", err)
		}
		if n > 0 {
			// NOTE(review): unlike BlockAndGetHeads, this returns even when
			// the post-wakeup take yields nothing — confirm callers tolerate
			// an empty, nil-error result.
			_ = sq.epoll.Clear() //???
			stillNeedToTake, out = sq.usedRing.take(maxToTake)
			sq.more = stillNeedToTake
			return out, nil
		}
	}
	return nil, ctx.Err()
}
// OfferInDescriptorChains creates a single device-writable (in) descriptor
// chain and makes it available to the device via the available ring, then
// kicks the device.
//
// (The previous comment block documented a removed method with outBuffers and
// numInBuffers parameters; this method takes no arguments.)
//
// When the queue is full, [ErrNotEnoughFreeDescriptors] is returned unwrapped
// so callers can compare against it directly; any other failure is wrapped.
//
// On success, the head index of the new chain is returned. Callers should
// observe used chains through the used ring (e.g. [SplitQueue.BlockAndGetHeads])
// and eventually return them with [SplitQueue.FreeDescriptorChain] or re-offer
// them via [SplitQueue.OfferDescriptorChains]; otherwise the queue runs full
// and further offers will fail.
func (sq *SplitQueue) OfferInDescriptorChains() (uint16, error) {
	// The original retry loop was degenerate — every branch either broke or
	// returned on the first iteration — so it is a single call here.
	head, err := sq.descriptorTable.createDescriptorForInputs()
	if err != nil {
		// I don't wanna use errors.Is, it's slow
		//goland:noinspection GoDirectComparisonOfErrors
		if err == ErrNotEnoughFreeDescriptors {
			// Propagate unmodified so callers can match on the sentinel.
			return 0, err
		}
		return 0, fmt.Errorf("create descriptor chain: %w", err)
	}
	// Make the descriptor chain available to the device.
	sq.availableRing.offerSingle(head)
	// Notify the device to make it process the updated available ring.
	if err := sq.kickEventFD.Kick(); err != nil {
		return head, fmt.Errorf("notify device: %w", err)
	}
	return head, nil
}
// OfferOutDescriptorChains offers one descriptor chain per out buffer to the
// device, each chain consisting of the shared prepend buffer followed by one
// (possibly split) out buffer. It returns the head indexes of all chains that
// were made available.
// NOTE(review): the prepend slice is referenced by every chain — confirm the
// device has consumed all chains before the caller reuses it.
func (sq *SplitQueue) OfferOutDescriptorChains(prepend []byte, outBuffers [][]byte) ([]uint16, error) {
	// TODO change this
	// Each descriptor can only hold a whole memory page, so split large out
	// buffers into multiple smaller ones.
	outBuffers = splitBuffers(outBuffers, sq.itemSize)
	chains := make([]uint16, len(outBuffers))
	// Create a descriptor chain for the given buffers.
	var (
		head uint16
		err  error
	)
	for i := range outBuffers {
		for {
			bufs := [][]byte{prepend, outBuffers[i]}
			head, err = sq.descriptorTable.createDescriptorChain(bufs, 0)
			if err == nil {
				break
			}
			// I don't wanna use errors.Is, it's slow
			//goland:noinspection GoDirectComparisonOfErrors
			if err == ErrNotEnoughFreeDescriptors {
				// Wait for more free descriptors to be put back into the queue.
				// If the number of free descriptors is still not sufficient, we'll
				// land here again.
				//todo should never happen
				// NOTE(review): this is a busy-wait that spins on sched_yield
				// until descriptors are freed from another goroutine — verify
				// it cannot deadlock when nothing else frees descriptors.
				syscall.Syscall(syscall.SYS_SCHED_YIELD, 0, 0, 0) // Cheap barrier
				continue
			}
			return nil, fmt.Errorf("create descriptor chain: %w", err)
		}
		chains[i] = head
	}
	// Make the descriptor chain available to the device.
	sq.availableRing.offer(chains)
	// Notify the device to make it process the updated available ring.
	if err := sq.kickEventFD.Kick(); err != nil {
		return chains, fmt.Errorf("notify device: %w", err)
	}
	return chains, nil
}
// GetDescriptorChain returns the device-readable buffers (out buffers) and
// device-writable buffers (in buffers) of the descriptor chain with the given
// head index.
// The head index must be one that was returned by a previous call to
// [SplitQueue.OfferDescriptorChain] and the descriptor chain must not have been
// freed yet.
//
// Be careful to only access the returned buffer slices when the device is no
// longer using them. They must not be accessed after
// [SplitQueue.FreeDescriptorChain] has been called.
func (sq *SplitQueue) GetDescriptorChain(head uint16) (outBuffers, inBuffers [][]byte, err error) {
	return sq.descriptorTable.getDescriptorChain(head)
}

// GetDescriptorItem returns the buffer backing the descriptor with the given
// head index, after resetting the descriptor's length to the queue's full
// item size.
// NOTE(review): head is used to index the descriptor table without a bounds
// check — an out-of-range index panics rather than returning an error.
func (sq *SplitQueue) GetDescriptorItem(head uint16) ([]byte, error) {
	sq.descriptorTable.descriptors[head].length = uint32(sq.descriptorTable.itemSize)
	return sq.descriptorTable.getDescriptorItem(head)
}

// GetDescriptorChainContents copies the contents of the descriptor chain with
// the given head index into out, limited by maxLen, and returns the number of
// bytes copied.
func (sq *SplitQueue) GetDescriptorChainContents(head uint16, out []byte, maxLen int) (int, error) {
	return sq.descriptorTable.getDescriptorChainContents(head, out, maxLen)
}

// GetDescriptorInbuffers fills inBuffers with the device-writable buffers of
// the descriptor chain with the given head index.
func (sq *SplitQueue) GetDescriptorInbuffers(head uint16, inBuffers *[][]byte) error {
	return sq.descriptorTable.getDescriptorInbuffers(head, inBuffers)
}
// FreeDescriptorChain frees the descriptor chain with the given head index.
// The head index must be one that was returned by a previous call to
// [SplitQueue.OfferDescriptorChain] and the descriptor chain must not have been
// freed yet.
//
// This creates new room in the queue which can be used by following
// [SplitQueue.OfferDescriptorChain] calls.
// When there are outstanding calls for [SplitQueue.OfferDescriptorChain] that
// are waiting for free room in the queue, they may become unblocked by this.
func (sq *SplitQueue) FreeDescriptorChain(head uint16) error {
	//not called under lock
	if err := sq.descriptorTable.freeDescriptorChain(head); err != nil {
		return fmt.Errorf("free: %w", err)
	}
	return nil
}

// SetDescSize overwrites the length field of the descriptor with the given
// head index, e.g. to shrink it to the payload actually written.
// NOTE(review): no bounds check on head — an out-of-range index panics.
func (sq *SplitQueue) SetDescSize(head uint16, sz int) {
	//not called under lock
	sq.descriptorTable.descriptors[int(head)].length = uint32(sz)
}

// OfferDescriptorChains re-offers already-built descriptor chains to the
// device by placing their heads in the available ring, kicking the device
// only when kick is true (so callers can batch several offers per kick).
// NOTE(review): unlike FreeDescriptorChain, the chains are NOT returned to the
// free list first — the commented-out code and the "may break eventually"
// todo below suggest this is a known open question.
func (sq *SplitQueue) OfferDescriptorChains(chains []uint16, kick bool) error {
	//todo not doing this may break eventually?
	//not called under lock
	//if err := sq.descriptorTable.freeDescriptorChain(head); err != nil {
	//	return fmt.Errorf("free: %w", err)
	//}
	// Make the descriptor chain available to the device.
	sq.availableRing.offer(chains)
	// Notify the device to make it process the updated available ring.
	if kick {
		return sq.Kick()
	}
	return nil
}
// Kick notifies the device that the available ring has been updated by
// signaling the kick event file descriptor.
func (sq *SplitQueue) Kick() error {
	err := sq.kickEventFD.Kick()
	if err == nil {
		return nil
	}
	return fmt.Errorf("notify device: %w", err)
}
// Close releases all resources used for this queue.
// The implementation will try to release as many resources as possible and
// collect potential errors before returning them.
// NOTE(review): Close is also invoked by NewSplitQueue's cleanup on partial
// initialization — confirm descriptorTable.releaseBuffers tolerates a nil
// receiver for the case where mmap failed before the table was created.
func (sq *SplitQueue) Close() error {
	var errs []error
	if sq.stop != nil {
		// This has to happen before the event file descriptors may be closed.
		if err := sq.stop(); err != nil {
			errs = append(errs, fmt.Errorf("stop consume used ring: %w", err))
		}
		// Make sure that this code block is executed only once.
		sq.stop = nil
	}
	if err := sq.kickEventFD.Close(); err != nil {
		errs = append(errs, fmt.Errorf("close kick event file descriptor: %w", err))
	}
	if err := sq.callEventFD.Close(); err != nil {
		errs = append(errs, fmt.Errorf("close call event file descriptor: %w", err))
	}
	if err := sq.descriptorTable.releaseBuffers(); err != nil {
		errs = append(errs, fmt.Errorf("release descriptor buffers: %w", err))
	}
	if sq.buf != nil {
		// Only forget the mapping once the kernel has actually released it.
		if err := unix.Munmap(sq.buf); err == nil {
			sq.buf = nil
		} else {
			errs = append(errs, fmt.Errorf("unmap virtqueue buffer: %w", err))
		}
	}
	return errors.Join(errs...)
}
// ensureInitialized is used as a guard to prevent methods from being called on
// an uninitialized instance. It panics because calling into an uninitialized
// queue is a programmer error, not a condition callers could recover from.
func (sq *SplitQueue) ensureInitialized() {
	if sq.buf == nil {
		// The previous message said "used ring is not initialized", which was
		// copy-pasted from UsedRing and misleading when this guard fires for
		// the queue as a whole.
		panic("split queue is not initialized")
	}
}
// align rounds index up to the next multiple of alignment, returning it
// unchanged when it is already aligned.
func align(index, alignment int) int {
	if rem := index % alignment; rem != 0 {
		return index + (alignment - rem)
	}
	return index
}
// splitBuffers processes a list of buffers and splits each buffer that is
// larger than the size limit into multiple smaller buffers.
// When no buffer exceeds the limit, the input slice is returned as-is so the
// common case stays allocation-free.
func splitBuffers(buffers [][]byte, sizeLimit int) [][]byte {
	needsSplit := false
	for _, b := range buffers {
		if len(b) > sizeLimit {
			needsSplit = true
			break
		}
	}
	if !needsSplit {
		return buffers
	}
	return reallySplitBuffers(buffers, sizeLimit)
}

// reallySplitBuffers builds a new buffer list in which every entry is at most
// sizeLimit bytes, slicing oversized buffers into consecutive chunks that
// share the original backing array. Zero-length buffers produce no output
// entry, matching the original element-skipping behavior.
func reallySplitBuffers(buffers [][]byte, sizeLimit int) [][]byte {
	result := make([][]byte, 0, len(buffers))
	for _, buffer := range buffers {
		rest := buffer
		for len(rest) > sizeLimit {
			result = append(result, rest[:sizeLimit])
			rest = rest[sizeLimit:]
		}
		if len(rest) > 0 {
			result = append(result, rest)
		}
	}
	return result
}

View File

@@ -1,105 +0,0 @@
package virtqueue
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestSplitQueue_MemoryAlignment(t *testing.T) {
tests := []struct {
name string
queueSize int
}{
{
name: "minimal queue size",
queueSize: 1,
},
{
name: "small queue size",
queueSize: 8,
},
{
name: "large queue size",
queueSize: 256,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
sq, err := NewSplitQueue(tt.queueSize)
require.NoError(t, err)
assert.Zero(t, sq.descriptorTable.Address()%descriptorTableAlignment)
assert.Zero(t, sq.availableRing.Address()%availableRingAlignment)
assert.Zero(t, sq.usedRing.Address()%usedRingAlignment)
})
}
}
func TestSplitBuffers(t *testing.T) {
const sizeLimit = 16
tests := []struct {
name string
buffers [][]byte
expected [][]byte
}{
{
name: "no buffers",
buffers: make([][]byte, 0),
expected: make([][]byte, 0),
},
{
name: "small",
buffers: [][]byte{
make([]byte, 11),
},
expected: [][]byte{
make([]byte, 11),
},
},
{
name: "exact size",
buffers: [][]byte{
make([]byte, sizeLimit),
},
expected: [][]byte{
make([]byte, sizeLimit),
},
},
{
name: "large",
buffers: [][]byte{
make([]byte, 42),
},
expected: [][]byte{
make([]byte, 16),
make([]byte, 16),
make([]byte, 10),
},
},
{
name: "mixed",
buffers: [][]byte{
make([]byte, 7),
make([]byte, 30),
make([]byte, 15),
make([]byte, 32),
},
expected: [][]byte{
make([]byte, 7),
make([]byte, 16),
make([]byte, 14),
make([]byte, 15),
make([]byte, 16),
make([]byte, 16),
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
actual := splitBuffers(tt.buffers, sizeLimit)
assert.Equal(t, tt.expected, actual)
})
}
}

View File

@@ -1,21 +0,0 @@
package virtqueue
// usedElementSize is the number of bytes needed to store a [UsedElement] in
// memory.
const usedElementSize = 8
// UsedElement is an element of the [UsedRing] and describes a descriptor chain
// that was used by the device.
type UsedElement struct {
// DescriptorIndex is the index of the head of the used descriptor chain in
// the [DescriptorTable].
// The index is 32-bit here for padding reasons.
DescriptorIndex uint32
// Length is the number of bytes written into the device writable portion of
// the buffer described by the descriptor chain.
Length uint32
}
func (u *UsedElement) GetHead() uint16 {
return uint16(u.DescriptorIndex)
}

View File

@@ -1,12 +0,0 @@
package virtqueue
import (
"testing"
"unsafe"
"github.com/stretchr/testify/assert"
)
func TestUsedElement_Size(t *testing.T) {
assert.EqualValues(t, usedElementSize, unsafe.Sizeof(UsedElement{}))
}

View File

@@ -1,184 +0,0 @@
package virtqueue
import (
"fmt"
"unsafe"
)
// usedRingFlag is a flag that describes a [UsedRing].
type usedRingFlag uint16
const (
// usedRingFlagNoNotify is used by the host to advise the guest to not
// kick it when adding a buffer. It's unreliable, so it's simply an
// optimization. Guest will still kick when it's out of buffers.
usedRingFlagNoNotify usedRingFlag = 1 << iota
)
// usedRingSize is the number of bytes needed to store a [UsedRing] with the
// given queue size in memory: a 16-bit flags field and a 16-bit ring index up
// front, one [UsedElement] per queue entry, and the trailing 16-bit
// availableEvent field.
func usedRingSize(queueSize int) int {
	const header = 2 + 2 // flags + ring index
	const trailer = 2    // availableEvent
	return header + queueSize*usedElementSize + trailer
}
// usedRingAlignment is the minimum alignment of a [UsedRing] in memory, as
// required by the virtio spec.
const usedRingAlignment = 4

// UsedRing is where the device returns descriptor chains once it is done with
// them. Each ring entry is a [UsedElement]. It is only written to by the device
// and read by the driver.
//
// Because the size of the ring depends on the queue size, we cannot define a
// Go struct with a static size that maps to the memory of the ring. Instead,
// this struct only contains pointers to the corresponding memory areas.
type UsedRing struct {
	// initialized is set by newUsedRing; methods panic when it is false.
	initialized bool
	// flags that describe this ring.
	flags *usedRingFlag
	// ringIndex indicates where the device would put the next entry into the
	// ring (modulo the queue size).
	ringIndex *uint16
	// ring contains the [UsedElement]s. It wraps around at queue size.
	ring []UsedElement
	// availableEvent is not used by this implementation, but we reserve it
	// anyway to avoid issues in case a device may try to write to it, contrary
	// to the virtio specification.
	availableEvent *uint16
	// lastIndex is the internal ringIndex up to which all [UsedElement]s were
	// processed.
	lastIndex uint16
	// NOTE(review): the mutex was removed; take/takeOne are not safe for
	// concurrent use and rely on external serialization.
	//mu sync.Mutex
}
// newUsedRing creates a used ring that uses the given underlying memory. The
// length of the memory slice must match the size needed for the ring (see
// [usedRingSize]) for the given queue size.
func newUsedRing(queueSize int, mem []byte) *UsedRing {
	ringSize := usedRingSize(queueSize)
	if len(mem) != ringSize {
		panic(fmt.Sprintf("memory size (%v) does not match required size "+
			"for used ring: %v", len(mem), ringSize))
	}

	// Overlay the virtio wire layout directly onto mem: flags at offset 0,
	// ring index at offset 2, the element array at offset 4, and the
	// available-event word in the final two bytes.
	r := UsedRing{
		initialized:    true,
		flags:          (*usedRingFlag)(unsafe.Pointer(&mem[0])),
		ringIndex:      (*uint16)(unsafe.Pointer(&mem[2])),
		ring:           unsafe.Slice((*UsedElement)(unsafe.Pointer(&mem[4])), queueSize),
		availableEvent: (*uint16)(unsafe.Pointer(&mem[ringSize-2])),
	}
	// Start processing from whatever index the memory currently holds.
	r.lastIndex = *r.ringIndex
	return &r
}
// Address returns the pointer to the beginning of the ring in memory.
// Do not modify the memory directly to not interfere with this implementation.
func (r *UsedRing) Address() uintptr {
	if !r.initialized {
		panic("used ring is not initialized")
	}
	// flags is the first field of the ring's memory layout.
	return uintptr(unsafe.Pointer(r.flags))
}
// take returns new [UsedElement]s that the device put into the ring and that
// weren't already returned by a previous call to this method. When maxToTake
// is > 0 at most that many elements are returned and the first return value
// reports how many remain unread; otherwise everything available is taken.
//
// Not safe for concurrent use; callers must serialize access externally.
func (r *UsedRing) take(maxToTake int) (int, []UsedElement) {
	ringIndex := *r.ringIndex
	if ringIndex == r.lastIndex {
		// Nothing new.
		return 0, nil
	}

	// Calculate the number of new used elements that we can read from the
	// ring. The 16-bit ring index may wrap, but unsigned uint16 subtraction
	// already yields the correct count modulo 2^16, so the result is always
	// in [1, 0xffff] here. (A previous version carried an unreachable
	// "count < 0" correction branch; int(uint16) can never be negative.)
	count := int(ringIndex - r.lastIndex)

	stillNeedToTake := 0
	if maxToTake > 0 {
		stillNeedToTake = count - maxToTake
		if stillNeedToTake < 0 {
			stillNeedToTake = 0
		}
		count = min(count, maxToTake)
	}

	// The number of new elements can never exceed the queue size.
	if count > len(r.ring) {
		panic("used ring contains more new elements than the ring is long")
	}

	elems := make([]UsedElement, count)
	for i := range count {
		elems[i] = r.ring[r.lastIndex%uint16(len(r.ring))]
		r.lastIndex++
	}
	return stillNeedToTake, elems
}
// takeOne returns the head descriptor index of the next unprocessed
// [UsedElement], or (0xffff, false) when the device has produced nothing new.
//
// Not safe for concurrent use; callers must serialize access externally.
func (r *UsedRing) takeOne() (uint16, bool) {
	ringIndex := *r.ringIndex
	if ringIndex == r.lastIndex {
		// Nothing new.
		return 0xffff, false
	}

	// The 16-bit ring index may wrap, but unsigned uint16 subtraction
	// already yields the correct count modulo 2^16, so count is always in
	// [1, 0xffff] here. (Previous "count < 0" and "count == 0" branches
	// were unreachable and have been removed.)
	count := int(ringIndex - r.lastIndex)

	// The number of new elements can never exceed the queue size.
	if count > len(r.ring) {
		panic("used ring contains more new elements than the ring is long")
	}

	out := r.ring[r.lastIndex%uint16(len(r.ring))].GetHead()
	r.lastIndex++
	return out, true
}
// InitOfferSingle places a single [UsedElement] describing descriptor chain
// head x with the given byte length into the ring and advances the ring
// index. It is only used to pre-fill the used queue at startup, and should
// not be used if the device is running! Callers are expected to hold the
// queue's lock.
func (r *UsedRing) InitOfferSingle(x uint16, size int) {
	// The 16-bit ring index may overflow. This is expected and is not an
	// issue because the size of the ring array (which equals the queue
	// size) is always a power of 2 and smaller than the highest possible
	// 16-bit value. (The vestigial always-zero "offset" variable from the
	// multi-element version of this code has been removed.)
	insertIndex := int(*r.ringIndex) % len(r.ring)
	r.ring[insertIndex] = UsedElement{
		DescriptorIndex: uint32(x),
		Length:          uint32(size),
	}
	// Publish the element by advancing the ring index.
	*r.ringIndex += 1
}

View File

@@ -1,136 +0,0 @@
package virtqueue
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestUsedRing_MemoryLayout verifies that the UsedRing accessors map onto the
// little-endian wire layout required by virtio: 16-bit flags, 16-bit ring
// index, queueSize elements of (32-bit descriptor index, 32-bit length), and
// a trailing 16-bit available-event slot.
func TestUsedRing_MemoryLayout(t *testing.T) {
	const queueSize = 2
	memory := make([]byte, usedRingSize(queueSize))
	r := newUsedRing(queueSize, memory)
	*r.flags = 0x01ff
	*r.ringIndex = 1
	r.ring[0] = UsedElement{
		DescriptorIndex: 0x0123,
		Length:          0x4567,
	}
	r.ring[1] = UsedElement{
		DescriptorIndex: 0x89ab,
		Length:          0xcdef,
	}
	assert.Equal(t, []byte{
		0xff, 0x01, // flags
		0x01, 0x00, // ring index
		0x23, 0x01, 0x00, 0x00, // ring[0].DescriptorIndex
		0x67, 0x45, 0x00, 0x00, // ring[0].Length
		0xab, 0x89, 0x00, 0x00, // ring[1].DescriptorIndex
		0xef, 0xcd, 0x00, 0x00, // ring[1].Length
		0x00, 0x00, // available event
	}, memory)
}
//func TestUsedRing_Take(t *testing.T) {
// const queueSize = 8
//
// tests := []struct {
// name string
// ring []UsedElement
// ringIndex uint16
// lastIndex uint16
// expected []UsedElement
// }{
// {
// name: "nothing new",
// ring: []UsedElement{
// {DescriptorIndex: 1},
// {DescriptorIndex: 2},
// {DescriptorIndex: 3},
// {DescriptorIndex: 4},
// {},
// {},
// {},
// {},
// },
// ringIndex: 4,
// lastIndex: 4,
// expected: nil,
// },
// {
// name: "no overflow",
// ring: []UsedElement{
// {DescriptorIndex: 1},
// {DescriptorIndex: 2},
// {DescriptorIndex: 3},
// {DescriptorIndex: 4},
// {},
// {},
// {},
// {},
// },
// ringIndex: 4,
// lastIndex: 1,
// expected: []UsedElement{
// {DescriptorIndex: 2},
// {DescriptorIndex: 3},
// {DescriptorIndex: 4},
// },
// },
// {
// name: "ring overflow",
// ring: []UsedElement{
// {DescriptorIndex: 9},
// {DescriptorIndex: 10},
// {DescriptorIndex: 3},
// {DescriptorIndex: 4},
// {DescriptorIndex: 5},
// {DescriptorIndex: 6},
// {DescriptorIndex: 7},
// {DescriptorIndex: 8},
// },
// ringIndex: 10,
// lastIndex: 7,
// expected: []UsedElement{
// {DescriptorIndex: 8},
// {DescriptorIndex: 9},
// {DescriptorIndex: 10},
// },
// },
// {
// name: "index overflow",
// ring: []UsedElement{
// {DescriptorIndex: 9},
// {DescriptorIndex: 10},
// {DescriptorIndex: 3},
// {DescriptorIndex: 4},
// {DescriptorIndex: 5},
// {DescriptorIndex: 6},
// {DescriptorIndex: 7},
// {DescriptorIndex: 8},
// },
// ringIndex: 2,
// lastIndex: 65535,
// expected: []UsedElement{
// {DescriptorIndex: 8},
// {DescriptorIndex: 9},
// {DescriptorIndex: 10},
// },
// },
// }
// for _, tt := range tests {
// t.Run(tt.name, func(t *testing.T) {
// memory := make([]byte, usedRingSize(queueSize))
// r := newUsedRing(queueSize, memory)
//
// copy(r.ring, tt.ring)
// *r.ringIndex = tt.ringIndex
// r.lastIndex = tt.lastIndex
//
// assert.Equal(t, tt.expected, r.take())
// })
// }
//}

View File

@@ -0,0 +1,220 @@
//go:build linux && !android && !e2e_testing
package overlay
import (
"fmt"
"sync"
wgtun "github.com/slackhq/nebula/wgstack/tun"
)
// wireguardTunIO bridges a WireGuard TUN device (which performs batched reads
// and writes of virtio-net-framed buffers) to Nebula's overlay packet I/O.
type wireguardTunIO struct {
	dev       wgtun.Device
	mtu       int
	batchSize int

	// readMu guards the shared read-path scratch state below.
	readMu      sync.Mutex
	readBuffers [][]byte
	readLens    []int
	// legacyBuf backs the single-packet io.Reader-style Read path.
	legacyBuf []byte

	// writeMu guards the shared write-path scratch state below.
	writeMu      sync.Mutex
	writeBuf     []byte
	writeWrap    [][]byte
	writeBuffers [][]byte
}
// newWireguardTunIO wraps dev with Nebula-facing read/write adapters. A
// non-positive device batch size is clamped to 1 and a non-positive mtu falls
// back to DefaultMTU. The scratch buffers reserve virtio-net header headroom.
func newWireguardTunIO(dev wgtun.Device, mtu int) *wireguardTunIO {
	batch := dev.BatchSize()
	if batch < 1 {
		batch = 1
	}
	if mtu < 1 {
		mtu = DefaultMTU
	}
	frameSize := wgtun.VirtioNetHdrLen + mtu
	return &wireguardTunIO{
		dev:       dev,
		mtu:       mtu,
		batchSize: batch,
		readLens:  make([]int, batch),
		legacyBuf: make([]byte, frameSize),
		writeBuf:  make([]byte, frameSize),
		writeWrap: make([][]byte, 1),
	}
}
// Read implements the legacy single-packet read path: it reads one frame from
// the device into legacyBuf and copies the payload (without the virtio-net
// header) into p, returning the payload length.
//
// Fix: the previous implementation read into w.readBuffers[0] but always
// copied the payload out of w.legacyBuf. Once ReadIntoBatch had run,
// readBuffers[0] pointed at a released pool packet buffer, so Read returned
// stale legacyBuf contents instead of the data just received. Reading through
// legacyBuf directly keeps the read target and the copy source consistent.
func (w *wireguardTunIO) Read(p []byte) (int, error) {
	w.readMu.Lock()
	defer w.readMu.Unlock()
	bufs := [][]byte{w.legacyBuf}
	n, err := w.dev.Read(bufs, w.readLens[:1], wgtun.VirtioNetHdrLen)
	if err != nil {
		return 0, err
	}
	if n == 0 {
		return 0, nil
	}
	length := w.readLens[0]
	copy(p, w.legacyBuf[wgtun.VirtioNetHdrLen:wgtun.VirtioNetHdrLen+length])
	return length, nil
}
// Write sends a single payload to the TUN device, prefixing it with a zeroed
// virtio-net header in the shared write buffer. Returns the number of payload
// bytes consumed. Serialized by writeMu.
func (w *wireguardTunIO) Write(p []byte) (int, error) {
	if len(p) > w.mtu {
		return 0, fmt.Errorf("wireguard tun: payload exceeds MTU (%d > %d)", len(p), w.mtu)
	}
	w.writeMu.Lock()
	defer w.writeMu.Unlock()
	frame := w.writeBuf[:wgtun.VirtioNetHdrLen+len(p)]
	header := frame[:wgtun.VirtioNetHdrLen]
	for i := range header {
		header[i] = 0
	}
	copy(frame[wgtun.VirtioNetHdrLen:], p)
	w.writeWrap[0] = frame
	if n, err := w.dev.Write(w.writeWrap, wgtun.VirtioNetHdrLen); err != nil {
		return n, err
	}
	return len(p), nil
}
// ReadIntoBatch reads up to batchSize frames from the TUN device directly
// into buffers obtained from pool. Every pooled packet must provide at least
// the virtio headroom and the same Offset, because the device read accepts a
// single offset for the whole batch. Returns the filled packets; on error or
// an empty read, every obtained packet is released back to the pool.
func (w *wireguardTunIO) ReadIntoBatch(pool *PacketPool) ([]*Packet, error) {
	if pool == nil {
		return nil, fmt.Errorf("wireguard tun: packet pool is nil")
	}
	w.readMu.Lock()
	defer w.readMu.Unlock()
	// Grow the scratch slices lazily up to the batch size.
	if len(w.readBuffers) < w.batchSize {
		w.readBuffers = make([][]byte, w.batchSize)
	}
	if len(w.readLens) < w.batchSize {
		w.readLens = make([]int, w.batchSize)
	}
	packets := make([]*Packet, w.batchSize)
	requiredHeadroom := w.BatchHeadroom()
	requiredPayload := w.BatchPayloadCap()
	headroom := 0
	for i := 0; i < w.batchSize; i++ {
		pkt := pool.Get()
		if pkt == nil {
			releasePackets(packets[:i])
			return nil, fmt.Errorf("wireguard tun: packet pool returned nil packet")
		}
		if pkt.Capacity() < requiredPayload {
			pkt.Release()
			releasePackets(packets[:i])
			return nil, fmt.Errorf("wireguard tun: packet capacity %d below required %d", pkt.Capacity(), requiredPayload)
		}
		if i == 0 {
			// The first packet's offset fixes the headroom for the batch.
			headroom = pkt.Offset
			if headroom < requiredHeadroom {
				pkt.Release()
				releasePackets(packets[:i])
				return nil, fmt.Errorf("wireguard tun: packet headroom %d below virtio requirement %d", headroom, requiredHeadroom)
			}
		} else if pkt.Offset != headroom {
			// The device read takes one offset for all buffers.
			pkt.Release()
			releasePackets(packets[:i])
			return nil, fmt.Errorf("wireguard tun: inconsistent packet headroom (%d != %d)", pkt.Offset, headroom)
		}
		packets[i] = pkt
		w.readBuffers[i] = pkt.Buf
	}
	n, err := w.dev.Read(w.readBuffers[:w.batchSize], w.readLens[:w.batchSize], headroom)
	if err != nil {
		releasePackets(packets)
		return nil, err
	}
	if n == 0 {
		releasePackets(packets)
		return nil, nil
	}
	for i := 0; i < n; i++ {
		packets[i].Len = w.readLens[i]
	}
	// Return the unused tail of the batch to the pool.
	for i := n; i < w.batchSize; i++ {
		packets[i].Release()
		packets[i] = nil
	}
	return packets[:n], nil
}
// WriteBatch writes the packets to the TUN device in one batched write. All
// non-nil packets must share the same Offset (headroom), since the device
// write takes a single offset for the whole batch. On success the packets are
// released back to their pool.
//
// NOTE(review): on a device write error the packets are returned to the
// caller without being released — presumably so the caller can retry; confirm
// callers release them on that path, otherwise pool packets leak.
func (w *wireguardTunIO) WriteBatch(packets []*Packet) (int, error) {
	if len(packets) == 0 {
		return 0, nil
	}
	requiredHeadroom := w.BatchHeadroom()
	offset := packets[0].Offset
	if offset < requiredHeadroom {
		releasePackets(packets)
		return 0, fmt.Errorf("wireguard tun: packet offset %d smaller than required headroom %d", offset, requiredHeadroom)
	}
	// Validate the batch before touching the device.
	for _, pkt := range packets {
		if pkt == nil {
			continue
		}
		if pkt.Offset != offset {
			releasePackets(packets)
			return 0, fmt.Errorf("wireguard tun: mixed packet offsets not supported")
		}
		limit := pkt.Offset + pkt.Len
		if limit > len(pkt.Buf) {
			releasePackets(packets)
			return 0, fmt.Errorf("wireguard tun: packet length %d exceeds buffer capacity %d", pkt.Len, len(pkt.Buf)-pkt.Offset)
		}
	}
	w.writeMu.Lock()
	defer w.writeMu.Unlock()
	if len(w.writeBuffers) < len(packets) {
		w.writeBuffers = make([][]byte, len(packets))
	}
	for i, pkt := range packets {
		if pkt == nil {
			w.writeBuffers[i] = nil
			continue
		}
		limit := pkt.Offset + pkt.Len
		w.writeBuffers[i] = pkt.Buf[:limit]
	}
	n, err := w.dev.Write(w.writeBuffers[:len(packets)], offset)
	if err != nil {
		return n, err
	}
	releasePackets(packets)
	return n, nil
}
// BatchHeadroom returns the per-packet headroom the device requires: the
// virtio-net header length.
func (w *wireguardTunIO) BatchHeadroom() int {
	return wgtun.VirtioNetHdrLen
}

// BatchPayloadCap returns the maximum payload size per packet (the MTU).
func (w *wireguardTunIO) BatchPayloadCap() int {
	return w.mtu
}

// BatchSize returns how many packets one batched read or write may carry.
func (w *wireguardTunIO) BatchSize() int {
	return w.batchSize
}

// Close is a no-op. NOTE(review): the wrapped device is presumably closed by
// its owner elsewhere — confirm.
func (w *wireguardTunIO) Close() error {
	return nil
}
// releasePackets returns every non-nil packet in pkts to its pool.
func releasePackets(pkts []*Packet) {
	for _, p := range pkts {
		if p == nil {
			continue
		}
		p.Release()
	}
}

View File

@@ -1,70 +0,0 @@
package packet
import (
"github.com/slackhq/nebula/util/virtio"
"golang.org/x/sys/unix"
)
// OutPacket accumulates outbound segments destined for the TUN device.
// Parallel slices hold, per segment: the full buffer, its payload view, its
// virtio+ethernet header view, and the descriptor chain id it came from.
type OutPacket struct {
	Segments        [][]byte
	SegmentPayloads [][]byte
	SegmentHeaders  [][]byte
	SegmentIDs      []uint16
	//todo virtio header?
	SegSize      int
	SegCounter   int
	Valid        bool
	wasSegmented bool
	// Scratch is a reusable working buffer of the maximum packet size.
	Scratch []byte
}
// NewOut allocates an OutPacket with capacity for 64 segments and a
// maximum-size scratch buffer.
func NewOut() *OutPacket {
	return &OutPacket{
		Segments:        make([][]byte, 0, 64),
		SegmentHeaders:  make([][]byte, 0, 64),
		SegmentPayloads: make([][]byte, 0, 64),
		SegmentIDs:      make([]uint16, 0, 64),
		Scratch:         make([]byte, Size),
	}
}
// Reset clears all segment state while keeping the backing arrays so the
// OutPacket can be reused without reallocating.
func (pkt *OutPacket) Reset() {
	pkt.Valid = false
	pkt.wasSegmented = false
	pkt.SegSize = 0
	pkt.Segments = pkt.Segments[:0]
	pkt.SegmentPayloads = pkt.SegmentPayloads[:0]
	pkt.SegmentHeaders = pkt.SegmentHeaders[:0]
	pkt.SegmentIDs = pkt.SegmentIDs[:0]
}
// UseSegment registers a descriptor-chain buffer as the next outbound
// segment. It stamps a pass-through virtio-net header plus the EtherType of a
// synthetic Ethernet header at the front of seg, records the header and
// payload views, and returns the index of the segment just added.
func (pkt *OutPacket) UseSegment(segID uint16, seg []byte, isV6 bool) int {
	const etherHdrLen = 14
	pkt.Valid = true
	pkt.SegmentIDs = append(pkt.SegmentIDs, segID)
	pkt.Segments = append(pkt.Segments, seg) //todo do we need this?

	// No GSO/checksum offload requested; all other header fields stay zero.
	vhdr := virtio.NetHdr{
		Flags:   unix.VIRTIO_NET_HDR_F_DATA_VALID,
		GSOType: unix.VIRTIO_NET_HDR_GSO_NONE,
	}
	hdr := seg[:virtio.NetHdrSize+etherHdrLen]
	_ = vhdr.Encode(hdr)

	// EtherType lives in the final two bytes of the Ethernet header.
	typeHi := virtio.NetHdrSize + etherHdrLen - 2
	if isV6 {
		hdr[typeHi] = 0x86
		hdr[typeHi+1] = 0xdd
	} else {
		hdr[typeHi] = 0x08
		hdr[typeHi+1] = 0x00
	}
	pkt.SegmentHeaders = append(pkt.SegmentHeaders, hdr)
	pkt.SegmentPayloads = append(pkt.SegmentPayloads, seg[virtio.NetHdrSize+etherHdrLen:])
	return len(pkt.SegmentIDs) - 1
}

View File

@@ -1,119 +0,0 @@
package packet
import (
"encoding/binary"
"iter"
"net/netip"
"slices"
"syscall"
"unsafe"
"golang.org/x/sys/unix"
)
// Size is the maximum UDP datagram payload size (64 KiB - 1).
const Size = 0xffff

// Packet is a reusable receive/transmit buffer for one (possibly GRO
// coalesced) UDP datagram, plus the scratch areas recvmsg/sendmsg need:
// ancillary control messages and the raw peer sockaddr.
type Packet struct {
	Payload []byte
	Control []byte
	// Name holds the raw peer sockaddr bytes (IPv4 or IPv6 form).
	Name []byte
	// SegSize is the GRO/GSO segment size; equals len(Payload) when the
	// datagram was not coalesced.
	SegSize int
	//todo should this hold out as well?
	OutLen       int
	wasSegmented bool
	isV4         bool
}
// New allocates a Packet with a maximum-size payload buffer, room for one
// 16-bit control message, and an IPv6-sized sockaddr name buffer.
func New(isV4 bool) *Packet {
	p := &Packet{isV4: isV4}
	p.Payload = make([]byte, Size)
	p.Control = make([]byte, unix.CmsgSpace(2))
	p.Name = make([]byte, unix.SizeofSockaddrInet6)
	return p
}
// AddrPort decodes the raw sockaddr in Name into a netip.AddrPort, unmapping
// IPv4-in-IPv6 addresses.
func (p *Packet) AddrPort() netip.AddrPort {
	port := binary.BigEndian.Uint16(p.Name[2:4])
	var raw []byte
	if p.isV4 {
		raw = p.Name[4:8]
	} else {
		raw = p.Name[8:24]
	}
	// AddrFromSlice only fails on a bad length; the fixed 4/16-byte slices
	// above make that impossible (slicing would panic first).
	addr, _ := netip.AddrFromSlice(raw)
	return netip.AddrPortFrom(addr.Unmap(), port)
}
// updateCtrl refreshes the GRO segmentation state of a just-received packet
// from its socket control messages. SegSize defaults to the full payload
// length; when the kernel delivered a UDP_GRO cmsg, the payload is a train of
// coalesced segments of SegSize bytes each and wasSegmented is set.
//
// Fix: only the ctrlLen bytes the kernel actually wrote are parsed. The
// previous code parsed the entire Control buffer, which could misinterpret
// stale control-message bytes left over from an earlier receive.
func (p *Packet) updateCtrl(ctrlLen int) {
	p.SegSize = len(p.Payload)
	p.wasSegmented = false
	if ctrlLen == 0 {
		return
	}
	if len(p.Control) == 0 {
		return
	}
	cmsgs, err := unix.ParseSocketControlMessage(p.Control[:ctrlLen])
	if err != nil {
		return // oh well
	}
	for _, c := range cmsgs {
		if c.Header.Level == unix.SOL_UDP && c.Header.Type == unix.UDP_GRO && len(c.Data) >= 2 {
			p.wasSegmented = true
			p.SegSize = int(binary.LittleEndian.Uint16(c.Data[:2]))
			return
		}
	}
}
// Update sets a Packet into "just received, not processed" state: OutLen is
// invalidated and GRO segmentation state is refreshed from the ctrlLen bytes
// of control messages the kernel delivered.
func (p *Packet) Update(ctrlLen int) {
	p.OutLen = -1
	p.updateCtrl(ctrlLen)
}
// SetSegSizeForTX stamps a UDP_SEGMENT control message into Control so the
// kernel will GSO-split the payload into SegSize-byte segments on send.
// SegSize is set to the full payload length; the cmsg header is written in
// place via unsafe and the size value in native byte order, as the kernel
// expects.
func (p *Packet) SetSegSizeForTX() {
	p.SegSize = len(p.Payload)
	hdr := (*unix.Cmsghdr)(unsafe.Pointer(&p.Control[0]))
	hdr.Level = unix.SOL_UDP
	hdr.Type = unix.UDP_SEGMENT
	hdr.SetLen(syscall.CmsgLen(2))
	binary.NativeEndian.PutUint16(p.Control[unix.CmsgLen(0):unix.CmsgLen(0)+2], uint16(p.SegSize))
}
// CompatibleForSegmentationWith reports whether p can be coalesced into the
// same GSO batch as otherP, given currentTotalSize bytes already queued.
func (p *Packet) CompatibleForSegmentationWith(otherP *Packet, currentTotalSize int) bool {
	switch {
	case !slices.Equal(p.Name, otherP.Name):
		// Different destination sockaddr.
		return false
	case len(p.Payload)+currentTotalSize >= 0xffff:
		// Would exceed the 16-bit UDP length limit.
		return false
	case len(p.Payload) != len(otherP.Payload):
		// GSO requires equal-sized segments. todo: a single shorter
		// trailing segment is technically allowed but not implemented.
		return false
	default:
		return true
	}
}
// Segments yields consecutive SegSize-byte views of the payload (the final
// segment may be shorter), matching how the kernel coalesced a GRO datagram.
func (p *Packet) Segments() iter.Seq[[]byte] {
	return func(yield func([]byte) bool) {
		payload := p.Payload
		for start := 0; start < len(payload); start += p.SegSize {
			end := min(start+p.SegSize, len(payload))
			if !yield(payload[start:end]) {
				return
			}
		}
	}
}

View File

@@ -1,37 +0,0 @@
package packet
import (
"github.com/slackhq/nebula/util/virtio"
)
// VirtIOPacket is a reusable receive buffer backed by virtqueue descriptor
// chains: Payload views the packet bytes, Header holds the decoded virtio-net
// header, Chains records the chain heads to hand back to the device, and
// ChainRefs pins the backing memory slices while the packet is in flight.
type VirtIOPacket struct {
	Payload []byte
	Header  virtio.NetHdr
	Chains  []uint16

	ChainRefs [][]byte
	// OfferDescriptorChains(chains []uint16, kick bool) error
}
// NewVIO allocates an empty VirtIOPacket with pre-sized chain bookkeeping;
// Payload stays nil until a read fills it in.
func NewVIO() *VirtIOPacket {
	v := new(VirtIOPacket)
	v.ChainRefs = make([][]byte, 0, 4)
	v.Chains = make([]uint16, 0, 8)
	return v
}
// Reset drops the payload view and empties the chain bookkeeping while
// keeping the backing arrays for reuse.
func (v *VirtIOPacket) Reset() {
	v.Chains = v.Chains[:0]
	v.ChainRefs = v.ChainRefs[:0]
	v.Payload = nil
}
// VirtIOTXPacket is a VirtIOPacket used on the transmit path.
type VirtIOTXPacket struct {
	VirtIOPacket
}

// NewVIOTX allocates a transmit packet. The isV4 parameter is currently
// unused — NOTE(review): confirm whether address-family handling is still
// planned here or the parameter can be dropped.
func NewVIOTX(isV4 bool) *VirtIOTXPacket {
	out := new(VirtIOTXPacket)
	out.VirtIOPacket = *NewVIO()
	return out
}

10
pki.go
View File

@@ -523,13 +523,9 @@ func loadCAPoolFromConfig(l *logrus.Logger, c *config.C) (*cert.CAPool, error) {
return nil, fmt.Errorf("error while adding CA certificate to CA trust store: %s", err)
}
bl := c.GetStringSlice("pki.blocklist", []string{})
if len(bl) > 0 {
for _, fp := range bl {
caPool.BlocklistFingerprint(fp)
}
l.WithField("fingerprintCount", len(bl)).Info("Blocklisted certificates")
for _, fp := range c.GetStringSlice("pki.blocklist", []string{}) {
l.WithField("fingerprint", fp).Info("Blocklisting cert")
caPool.BlocklistFingerprint(fp)
}
return caPool, nil

View File

@@ -16,8 +16,8 @@ import (
"github.com/slackhq/nebula/cert_test"
"github.com/slackhq/nebula/config"
"github.com/slackhq/nebula/overlay"
"go.yaml.in/yaml/v3"
"golang.org/x/sync/errgroup"
"gopkg.in/yaml.v3"
)
type m = map[string]any

View File

@@ -4,13 +4,13 @@ import (
"net/netip"
"github.com/slackhq/nebula/config"
"github.com/slackhq/nebula/packet"
)
const MTU = 9001
type EncReader func(
[]*packet.Packet,
addr netip.AddrPort,
payload []byte,
)
type Conn interface {
@@ -19,11 +19,21 @@ type Conn interface {
ListenOut(r EncReader)
WriteTo(b []byte, addr netip.AddrPort) error
ReloadConfig(c *config.C)
Prep(pkt *packet.Packet, addr netip.AddrPort) error
WriteBatch(pkt []*packet.Packet) (int, error)
Close() error
}
// Datagram represents a UDP payload destined to a specific address.
type Datagram struct {
Payload []byte
Addr netip.AddrPort
}
// BatchConn can send multiple datagrams in one syscall.
type BatchConn interface {
Conn
WriteBatch(pkts []Datagram) error
}
type NoopConn struct{}
func (NoopConn) Rebind() error {

View File

@@ -14,25 +14,25 @@ import (
"github.com/rcrowley/go-metrics"
"github.com/sirupsen/logrus"
"github.com/slackhq/nebula/config"
"github.com/slackhq/nebula/packet"
"golang.org/x/sys/unix"
)
const iovMax = 128 //1024 //no unix constant for this? from limits.h
//todo I'd like this to be 1024 but we seem to hit errors around ~130?
type StdConn struct {
sysFd int
isV4 bool
l *logrus.Logger
batch int
enableGRO bool
msgs []rawMessage
iovs [][]iovec
sysFd int
isV4 bool
l *logrus.Logger
batch int
}
func NewListener(l *logrus.Logger, ip netip.Addr, port int, multi bool, batch int) (Conn, error) {
func maybeIPV4(ip net.IP) (net.IP, bool) {
ip4 := ip.To4()
if ip4 != nil {
return ip4, true
}
return ip, false
}
func NewListener(l *logrus.Logger, ip netip.Addr, port int, multi bool, batch int, q int) (Conn, error) {
af := unix.AF_INET6
if ip.Is4() {
af = unix.AF_INET
@@ -69,20 +69,7 @@ func NewListener(l *logrus.Logger, ip netip.Addr, port int, multi bool, batch in
return nil, fmt.Errorf("unable to bind to socket: %s", err)
}
const batchSize = 8192
msgs := make([]rawMessage, 0, batchSize) //todo configure
iovs := make([][]iovec, batchSize)
for i := range iovs {
iovs[i] = make([]iovec, iovMax)
}
return &StdConn{
sysFd: fd,
isV4: ip.Is4(),
l: l,
batch: batch,
msgs: msgs,
iovs: iovs,
}, err
return &StdConn{sysFd: fd, isV4: ip.Is4(), l: l, batch: batch}, err
}
func (u *StdConn) Rebind() error {
@@ -132,7 +119,9 @@ func (u *StdConn) LocalAddr() (netip.AddrPort, error) {
}
func (u *StdConn) ListenOut(r EncReader) {
msgs, packets := u.PrepareRawMessages(u.batch, u.isV4)
var ip netip.Addr
msgs, buffers, names := u.PrepareRawMessages(u.batch)
read := u.ReadMulti
if u.batch == 1 {
read = u.ReadSingle
@@ -146,12 +135,13 @@ func (u *StdConn) ListenOut(r EncReader) {
}
for i := 0; i < n; i++ {
packets[i].Payload = packets[i].Payload[:msgs[i].Len]
packets[i].Update(getRawMessageControlLen(&msgs[i]))
}
r(packets[:n])
for i := 0; i < n; i++ { //todo reset this in prev loop, but this makes debug ez
msgs[i].Hdr.Controllen = uint64(unix.CmsgSpace(2))
// Its ok to skip the ok check here, the slicing is the only error that can occur and it will panic
if u.isV4 {
ip, _ = netip.AddrFromSlice(names[i][4:8])
} else {
ip, _ = netip.AddrFromSlice(names[i][8:24])
}
r(netip.AddrPortFrom(ip.Unmap(), binary.BigEndian.Uint16(names[i][2:4])), buffers[i][:msgs[i].Len])
}
}
}
@@ -204,147 +194,6 @@ func (u *StdConn) WriteTo(b []byte, ip netip.AddrPort) error {
return u.writeTo6(b, ip)
}
func (u *StdConn) WriteToBatch(b []byte, ip netip.AddrPort) error {
if u.isV4 {
return u.writeTo4(b, ip)
}
return u.writeTo6(b, ip)
}
func (u *StdConn) Prep(pkt *packet.Packet, addr netip.AddrPort) error {
nl, err := u.encodeSockaddr(pkt.Name, addr)
if err != nil {
return err
}
pkt.Name = pkt.Name[:nl]
pkt.OutLen = len(pkt.Payload)
return nil
}
func (u *StdConn) WriteBatch(pkts []*packet.Packet) (int, error) {
if len(pkts) == 0 {
return 0, nil
}
u.msgs = u.msgs[:0]
//u.iovs = u.iovs[:0]
sent := 0
var mostRecentPkt *packet.Packet
mostRecentPktSize := 0
//segmenting := false
idx := 0
for _, pkt := range pkts {
if len(pkt.Payload) == 0 || pkt.OutLen == -1 {
sent++
continue
}
lastIdx := idx - 1
if mostRecentPkt != nil && pkt.CompatibleForSegmentationWith(mostRecentPkt, mostRecentPktSize) && u.msgs[lastIdx].Hdr.Iovlen < iovMax {
u.msgs[lastIdx].Hdr.Controllen = uint64(len(mostRecentPkt.Control))
u.msgs[lastIdx].Hdr.Control = &mostRecentPkt.Control[0]
u.iovs[lastIdx][u.msgs[lastIdx].Hdr.Iovlen].Base = &pkt.Payload[0]
u.iovs[lastIdx][u.msgs[lastIdx].Hdr.Iovlen].Len = uint64(len(pkt.Payload))
u.msgs[lastIdx].Hdr.Iovlen++
mostRecentPktSize += len(pkt.Payload)
mostRecentPkt.SetSegSizeForTX()
} else {
u.msgs = append(u.msgs, rawMessage{})
u.iovs[idx][0] = iovec{
Base: &pkt.Payload[0],
Len: uint64(len(pkt.Payload)),
}
msg := &u.msgs[idx]
iov := &u.iovs[idx][0]
idx++
msg.Hdr.Iov = iov
msg.Hdr.Iovlen = 1
setRawMessageControl(msg, nil)
msg.Hdr.Flags = 0
msg.Hdr.Name = &pkt.Name[0]
msg.Hdr.Namelen = uint32(len(pkt.Name))
mostRecentPkt = pkt
mostRecentPktSize = len(pkt.Payload)
}
}
if len(u.msgs) == 0 {
return sent, nil
}
offset := 0
for offset < len(u.msgs) {
n, _, errno := unix.Syscall6(
unix.SYS_SENDMMSG,
uintptr(u.sysFd),
uintptr(unsafe.Pointer(&u.msgs[offset])),
uintptr(len(u.msgs)-offset),
0,
0,
0,
)
if errno != 0 {
if errno == unix.EINTR {
continue
}
//for i := 0; i < len(u.msgs); i++ {
// for j := 0; j < int(u.msgs[i].Hdr.Iovlen); j++ {
// u.l.WithFields(logrus.Fields{
// "msg_index": i,
// "iov idx": j,
// "iov": fmt.Sprintf("%+v", u.iovs[i][j]),
// }).Warn("failed to send message")
// }
//
//}
u.l.WithFields(logrus.Fields{
"errno": errno,
"idx": idx,
"len": len(u.msgs),
"deets": fmt.Sprintf("%+v", u.msgs),
"lastIOV": fmt.Sprintf("%+v", u.iovs[len(u.msgs)-1][u.msgs[len(u.msgs)-1].Hdr.Iovlen-1]),
}).Error("failed to send message")
return sent + offset, &net.OpError{Op: "sendmmsg", Err: errno}
}
if n == 0 {
break
}
offset += int(n)
}
return sent + len(u.msgs), nil
}
func (u *StdConn) encodeSockaddr(dst []byte, addr netip.AddrPort) (uint32, error) {
if u.isV4 {
if !addr.Addr().Is4() {
return 0, fmt.Errorf("Listener is IPv4, but writing to IPv6 remote")
}
var sa unix.RawSockaddrInet4
sa.Family = unix.AF_INET
sa.Addr = addr.Addr().As4()
binary.BigEndian.PutUint16((*[2]byte)(unsafe.Pointer(&sa.Port))[:], addr.Port())
size := unix.SizeofSockaddrInet4
copy(dst[:size], (*(*[unix.SizeofSockaddrInet4]byte)(unsafe.Pointer(&sa)))[:])
return uint32(size), nil
}
var sa unix.RawSockaddrInet6
sa.Family = unix.AF_INET6
sa.Addr = addr.Addr().As16()
binary.BigEndian.PutUint16((*[2]byte)(unsafe.Pointer(&sa.Port))[:], addr.Port())
size := unix.SizeofSockaddrInet6
copy(dst[:size], (*(*[unix.SizeofSockaddrInet6]byte)(unsafe.Pointer(&sa)))[:])
return uint32(size), nil
}
func (u *StdConn) writeTo6(b []byte, ip netip.AddrPort) error {
var rsa unix.RawSockaddrInet6
rsa.Family = unix.AF_INET6
@@ -445,27 +294,6 @@ func (u *StdConn) ReloadConfig(c *config.C) {
u.l.WithError(err).Error("Failed to set listen.so_mark")
}
}
u.configureGRO(true)
}
func (u *StdConn) configureGRO(enable bool) {
if enable == u.enableGRO {
return
}
if enable {
if err := unix.SetsockoptInt(u.sysFd, unix.SOL_UDP, unix.UDP_GRO, 1); err != nil {
u.l.WithError(err).Warn("Failed to enable UDP GRO")
return
}
u.enableGRO = true
u.l.Info("UDP GRO enabled")
} else {
if err := unix.SetsockoptInt(u.sysFd, unix.SOL_UDP, unix.UDP_GRO, 0); err != nil && err != unix.ENOPROTOOPT {
u.l.WithError(err).Warn("Failed to disable UDP GRO")
}
u.enableGRO = false
}
}
func (u *StdConn) getMemInfo(meminfo *[unix.SK_MEMINFO_VARS]uint32) error {
@@ -482,31 +310,51 @@ func (u *StdConn) Close() error {
}
func NewUDPStatsEmitter(udpConns []Conn) func() {
// Check if our kernel supports SO_MEMINFO before registering the gauges
var udpGauges [][unix.SK_MEMINFO_VARS]metrics.Gauge
if len(udpConns) == 0 {
return func() {}
}
type statsProvider struct {
index int
conn *StdConn
}
providers := make([]statsProvider, 0, len(udpConns))
for i, c := range udpConns {
if sc, ok := c.(*StdConn); ok {
providers = append(providers, statsProvider{index: i, conn: sc})
}
}
if len(providers) == 0 {
return func() {}
}
var meminfo [unix.SK_MEMINFO_VARS]uint32
if err := udpConns[0].(*StdConn).getMemInfo(&meminfo); err == nil {
udpGauges = make([][unix.SK_MEMINFO_VARS]metrics.Gauge, len(udpConns))
for i := range udpConns {
udpGauges[i] = [unix.SK_MEMINFO_VARS]metrics.Gauge{
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.rmem_alloc", i), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.rcvbuf", i), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.wmem_alloc", i), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.sndbuf", i), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.fwd_alloc", i), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.wmem_queued", i), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.optmem", i), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.backlog", i), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.drops", i), nil),
}
if err := providers[0].conn.getMemInfo(&meminfo); err != nil {
return func() {}
}
udpGauges := make([][unix.SK_MEMINFO_VARS]metrics.Gauge, len(providers))
for i, provider := range providers {
udpGauges[i] = [unix.SK_MEMINFO_VARS]metrics.Gauge{
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.rmem_alloc", provider.index), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.rcvbuf", provider.index), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.wmem_alloc", provider.index), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.sndbuf", provider.index), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.fwd_alloc", provider.index), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.wmem_queued", provider.index), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.optmem", provider.index), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.backlog", provider.index), nil),
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.drops", provider.index), nil),
}
}
return func() {
for i, gauges := range udpGauges {
if err := udpConns[i].(*StdConn).getMemInfo(&meminfo); err == nil {
for i, provider := range providers {
if err := provider.conn.getMemInfo(&meminfo); err == nil {
for j := 0; j < unix.SK_MEMINFO_VARS; j++ {
gauges[j].Update(int64(meminfo[j]))
udpGauges[i][j].Update(int64(meminfo[j]))
}
}
}

View File

@@ -7,7 +7,6 @@
package udp
import (
"github.com/slackhq/nebula/packet"
"golang.org/x/sys/unix"
)
@@ -34,59 +33,25 @@ type rawMessage struct {
Pad0 [4]byte
}
func setRawMessageControl(msg *rawMessage, buf []byte) {
if len(buf) == 0 {
msg.Hdr.Control = nil
msg.Hdr.Controllen = 0
return
}
msg.Hdr.Control = &buf[0]
msg.Hdr.Controllen = uint64(len(buf))
}
func getRawMessageControlLen(msg *rawMessage) int {
return int(msg.Hdr.Controllen)
}
func setCmsgLen(h *unix.Cmsghdr, l int) {
h.Len = uint64(l)
}
func (u *StdConn) PrepareRawMessages(n int, isV4 bool) ([]rawMessage, []*packet.Packet) {
func (u *StdConn) PrepareRawMessages(n int) ([]rawMessage, [][]byte, [][]byte) {
msgs := make([]rawMessage, n)
packets := make([]*packet.Packet, n)
buffers := make([][]byte, n)
names := make([][]byte, n)
for i := range msgs {
packets[i] = packet.New(isV4)
buffers[i] = make([]byte, MTU)
names[i] = make([]byte, unix.SizeofSockaddrInet6)
vs := []iovec{
{Base: &packets[i].Payload[0], Len: uint64(packet.Size)},
{Base: &buffers[i][0], Len: uint64(len(buffers[i]))},
}
msgs[i].Hdr.Iov = &vs[0]
msgs[i].Hdr.Iovlen = uint64(len(vs))
msgs[i].Hdr.Name = &packets[i].Name[0]
msgs[i].Hdr.Namelen = uint32(len(packets[i].Name))
if u.enableGRO {
msgs[i].Hdr.Control = &packets[i].Control[0]
msgs[i].Hdr.Controllen = uint64(len(packets[i].Control))
} else {
msgs[i].Hdr.Control = nil
msgs[i].Hdr.Controllen = 0
}
msgs[i].Hdr.Name = &names[i][0]
msgs[i].Hdr.Namelen = uint32(len(names[i]))
}
return msgs, packets
}
func setIovecSlice(iov *iovec, b []byte) {
if len(b) == 0 {
iov.Base = nil
iov.Len = 0
return
}
iov.Base = &b[0]
iov.Len = uint64(len(b))
return msgs, buffers, names
}

226
udp/wireguard_conn_linux.go Normal file
View File

@@ -0,0 +1,226 @@
//go:build linux && !android && !e2e_testing
package udp
import (
"errors"
"net"
"net/netip"
"sync"
"sync/atomic"
"github.com/sirupsen/logrus"
"github.com/slackhq/nebula/config"
wgconn "github.com/slackhq/nebula/wgstack/conn"
)
// WGConn adapts WireGuard's batched UDP bind implementation to Nebula's udp.Conn interface.
type WGConn struct {
	l    *logrus.Logger
	bind *wgconn.StdNetBind
	// recvers holds one receive function per underlying socket/queue.
	recvers []wgconn.ReceiveFunc
	// batch is the effective batch size in use; reqBatch remembers the size
	// originally requested by configuration (ConfigureOffload may raise batch).
	batch    int
	reqBatch int
	// localIP/localPort describe the bound address for LocalAddr.
	localIP   netip.Addr
	localPort uint16
	enableGSO bool
	enableGRO bool
	gsoMaxSeg int
	// closed gates the receive loops and all send paths.
	closed    atomic.Bool
	q         int
	closeOnce sync.Once
}
// NewWireguardListener creates a UDP listener backed by WireGuard's StdNetBind.
// The requested batch size is clamped into (0, bind.BatchSize()].
func NewWireguardListener(l *logrus.Logger, ip netip.Addr, port int, multi bool, batch int, q int) (Conn, error) {
	bind := wgconn.NewStdNetBindForAddr(ip, multi, q)
	recvers, actualPort, err := bind.Open(uint16(port))
	if err != nil {
		return nil, err
	}
	if maxBatch := bind.BatchSize(); batch <= 0 || batch > maxBatch {
		batch = maxBatch
	}
	return &WGConn{
		l:         l,
		bind:      bind,
		recvers:   recvers,
		batch:     batch,
		reqBatch:  batch,
		localIP:   ip,
		localPort: actualPort,
		q:         q,
	}, nil
}
// Rebind is a no-op: WireGuard's bind does not support rebinding in place.
func (c *WGConn) Rebind() error {
	return nil
}

// LocalAddr reports the bound address; an invalid or unspecified bind IP is
// rendered as the IPv4 wildcard.
func (c *WGConn) LocalAddr() (netip.AddrPort, error) {
	if !c.localIP.IsValid() || c.localIP.IsUnspecified() {
		// Fallback to wildcard IPv4 for display purposes.
		return netip.AddrPortFrom(netip.IPv4Unspecified(), c.localPort), nil
	}
	return netip.AddrPortFrom(c.localIP, c.localPort), nil
}
// listen runs one receive loop for a single ReceiveFunc, invoking r for every
// datagram until the connection is closed. The packet buffers are reused
// across iterations, so r must not retain the payload slice after returning.
func (c *WGConn) listen(fn wgconn.ReceiveFunc, r EncReader) {
	batchSize := c.batch
	packets := make([][]byte, batchSize)
	for i := range packets {
		packets[i] = make([]byte, 0xffff)
	}
	sizes := make([]int, batchSize)
	endpoints := make([]wgconn.Endpoint, batchSize)
	for {
		if c.closed.Load() {
			return
		}
		n, err := fn(packets, sizes, endpoints)
		if err != nil {
			if errors.Is(err, net.ErrClosed) {
				return
			}
			if c.l != nil {
				c.l.WithError(err).Debug("wireguard UDP listener receive error")
			}
			// Transient error; keep receiving.
			continue
		}
		for i := 0; i < n; i++ {
			if sizes[i] == 0 {
				continue
			}
			stdEp, ok := endpoints[i].(*wgconn.StdNetEndpoint)
			if !ok {
				if c.l != nil {
					c.l.Warn("wireguard UDP listener received unexpected endpoint type")
				}
				continue
			}
			addr := stdEp.AddrPort
			r(addr, packets[i][:sizes[i]])
			// Drop the endpoint reference so it can be collected.
			endpoints[i] = nil
		}
	}
}
// ListenOut starts one background receive loop per underlying receiver,
// each delivering datagrams to r.
func (c *WGConn) ListenOut(r EncReader) {
	for _, recv := range c.recvers {
		go c.listen(recv, r)
	}
}
// WriteTo sends a single datagram to addr. Empty payloads are dropped
// silently; writes after Close report net.ErrClosed.
func (c *WGConn) WriteTo(b []byte, addr netip.AddrPort) error {
	switch {
	case len(b) == 0:
		return nil
	case c.closed.Load():
		return net.ErrClosed
	}
	return c.bind.Send([][]byte{b}, &wgconn.StdNetEndpoint{AddrPort: addr})
}
// WriteBatch sends a group of datagrams, coalescing consecutive entries that
// share a destination into a single bind.Send call. Entries with an empty
// payload or invalid address are skipped.
func (c *WGConn) WriteBatch(datagrams []Datagram) error {
	if len(datagrams) == 0 {
		return nil
	}
	if c.closed.Load() {
		return net.ErrClosed
	}

	// Cap each Send at the configured batch size; fall back to the input
	// length if no batch size was configured.
	limit := c.batch
	if limit <= 0 {
		limit = len(datagrams)
		if limit == 0 {
			limit = 1
		}
	}

	pending := make([][]byte, 0, limit)
	var (
		dst     netip.AddrPort
		dstEp   *wgconn.StdNetEndpoint
		haveDst bool
	)

	// sendPending ships the accumulated payloads to the current endpoint and
	// resets the buffer, preserving its capacity.
	sendPending := func() error {
		if len(pending) == 0 || dstEp == nil {
			pending = pending[:0]
			return nil
		}
		err := c.bind.Send(pending, dstEp)
		pending = pending[:0]
		return err
	}

	for _, dg := range datagrams {
		if len(dg.Payload) == 0 || !dg.Addr.IsValid() {
			continue
		}
		// A destination change ends the current run.
		if !haveDst || dg.Addr != dst {
			if err := sendPending(); err != nil {
				return err
			}
			dst = dg.Addr
			dstEp = &wgconn.StdNetEndpoint{AddrPort: dst}
			haveDst = true
		}
		pending = append(pending, dg.Payload)
		if len(pending) >= limit {
			if err := sendPending(); err != nil {
				return err
			}
		}
	}
	return sendPending()
}
// ConfigureOffload records the GSO/GRO offload settings and recomputes the
// effective batch size used by the send/receive paths.
func (c *WGConn) ConfigureOffload(enableGSO, enableGRO bool, maxSegments int) {
	c.enableGSO = enableGSO
	c.enableGRO = enableGRO

	// Clamp the segment count into [1, wgconn.IdealBatchSize].
	switch {
	case maxSegments <= 0:
		maxSegments = 1
	case maxSegments > wgconn.IdealBatchSize:
		maxSegments = wgconn.IdealBatchSize
	}
	c.gsoMaxSeg = maxSegments

	// GSO needs at least the bind's batch size; grow the effective batch (and
	// warn) if the configured value is smaller.
	batch := c.reqBatch
	if enableGSO && c.bind != nil {
		if bindBatch := c.bind.BatchSize(); batch < bindBatch {
			if c.l != nil {
				c.l.WithFields(logrus.Fields{
					"requested": c.reqBatch,
					"effective": bindBatch,
				}).Warn("listen.batch below wireguard minimum; using bind batch size for UDP GSO support")
			}
			batch = bindBatch
		}
	}
	c.batch = batch

	if c.l != nil {
		c.l.WithFields(logrus.Fields{
			"enableGSO":      enableGSO,
			"enableGRO":      enableGRO,
			"gsoMaxSegments": maxSegments,
		}).Debug("configured wireguard UDP offload")
	}
}
// ReloadConfig is a no-op; the WireGuard bind exposes no runtime-tunable
// configuration.
func (c *WGConn) ReloadConfig(*config.C) {}
// Close shuts down the bind exactly once; subsequent calls return nil.
func (c *WGConn) Close() error {
	var closeErr error
	c.closeOnce.Do(func() {
		c.closed.Store(true)
		closeErr = c.bind.Close()
	})
	return closeErr
}

View File

@@ -0,0 +1,15 @@
//go:build !linux || android || e2e_testing
package udp
import (
"fmt"
"net/netip"
"github.com/sirupsen/logrus"
)
// NewWireguardListener is only available on Linux builds. The signature
// mirrors the Linux implementation (logger, listen address, port, multi-queue
// flag, batch size, queue index) so callers compile identically on every
// platform.
func NewWireguardListener(*logrus.Logger, netip.Addr, int, bool, int, int) (Conn, error) {
	return nil, fmt.Errorf("wireguard experimental UDP listener is only supported on Linux")
}

View File

@@ -1,3 +0,0 @@
// Package virtio contains some generic types and concepts related to the virtio
// protocol.
package virtio

View File

@@ -1,136 +0,0 @@
package virtio
// Feature contains feature bits that describe a virtio device or driver.
// Individual bits are combined with bitwise OR into a 64-bit feature word.
type Feature uint64

// Device-independent feature bits.
//
// Source: https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html#x1-6600006
const (
	// FeatureIndirectDescriptors indicates that the driver can use descriptors
	// with an additional layer of indirection.
	FeatureIndirectDescriptors Feature = 1 << 28
	// FeatureVersion1 indicates compliance with version 1.0 of the virtio
	// specification.
	FeatureVersion1 Feature = 1 << 32
)

// Feature bits for networking devices.
//
// Source: https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html#x1-2200003
const (
	// FeatureNetDeviceCsum indicates that the device can handle packets with
	// partial checksum (checksum offload).
	FeatureNetDeviceCsum Feature = 1 << 0
	// FeatureNetDriverCsum indicates that the driver can handle packets with
	// partial checksum.
	FeatureNetDriverCsum Feature = 1 << 1
	// FeatureNetCtrlDriverOffloads indicates support for dynamic offload state
	// reconfiguration.
	FeatureNetCtrlDriverOffloads Feature = 1 << 2
	// FeatureNetMTU indicates that the device reports a maximum MTU value.
	FeatureNetMTU Feature = 1 << 3
	// FeatureNetMAC indicates that the device provides a MAC address.
	FeatureNetMAC Feature = 1 << 5
	// FeatureNetDriverTSO4 indicates that the driver supports the TCP
	// segmentation offload for received IPv4 packets.
	FeatureNetDriverTSO4 Feature = 1 << 7
	// FeatureNetDriverTSO6 indicates that the driver supports the TCP
	// segmentation offload for received IPv6 packets.
	FeatureNetDriverTSO6 Feature = 1 << 8
	// FeatureNetDriverECN indicates that the driver supports the TCP
	// segmentation offload with ECN for received packets.
	FeatureNetDriverECN Feature = 1 << 9
	// FeatureNetDriverUFO indicates that the driver supports the UDP
	// fragmentation offload for received packets.
	FeatureNetDriverUFO Feature = 1 << 10
	// FeatureNetDeviceTSO4 indicates that the device supports the TCP
	// segmentation offload for received IPv4 packets.
	FeatureNetDeviceTSO4 Feature = 1 << 11
	// FeatureNetDeviceTSO6 indicates that the device supports the TCP
	// segmentation offload for received IPv6 packets.
	FeatureNetDeviceTSO6 Feature = 1 << 12
	// FeatureNetDeviceECN indicates that the device supports the TCP
	// segmentation offload with ECN for received packets.
	FeatureNetDeviceECN Feature = 1 << 13
	// FeatureNetDeviceUFO indicates that the device supports the UDP
	// fragmentation offload for received packets.
	FeatureNetDeviceUFO Feature = 1 << 14
	// FeatureNetMergeRXBuffers indicates that the driver can handle merged
	// receive buffers.
	// When this feature is negotiated, devices may merge multiple descriptor
	// chains together to transport large received packets. [NetHdr.NumBuffers]
	// will then contain the number of merged descriptor chains.
	FeatureNetMergeRXBuffers Feature = 1 << 15
	// FeatureNetStatus indicates that the device configuration status field is
	// available.
	FeatureNetStatus Feature = 1 << 16
	// FeatureNetCtrlVQ indicates that a control channel virtqueue is
	// available.
	FeatureNetCtrlVQ Feature = 1 << 17
	// FeatureNetCtrlRX indicates support for RX mode control (e.g. promiscuous
	// or all-multicast) for packet receive filtering.
	FeatureNetCtrlRX Feature = 1 << 18
	// FeatureNetCtrlVLAN indicates support for VLAN filtering through the
	// control channel.
	FeatureNetCtrlVLAN Feature = 1 << 19
	// FeatureNetDriverAnnounce indicates that the driver can send gratuitous
	// packets.
	FeatureNetDriverAnnounce Feature = 1 << 21
	// FeatureNetMQ indicates that the device supports multiqueue with automatic
	// receive steering.
	FeatureNetMQ Feature = 1 << 22
	// FeatureNetCtrlMACAddr indicates that the MAC address can be set through
	// the control channel.
	FeatureNetCtrlMACAddr Feature = 1 << 23
	// FeatureNetDeviceUSO indicates that the device supports the UDP
	// segmentation offload for received packets.
	FeatureNetDeviceUSO Feature = 1 << 56
	// FeatureNetHashReport indicates that the device can report a per-packet
	// hash value and type.
	FeatureNetHashReport Feature = 1 << 57
	// FeatureNetDriverHdrLen indicates that the driver can provide the exact
	// header length value (see [NetHdr.HdrLen]).
	// Devices may benefit from knowing the exact header length.
	FeatureNetDriverHdrLen Feature = 1 << 59
	// FeatureNetRSS indicates that the device supports RSS (receive-side
	// scaling) with configurable hash parameters.
	FeatureNetRSS Feature = 1 << 60
	// FeatureNetRSCExt indicates that the device can process duplicated ACKs
	// and report the number of coalesced segments and duplicated ACKs.
	FeatureNetRSCExt Feature = 1 << 61
	// FeatureNetStandby indicates that the device may act as a standby for a
	// primary device with the same MAC address.
	FeatureNetStandby Feature = 1 << 62
	// FeatureNetSpeedDuplex indicates that the device can report link speed and
	// duplex mode.
	FeatureNetSpeedDuplex Feature = 1 << 63
)

View File

@@ -1,77 +0,0 @@
package virtio
import (
"errors"
"unsafe"
"golang.org/x/sys/unix"
)
// Workaround to make Go doc links work.
var _ unix.Errno

// NetHdrSize is the number of bytes needed to store a [NetHdr] in memory.
// It must equal unsafe.Sizeof(NetHdr{}); Encode/Decode copy exactly this
// many bytes.
const NetHdrSize = 12

// ErrNetHdrBufferTooSmall is returned when a buffer is too small to fit a
// virtio_net_hdr.
var ErrNetHdrBufferTooSmall = errors.New("the buffer is too small to fit a virtio_net_hdr")
// NetHdr defines the virtio_net_hdr as described by the virtio specification.
//
// The field order and widths mirror the on-wire C struct: Encode/Decode copy
// the struct's memory directly, so fields must not be reordered or resized.
type NetHdr struct {
	// Flags that describe the packet.
	// Possible values are:
	//   - [unix.VIRTIO_NET_HDR_F_NEEDS_CSUM]
	//   - [unix.VIRTIO_NET_HDR_F_DATA_VALID]
	//   - [unix.VIRTIO_NET_HDR_F_RSC_INFO]
	Flags uint8
	// GSOType contains the type of segmentation offload that should be used for
	// the packet.
	// Possible values are:
	//   - [unix.VIRTIO_NET_HDR_GSO_NONE]
	//   - [unix.VIRTIO_NET_HDR_GSO_TCPV4]
	//   - [unix.VIRTIO_NET_HDR_GSO_UDP]
	//   - [unix.VIRTIO_NET_HDR_GSO_TCPV6]
	//   - [unix.VIRTIO_NET_HDR_GSO_UDP_L4]
	//   - [unix.VIRTIO_NET_HDR_GSO_ECN]
	GSOType uint8
	// HdrLen contains the length of the headers that need to be replicated by
	// segmentation offloads. It's the number of bytes from the beginning of the
	// packet to the beginning of the transport payload.
	// Only used when [FeatureNetDriverHdrLen] is negotiated.
	HdrLen uint16
	// GSOSize contains the maximum size of each segmented packet beyond the
	// header (payload size). In case of TCP, this is the MSS.
	GSOSize uint16
	// CsumStart contains the offset within the packet from which on the
	// checksum should be computed.
	CsumStart uint16
	// CsumOffset specifies how many bytes after [NetHdr.CsumStart] the computed
	// 16-bit checksum should be inserted.
	CsumOffset uint16
	// NumBuffers contains the number of merged descriptor chains when
	// [FeatureNetMergeRXBuffers] is negotiated.
	// This field is only used for packets received by the driver and should be
	// zero for transmitted packets.
	NumBuffers uint16
}
// Decode decodes the [NetHdr] from the given byte slice. The slice must
// contain at least [NetHdrSize] bytes.
func (v *NetHdr) Decode(data []byte) error {
	if len(data) < NetHdrSize {
		return ErrNetHdrBufferTooSmall
	}
	// View the struct's memory as a byte slice and copy the input into it.
	dst := unsafe.Slice((*byte)(unsafe.Pointer(v)), NetHdrSize)
	copy(dst, data[:NetHdrSize])
	return nil
}
// Encode encodes the [NetHdr] into the given byte slice. The slice must have
// room for at least [NetHdrSize] bytes.
func (v *NetHdr) Encode(data []byte) error {
	if len(data) < NetHdrSize {
		return ErrNetHdrBufferTooSmall
	}
	// View the struct's memory as a byte slice and copy it out.
	src := unsafe.Slice((*byte)(unsafe.Pointer(v)), NetHdrSize)
	copy(data[:NetHdrSize], src)
	return nil
}

View File

@@ -1,43 +0,0 @@
package virtio
import (
"testing"
"unsafe"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/sys/unix"
)
func TestNetHdr_Size(t *testing.T) {
assert.EqualValues(t, NetHdrSize, unsafe.Sizeof(NetHdr{}))
}
// TestNetHdr_Encoding round-trips a header through Encode/Decode and checks
// the exact little-endian byte layout.
func TestNetHdr_Encoding(t *testing.T) {
	hdr := NetHdr{
		Flags:      unix.VIRTIO_NET_HDR_F_NEEDS_CSUM,
		GSOType:    unix.VIRTIO_NET_HDR_GSO_UDP_L4,
		HdrLen:     42,
		GSOSize:    1472,
		CsumStart:  34,
		CsumOffset: 6,
		NumBuffers: 16,
	}

	encoded := make([]byte, NetHdrSize)
	require.NoError(t, hdr.Encode(encoded))
	want := []byte{
		0x01, 0x05,
		0x2a, 0x00,
		0xc0, 0x05,
		0x22, 0x00,
		0x06, 0x00,
		0x10, 0x00,
	}
	assert.Equal(t, want, encoded)

	var roundTripped NetHdr
	require.NoError(t, roundTripped.Decode(encoded))
	assert.Equal(t, hdr, roundTripped)
}

587
wgstack/conn/bind_std.go Normal file
View File

@@ -0,0 +1,587 @@
/* SPDX-License-Identifier: MIT
*
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
*/
package conn
import (
"context"
"errors"
"fmt"
"net"
"net/netip"
"runtime"
"strconv"
"sync"
"syscall"
"golang.org/x/net/ipv4"
"golang.org/x/net/ipv6"
)
var (
_ Bind = (*StdNetBind)(nil)
)
// StdNetBind implements Bind for all platforms. While Windows has its own Bind
// (see bind_windows.go), it may fall back to StdNetBind.
// TODO: Remove usage of ipv{4,6}.PacketConn when net.UDPConn has comparable
// methods for sending and receiving multiple datagrams per-syscall. See the
// proposal in https://github.com/golang/go/issues/45886#issuecomment-1218301564.
type StdNetBind struct {
	mu   sync.Mutex // protects all fields except as specified
	ipv4 *net.UDPConn
	ipv6 *net.UDPConn
	// Batched packet conns wrapping the sockets above.
	ipv4PC *ipv4.PacketConn // will be nil on non-Linux
	ipv6PC *ipv6.PacketConn // will be nil on non-Linux
	// Whether UDP GSO (Tx) / GRO (Rx) offload is usable, per address family.
	ipv4TxOffload bool
	ipv4RxOffload bool
	ipv6TxOffload bool
	ipv6RxOffload bool

	// udpAddrPool and msgsPool are not guarded by mu; sync.Pool is safe for
	// concurrent use.
	udpAddrPool sync.Pool
	msgsPool    sync.Pool

	blackhole4 bool
	blackhole6 bool
	// q is the queue index passed through to listenNet/listenConfig.
	q int
}
// NewStdNetBind creates a bind that listens on all interfaces.
func NewStdNetBind() *StdNetBind {
	b := newStdNetBind()
	return b.(*StdNetBind)
}
// NewStdNetBindForAddr creates a bind intended to listen on a specific
// address.
//
// NOTE(review): addr and reusePort are currently ignored — the bind still
// listens with dual-stack wildcard defaults. Only the queue index q is
// recorded; it is consumed later by listenNet. Address-specific binding and
// SO_REUSEPORT handling remain to be implemented.
func NewStdNetBindForAddr(addr netip.Addr, reusePort bool, q int) *StdNetBind {
	b := NewStdNetBind()
	b.q = q
	// Explicitly discard the unused parameters until they are wired up, so
	// the intent is visible to linters and readers.
	_ = addr
	_ = reusePort
	return b
}
// newStdNetBind builds a StdNetBind with its address and message pools
// initialized.
func newStdNetBind() Bind {
	b := &StdNetBind{}
	b.udpAddrPool = sync.Pool{
		New: func() any {
			return &net.UDPAddr{IP: make([]byte, 16)}
		},
	}
	b.msgsPool = sync.Pool{
		New: func() any {
			// ipv6.Message and ipv4.Message are interchangeable as they are
			// both aliases for x/net/internal/socket.Message.
			msgs := make([]ipv6.Message, IdealBatchSize)
			for i := range msgs {
				msgs[i].Buffers = make(net.Buffers, 1)
				msgs[i].OOB = make([]byte, 0, stickyControlSize+gsoControlSize)
			}
			return &msgs
		},
	}
	return b
}
// StdNetEndpoint is the Endpoint implementation used by StdNetBind.
type StdNetEndpoint struct {
	// AddrPort is the endpoint destination.
	netip.AddrPort
	// src is the current sticky source address and interface index, if
	// supported. Typically this is a PKTINFO structure from/for control
	// messages, see unix.PKTINFO for an example.
	src []byte
}
// Compile-time interface satisfaction checks. Using typed nil pointers avoids
// an allocation at package init.
var (
	_ Bind     = (*StdNetBind)(nil)
	_ Endpoint = (*StdNetEndpoint)(nil)
)
// ParseEndpoint creates a new endpoint from an "ip:port" string.
func (*StdNetBind) ParseEndpoint(s string) (Endpoint, error) {
	ap, err := netip.ParseAddrPort(s)
	if err != nil {
		return nil, err
	}
	return &StdNetEndpoint{AddrPort: ap}, nil
}
// ClearSrc drops the sticky source address while keeping its allocation for
// reuse.
func (e *StdNetEndpoint) ClearSrc() {
	if e.src == nil {
		return
	}
	// Truncate src, no need to reallocate.
	e.src = e.src[:0]
}
// DstIP returns the destination IP of the endpoint.
func (e *StdNetEndpoint) DstIP() netip.Addr { return e.AddrPort.Addr() }
// See control_default,linux, etc for implementations of SrcIP and SrcIfidx.

// DstToBytes returns the destination in binary form, used for mac2 cookie
// calculations.
func (e *StdNetEndpoint) DstToBytes() []byte {
	buf, _ := e.AddrPort.MarshalBinary()
	return buf
}
// DstToString renders the destination as "ip:port".
func (e *StdNetEndpoint) DstToString() string { return e.AddrPort.String() }
// listenNet opens a UDP listener for the given network ("udp4"/"udp6") on
// port, applying the platform socket options from listenConfig(q). It returns
// the connection and the actual bound port (useful when port is 0).
//
// For queue 0 it also attaches the SO_ATTACH_REUSEPORT_EBPF program through
// the captured fd (EvilFdZero) so the kernel can steer packets across the
// SO_REUSEPORT group.
func listenNet(network string, port int, q int) (*net.UDPConn, int, error) {
	lc := listenConfig(q)
	conn, err := lc.ListenPacket(context.Background(), network, ":"+strconv.Itoa(port))
	if err != nil {
		return nil, 0, err
	}
	if q == 0 {
		if EvilFdZero == 0 {
			// listenConfig's control hook is expected to capture the fd for
			// queue 0; reaching this point means that wiring is broken.
			// Return an error instead of panicking so callers can clean up.
			conn.Close()
			return nil, 0, errors.New("listenNet: fd for queue 0 was not captured")
		}
		if err := reusePortHax(EvilFdZero); err != nil {
			conn.Close()
			return nil, 0, fmt.Errorf("reuse port hax: %w", err)
		}
	}
	// Retrieve the actual bound port.
	laddr := conn.LocalAddr()
	uaddr, err := net.ResolveUDPAddr(
		laddr.Network(),
		laddr.String(),
	)
	if err != nil {
		conn.Close()
		return nil, 0, err
	}
	return conn.(*net.UDPConn), uaddr.Port, nil
}
// Open binds the IPv4 and IPv6 sockets on the requested port (0 picks a
// random port shared by both families) and returns one ReceiveFunc per
// successfully opened socket plus the actual port.
func (s *StdNetBind) Open(uport uint16) ([]ReceiveFunc, uint16, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	var err error
	var tries int
	if s.ipv4 != nil || s.ipv6 != nil {
		return nil, 0, ErrBindAlreadyOpen
	}
	// Attempt to open ipv4 and ipv6 listeners on the same port.
	// If uport is 0, we can retry on failure.
again:
	port := int(uport)
	var v4conn, v6conn *net.UDPConn
	var v4pc *ipv4.PacketConn
	var v6pc *ipv6.PacketConn
	// EAFNOSUPPORT is tolerated so a single-stack host still works.
	v4conn, port, err = listenNet("udp4", port, s.q)
	if err != nil && !errors.Is(err, syscall.EAFNOSUPPORT) {
		return nil, 0, err
	}
	// Listen on the same port as we're using for ipv4.
	v6conn, port, err = listenNet("udp6", port, s.q)
	// A random port free for v4 may already be taken for v6; retry the whole
	// pair with a fresh random port, up to 100 times.
	if uport == 0 && errors.Is(err, syscall.EADDRINUSE) && tries < 100 {
		v4conn.Close()
		tries++
		goto again
	}
	if err != nil && !errors.Is(err, syscall.EAFNOSUPPORT) {
		v4conn.Close()
		return nil, 0, err
	}
	var fns []ReceiveFunc
	if v4conn != nil {
		s.ipv4TxOffload, s.ipv4RxOffload = supportsUDPOffload(v4conn)
		// Batched I/O via ipv4.PacketConn is only wired up on Linux/Android.
		if runtime.GOOS == "linux" || runtime.GOOS == "android" {
			v4pc = ipv4.NewPacketConn(v4conn)
			s.ipv4PC = v4pc
		}
		fns = append(fns, s.makeReceiveIPv4(v4pc, v4conn, s.ipv4RxOffload))
		s.ipv4 = v4conn
	}
	if v6conn != nil {
		s.ipv6TxOffload, s.ipv6RxOffload = supportsUDPOffload(v6conn)
		if runtime.GOOS == "linux" || runtime.GOOS == "android" {
			v6pc = ipv6.NewPacketConn(v6conn)
			s.ipv6PC = v6pc
		}
		fns = append(fns, s.makeReceiveIPv6(v6pc, v6conn, s.ipv6RxOffload))
		s.ipv6 = v6conn
	}
	// Neither family could be opened at all.
	if len(fns) == 0 {
		return nil, 0, syscall.EAFNOSUPPORT
	}
	return fns, uint16(port), nil
}
// putMessages scrubs per-message state and returns the slice to the pool,
// keeping the Buffers and OOB allocations for reuse.
func (s *StdNetBind) putMessages(msgs *[]ipv6.Message) {
	for i := range *msgs {
		m := &(*msgs)[i]
		m.OOB = m.OOB[:0]
		*m = ipv6.Message{Buffers: m.Buffers, OOB: m.OOB}
	}
	s.msgsPool.Put(msgs)
}
// getMessages takes a message slice (length IdealBatchSize) from the pool;
// callers must return it with putMessages.
func (s *StdNetBind) getMessages() *[]ipv6.Message {
	return s.msgsPool.Get().(*[]ipv6.Message)
}
var (
	// Compile-time guarantee that ipv4.Message and ipv6.Message stay the same
	// underlying type (both alias x/net/internal/socket.Message); the shared
	// message pool relies on this interchangeability.
	// If compilation fails here these are no longer the same underlying type.
	_ ipv6.Message = ipv4.Message{}
)
// batchReader abstracts the batched read method shared by ipv4.PacketConn and
// ipv6.PacketConn.
type batchReader interface {
	ReadBatch([]ipv6.Message, int) (int, error)
}

// batchWriter abstracts the batched write method shared by ipv4.PacketConn
// and ipv6.PacketConn.
type batchWriter interface {
	WriteBatch([]ipv6.Message, int) (int, error)
}
// receiveIP performs one receive on either address family, filling
// bufs/sizes/eps and returning the number of messages. When rxOffload is set
// the kernel may deliver GRO-coalesced datagrams, which are split back into
// individual packets before returning.
func (s *StdNetBind) receiveIP(
	br batchReader,
	conn *net.UDPConn,
	rxOffload bool,
	bufs [][]byte,
	sizes []int,
	eps []Endpoint,
) (n int, err error) {
	msgs := s.getMessages()
	for i := range bufs {
		(*msgs)[i].Buffers[0] = bufs[i]
		(*msgs)[i].OOB = (*msgs)[i].OOB[:cap((*msgs)[i].OOB)]
	}
	defer s.putMessages(msgs)
	var numMsgs int
	if runtime.GOOS == "linux" || runtime.GOOS == "android" {
		if rxOffload {
			// Read into the tail of msgs, leaving headroom at the front so
			// splitCoalescedMessages can fan segments out into earlier slots.
			readAt := len(*msgs) - (IdealBatchSize / udpSegmentMaxDatagrams)
			numMsgs, err = br.ReadBatch((*msgs)[readAt:], 0)
			if err != nil {
				return 0, err
			}
			numMsgs, err = splitCoalescedMessages(*msgs, readAt, getGSOSize)
			if err != nil {
				return 0, err
			}
		} else {
			numMsgs, err = br.ReadBatch(*msgs, 0)
			if err != nil {
				return 0, err
			}
		}
	} else {
		// Non-Linux: single-message read path.
		msg := &(*msgs)[0]
		msg.N, msg.NN, _, msg.Addr, err = conn.ReadMsgUDP(msg.Buffers[0], msg.OOB)
		if err != nil {
			return 0, err
		}
		numMsgs = 1
	}
	for i := 0; i < numMsgs; i++ {
		msg := &(*msgs)[i]
		sizes[i] = msg.N
		if sizes[i] == 0 {
			continue
		}
		addrPort := msg.Addr.(*net.UDPAddr).AddrPort()
		ep := &StdNetEndpoint{AddrPort: addrPort} // TODO: remove allocation
		// Capture the sticky source (PKTINFO) so replies go out the same
		// local address/interface the packet arrived on.
		getSrcFromControl(msg.OOB[:msg.NN], ep)
		eps[i] = ep
	}
	return numMsgs, nil
}
// makeReceiveIPv4 wraps the IPv4 socket pair in a ReceiveFunc closure.
func (s *StdNetBind) makeReceiveIPv4(pc *ipv4.PacketConn, conn *net.UDPConn, rxOffload bool) ReceiveFunc {
	return func(bufs [][]byte, sizes []int, eps []Endpoint) (int, error) {
		return s.receiveIP(pc, conn, rxOffload, bufs, sizes, eps)
	}
}
// makeReceiveIPv6 wraps the IPv6 socket pair in a ReceiveFunc closure.
func (s *StdNetBind) makeReceiveIPv6(pc *ipv6.PacketConn, conn *net.UDPConn, rxOffload bool) ReceiveFunc {
	return func(bufs [][]byte, sizes []int, eps []Endpoint) (int, error) {
		return s.receiveIP(pc, conn, rxOffload, bufs, sizes, eps)
	}
}
// BatchSize reports how many buffers callers should pass per send/receive.
// TODO: When all Binds handle IdealBatchSize, remove this dynamic function and
// rename the IdealBatchSize constant to BatchSize.
func (s *StdNetBind) BatchSize() int {
	switch runtime.GOOS {
	case "linux", "android":
		return IdealBatchSize
	}
	return 1
}
// Close tears down both sockets and resets all offload/blackhole state. If
// both closes fail, the IPv4 error takes precedence.
func (s *StdNetBind) Close() error {
	s.mu.Lock()
	defer s.mu.Unlock()

	var errV4, errV6 error
	if s.ipv4 != nil {
		errV4 = s.ipv4.Close()
		s.ipv4, s.ipv4PC = nil, nil
	}
	if s.ipv6 != nil {
		errV6 = s.ipv6.Close()
		s.ipv6, s.ipv6PC = nil, nil
	}

	s.blackhole4, s.blackhole6 = false, false
	s.ipv4TxOffload, s.ipv4RxOffload = false, false
	s.ipv6TxOffload, s.ipv6RxOffload = false, false

	if errV4 != nil {
		return errV4
	}
	return errV6
}
// ErrUDPGSODisabled is returned after UDP GSO was disabled on a socket
// because the kernel/NIC rejected an offloaded send; it wraps the result of
// the non-GSO retry.
type ErrUDPGSODisabled struct {
	// onLaddr is the local address of the socket GSO was disabled on.
	onLaddr string
	// RetryErr is the error, if any, from resending without GSO.
	RetryErr error
}
// Error implements the error interface.
func (e ErrUDPGSODisabled) Error() string {
	return "disabled UDP GSO on " + e.onLaddr + ", NIC(s) may not support checksum offload"
}
// Unwrap exposes the retry error for errors.Is / errors.As.
func (e ErrUDPGSODisabled) Unwrap() error { return e.RetryErr }
// Send transmits bufs to endpoint. It selects the socket by address family,
// attempts a GSO-coalesced send when TX offload is enabled, and transparently
// disables GSO and retries once if the kernel rejects the offloaded send.
func (s *StdNetBind) Send(bufs [][]byte, endpoint Endpoint) error {
	// Snapshot the family-specific state under the lock.
	s.mu.Lock()
	blackhole := s.blackhole4
	conn := s.ipv4
	offload := s.ipv4TxOffload
	br := batchWriter(s.ipv4PC)
	is6 := false
	if endpoint.DstIP().Is6() {
		blackhole = s.blackhole6
		conn = s.ipv6
		br = s.ipv6PC
		is6 = true
		offload = s.ipv6TxOffload
	}
	s.mu.Unlock()
	// Blackholed families silently drop; missing sockets are an error.
	if blackhole {
		return nil
	}
	if conn == nil {
		return syscall.EAFNOSUPPORT
	}
	msgs := s.getMessages()
	defer s.putMessages(msgs)
	// Build the destination *net.UDPAddr from a pooled allocation.
	ua := s.udpAddrPool.Get().(*net.UDPAddr)
	defer s.udpAddrPool.Put(ua)
	if is6 {
		as16 := endpoint.DstIP().As16()
		copy(ua.IP, as16[:])
		ua.IP = ua.IP[:16]
	} else {
		as4 := endpoint.DstIP().As4()
		copy(ua.IP, as4[:])
		ua.IP = ua.IP[:4]
	}
	ua.Port = int(endpoint.(*StdNetEndpoint).Port())
	var (
		retried bool
		err     error
	)
retry:
	if offload {
		// Coalesce bufs into fewer GSO messages before sending.
		n := coalesceMessages(ua, endpoint.(*StdNetEndpoint), bufs, *msgs, setGSOSize)
		err = s.send(conn, br, (*msgs)[:n])
		if err != nil && offload && errShouldDisableUDPGSO(err) {
			// The kernel/NIC refused the GSO send: permanently disable TX
			// offload for this family and resend without coalescing.
			offload = false
			s.mu.Lock()
			if is6 {
				s.ipv6TxOffload = false
			} else {
				s.ipv4TxOffload = false
			}
			s.mu.Unlock()
			retried = true
			goto retry
		}
	} else {
		// One message per buf; attach the sticky source control message.
		for i := range bufs {
			(*msgs)[i].Addr = ua
			(*msgs)[i].Buffers[0] = bufs[i]
			setSrcControl(&(*msgs)[i].OOB, endpoint.(*StdNetEndpoint))
		}
		err = s.send(conn, br, (*msgs)[:len(bufs)])
	}
	if retried {
		return ErrUDPGSODisabled{onLaddr: conn.LocalAddr().String(), RetryErr: err}
	}
	return err
}
// send transmits msgs, using batched writes on Linux/Android and per-message
// WriteMsgUDP elsewhere.
func (s *StdNetBind) send(conn *net.UDPConn, pc batchWriter, msgs []ipv6.Message) error {
	if runtime.GOOS == "linux" || runtime.GOOS == "android" {
		sent := 0
		for {
			n, err := pc.WriteBatch(msgs[sent:], 0)
			if err != nil {
				return err
			}
			sent += n
			if sent >= len(msgs) {
				return nil
			}
		}
	}
	for _, msg := range msgs {
		if _, _, err := conn.WriteMsgUDP(msg.Buffers[0], msg.OOB, msg.Addr.(*net.UDPAddr)); err != nil {
			return err
		}
	}
	return nil
}
const (
	// Exceeding these values results in EMSGSIZE. They account for layer3 and
	// layer4 headers (20-byte IPv4 header + 8-byte UDP header). IPv6 does not
	// need to account for itself as the payload length field is self
	// excluding.
	maxIPv4PayloadLen = 1<<16 - 1 - 20 - 8
	maxIPv6PayloadLen = 1<<16 - 1 - 8
	// This is a hard limit imposed by the kernel.
	udpSegmentMaxDatagrams = 64
)
// setGSOFunc writes a UDP GSO segment size into a control-message buffer.
type setGSOFunc func(control *[]byte, gsoSize uint16)
// coalesceMessages packs bufs (all destined for ep) into as few messages as
// possible for UDP GSO: consecutive payloads no larger than the batch's
// segment size are appended into one buffer, which is tagged with the segment
// size via setGSO. It returns the number of msgs entries populated.
func coalesceMessages(addr *net.UDPAddr, ep *StdNetEndpoint, bufs [][]byte, msgs []ipv6.Message, setGSO setGSOFunc) int {
	var (
		base     = -1 // index of msg we are currently coalescing into
		gsoSize  int  // segmentation size of msgs[base]
		dgramCnt int  // number of dgrams coalesced into msgs[base]
		endBatch bool // tracking flag to start a new batch on next iteration of bufs
	)
	maxPayloadLen := maxIPv4PayloadLen
	if ep.DstIP().Is6() {
		maxPayloadLen = maxIPv6PayloadLen
	}
	for i, buf := range bufs {
		if i > 0 {
			msgLen := len(buf)
			baseLenBefore := len(msgs[base].Buffers[0])
			freeBaseCap := cap(msgs[base].Buffers[0]) - baseLenBefore
			// buf may join the current batch only if it fits the payload and
			// capacity limits, is not larger than the batch's segment size,
			// stays under the kernel's datagram cap, and the batch was not
			// already terminated by a short tail.
			if msgLen+baseLenBefore <= maxPayloadLen &&
				msgLen <= gsoSize &&
				msgLen <= freeBaseCap &&
				dgramCnt < udpSegmentMaxDatagrams &&
				!endBatch {
				msgs[base].Buffers[0] = append(msgs[base].Buffers[0], buf...)
				if i == len(bufs)-1 {
					setGSO(&msgs[base].OOB, uint16(gsoSize))
				}
				dgramCnt++
				if msgLen < gsoSize {
					// A smaller than gsoSize packet on the tail is legal, but
					// it must end the batch.
					endBatch = true
				}
				continue
			}
		}
		// Only multi-datagram batches need the GSO control message.
		if dgramCnt > 1 {
			setGSO(&msgs[base].OOB, uint16(gsoSize))
		}
		// Reset prior to incrementing base since we are preparing to start a
		// new potential batch.
		endBatch = false
		base++
		gsoSize = len(buf)
		setSrcControl(&msgs[base].OOB, ep)
		msgs[base].Buffers[0] = buf
		msgs[base].Addr = addr
		dgramCnt = 1
	}
	return base + 1
}
// getGSOFunc extracts the UDP GRO segment size from a control-message buffer.
type getGSOFunc func(control []byte) (int, error)
// splitCoalescedMessages expands GRO-coalesced messages (read starting at
// firstMsgAt) back into individual datagrams at the front of msgs, using the
// GSO size extracted from each message's control data. It returns the total
// number of datagrams produced.
func splitCoalescedMessages(msgs []ipv6.Message, firstMsgAt int, getGSO getGSOFunc) (n int, err error) {
	for i := firstMsgAt; i < len(msgs); i++ {
		msg := &msgs[i]
		// A zero-length message marks the end of the received batch.
		if msg.N == 0 {
			return n, err
		}
		var (
			gsoSize    int
			start      int
			end        = msg.N
			numToSplit = 1
		)
		gsoSize, err = getGSO(msg.OOB[:msg.NN])
		if err != nil {
			return n, err
		}
		// gsoSize > 0 means the kernel coalesced several segments of that
		// size into this message; compute how many pieces to fan out.
		if gsoSize > 0 {
			numToSplit = (msg.N + gsoSize - 1) / gsoSize
			end = gsoSize
		}
		for j := 0; j < numToSplit; j++ {
			// Splitting must never produce more output datagrams than there
			// are slots ahead of the source message.
			if n > i {
				return n, errors.New("splitting coalesced packet resulted in overflow")
			}
			copied := copy(msgs[n].Buffers[0], msg.Buffers[0][start:end])
			msgs[n].N = copied
			msgs[n].Addr = msg.Addr
			start = end
			end += gsoSize
			if end > msg.N {
				end = msg.N
			}
			n++
		}
		if i != n-1 {
			// It is legal for bytes to move within msg.Buffers[0] as a result
			// of splitting, so we only zero the source msg len when it is not
			// the destination of the last split operation above.
			msg.N = 0
		}
	}
	return n, nil
}

131
wgstack/conn/conn.go Normal file
View File

@@ -0,0 +1,131 @@
// SPDX-License-Identifier: MIT
//
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
package conn
import (
"errors"
"fmt"
"net/netip"
"reflect"
"runtime"
"strings"
)
const (
	// IdealBatchSize is the maximum number of packets handled per read and
	// write; batching Binds size their message pools to this.
	IdealBatchSize = 128
)

// A ReceiveFunc receives at least one packet from the network and writes them
// into packets. On a successful read it returns the number of elements of
// sizes, packets, and endpoints that should be evaluated. Some elements of
// sizes may be zero, and callers should ignore them. Callers must pass a sizes
// and eps slice with a length greater than or equal to the length of packets.
// These lengths must not exceed the length of the associated Bind.BatchSize().
type ReceiveFunc func(packets [][]byte, sizes []int, eps []Endpoint) (n int, err error)
// A Bind listens on a port for both IPv6 and IPv4 UDP traffic.
//
// A Bind interface may also be a PeekLookAtSocketFd or BindSocketToInterface,
// depending on the platform-specific implementation.
type Bind interface {
	// Open puts the Bind into a listening state on a given port and reports the actual
	// port that it bound to. Passing zero results in a random selection.
	// fns is the set of functions that will be called to receive packets.
	Open(port uint16) (fns []ReceiveFunc, actualPort uint16, err error)

	// Close closes the Bind listener.
	// All fns returned by Open must return net.ErrClosed after a call to Close.
	Close() error

	// SetMark sets the mark for each packet sent through this Bind.
	// This mark is passed to the kernel as the socket option SO_MARK.
	SetMark(mark uint32) error

	// Send writes one or more packets in bufs to address ep. The length of
	// bufs must not exceed BatchSize().
	Send(bufs [][]byte, ep Endpoint) error

	// ParseEndpoint creates a new endpoint from a string.
	ParseEndpoint(s string) (Endpoint, error)

	// BatchSize is the number of buffers expected to be passed to
	// the ReceiveFuncs, and the maximum expected to be passed to SendBatch.
	BatchSize() int
}
// BindSocketToInterface is implemented by Bind objects that support being
// tied to a single network interface. Used by wireguard-windows.
type BindSocketToInterface interface {
	BindSocketToInterface4(interfaceIndex uint32, blackhole bool) error
	BindSocketToInterface6(interfaceIndex uint32, blackhole bool) error
}

// PeekLookAtSocketFd is implemented by Bind objects that support having their
// file descriptor peeked at. Used by wireguard-android.
type PeekLookAtSocketFd interface {
	PeekLookAtSocketFd4() (fd int, err error)
	PeekLookAtSocketFd6() (fd int, err error)
}
// An Endpoint maintains the source/destination caching for a peer.
//
//	dst: the remote address of a peer ("endpoint" in uapi terminology)
//	src: the local address from which datagrams originate going to the peer
type Endpoint interface {
	ClearSrc()           // clears the source address
	SrcToString() string // returns the local source address (ip:port)
	DstToString() string // returns the destination address (ip:port)
	DstToBytes() []byte  // used for mac2 cookie calculations
	DstIP() netip.Addr   // destination IP only
	SrcIP() netip.Addr   // local source IP only
}
var (
	// ErrBindAlreadyOpen is returned by Open when the bind is already listening.
	ErrBindAlreadyOpen = errors.New("bind is already open")
	// ErrWrongEndpointType is returned when an Endpoint of a different
	// concrete type than the Bind expects is supplied.
	ErrWrongEndpointType = errors.New("endpoint type does not correspond with bind type")
)
// PrettyName derives a short human-readable name for a ReceiveFunc from its
// runtime function name: it strips the package path and generated closure
// suffixes (".funcNN"), and maps names ending in IPv4/IPv6 to "v4"/"v6".
func (fn ReceiveFunc) PrettyName() string {
	name := runtime.FuncForPC(reflect.ValueOf(fn).Pointer()).Name()
	// 0. cheese/taco.beansIPv6.func12.func21218-fm
	name = strings.TrimSuffix(name, "-fm")
	// 1. cheese/taco.beansIPv6.func12.func21218
	if idx := strings.LastIndexByte(name, '/'); idx != -1 {
		name = name[idx+1:]
		// 2. taco.beansIPv6.func12.func21218
	}
	// Repeatedly strip one trailing ".func<digits>" segment per iteration.
	for {
		var idx int
		// Walk back over trailing digits.
		for idx = len(name) - 1; idx >= 0; idx-- {
			if name[idx] < '0' || name[idx] > '9' {
				break
			}
		}
		// No trailing digits at all: nothing more to strip.
		if idx == len(name)-1 {
			break
		}
		const dotFunc = ".func"
		if !strings.HasSuffix(name[:idx+1], dotFunc) {
			break
		}
		name = name[:idx+1-len(dotFunc)]
		// 3. taco.beansIPv6.func12
		// 4. taco.beansIPv6
	}
	if idx := strings.LastIndexByte(name, '.'); idx != -1 {
		name = name[idx+1:]
		// 5. beansIPv6
	}
	// Fall back to the function's address when nothing readable remains.
	if name == "" {
		return fmt.Sprintf("%p", fn)
	}
	if strings.HasSuffix(name, "IPv4") {
		return "v4"
	}
	if strings.HasSuffix(name, "IPv6") {
		return "v6"
	}
	return name
}

222
wgstack/conn/controlfns.go Normal file
View File

@@ -0,0 +1,222 @@
// SPDX-License-Identifier: MIT
//
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
package conn
import (
"fmt"
"net"
"syscall"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
)
// UDP socket read/write buffer size (7MB). The value of 7MB is chosen as it is
// the max supported by a default configuration of macOS. Some platforms will
// silently clamp the value to other maximums, such as linux clamping to
// net.core.{r,w}mem_max (see _linux.go for additional implementation that works
// around this limitation)
const socketBufferSize = 7 << 20
// controlFn is the callback function signature from net.ListenConfig.Control.
// It is used to apply platform specific configuration to the socket prior to
// bind.
type controlFn func(network, address string, c syscall.RawConn) error

// controlFns is a list of functions that are called from the listen config
// that can apply socket options. Platform-specific files append to this list.
var controlFns = []controlFn{}
// SO_ATTACH_REUSEPORT_EBPF is the SOL_SOCKET option used to attach an eBPF
// program that selects the receiving socket within a SO_REUSEPORT group
// (value 52 from the Linux UAPI; not exported by the syscall package).
const SO_ATTACH_REUSEPORT_EBPF = 52
// createReuseportProgram builds the eBPF program attached via
// SO_ATTACH_REUSEPORT_EBPF to pick a socket within the SO_REUSEPORT group.
//
// NOTE(review): the current program is debug-only — it writes a short marker
// string to the trace buffer via bpf_trace_printk and then returns 0, which
// steers every packet to socket index 0. Real fan-out (e.g. hash-based
// selection from sk_reuseport_md) still needs to be implemented.
func createReuseportProgram() (*ebpf.Program, error) {
	instructions := asm.Instructions{
		// R6 = ctx (sk_reuseport_md pointer), saved across the helper call.
		asm.Mov.Reg(asm.R6, asm.R1),

		// Build the marker string on the stack and point R1 at it.
		asm.Mov.Reg(asm.R1, asm.R10),                     // R1 = frame pointer
		asm.Add.Imm(asm.R1, -16),                         // R1 = stack slot for string
		asm.StoreImm(asm.R1, 0, 0x2066706220, asm.DWord), // "bpf "
		asm.StoreImm(asm.R1, 8, 0x0a21, asm.DWord),       // "!\n"

		// Call bpf_trace_printk(fmt, fmt_size); R1 already holds fmt.
		asm.Mov.Imm(asm.R2, 16), // R2 = format size
		asm.Call.Label("bpf_printk"),

		// Return 0: select socket 0 (debug behavior).
		asm.Mov.Imm(asm.R0, 0),
		asm.Return(),
	}
	return ebpf.NewProgram(&ebpf.ProgramSpec{
		Type:         ebpf.SkReuseport,
		Instructions: instructions,
		License:      "GPL",
	})
}
//func createReuseportProgram() (*ebpf.Program, error) {
// // Try offset 20 (common in newer kernels)
// instructions := asm.Instructions{
// asm.LoadMem(asm.R0, asm.R1, 20, asm.Word),
// asm.Return(),
// }
//
// prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
// Type: ebpf.SkReuseport,
// Instructions: instructions,
// License: "GPL",
// })
//
// return prog, err
//}
// reusePortHax builds the SO_REUSEPORT selection eBPF program and attaches it
// to the reuseport group that fd belongs to.
//
// Once attached via setsockopt, the kernel holds its own reference to the
// program, so our local handle can be closed without detaching it; the
// original leaked the program fd here (note the commented-out Close).
func reusePortHax(fd uintptr) error {
	prog, err := createReuseportProgram()
	if err != nil {
		return fmt.Errorf("failed to create eBPF program: %w", err)
	}
	// Safe to close after attach: the socket keeps the program alive.
	defer prog.Close()
	if err := syscall.SetsockoptInt(int(fd), syscall.SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, prog.FD()); err != nil {
		return fmt.Errorf("attaching reuseport eBPF program: %w", err)
	}
	return nil
}
var EvilFdZero uintptr
// listenConfig returns a net.ListenConfig that applies the controlFns to the
// socket prior to bind. This is used to apply socket buffer sizing and packet
// information OOB configuration for sticky sockets.
//
// q is the receive-queue index; for queue 0 the raw fd is additionally
// captured into EvilFdZero for later use.
func listenConfig(q int) *net.ListenConfig {
	return &net.ListenConfig{
		Control: func(network, address string, c syscall.RawConn) error {
			// Platform socket options run first; any failure aborts the bind.
			for _, fn := range controlFns {
				if err := fn(network, address, c); err != nil {
					return err
				}
			}
			if q == 0 {
				// Control only fails when the conn is closed; surface that
				// instead of silently losing the fd (was previously ignored).
				if err := c.Control(func(fd uintptr) {
					EvilFdZero = fd
				}); err != nil {
					return err
				}
			}
			return nil
		},
	}
}

View File

@@ -0,0 +1,66 @@
//go:build linux
// SPDX-License-Identifier: MIT
//
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
package conn
import (
"fmt"
"runtime"
"syscall"
"golang.org/x/sys/unix"
)
// init registers the Linux-specific socket control functions applied to every
// UDP socket before bind (consumed by listenConfig via controlFns).
func init() {
	controlFns = append(controlFns,
		// Attempt to set the socket buffer size beyond net.core.{r,w}mem_max by
		// using SO_*BUFFORCE. This requires CAP_NET_ADMIN, and is allowed here to
		// fail silently - the result of failure is lower performance on very fast
		// links or high latency links.
		func(network, address string, c syscall.RawConn) error {
			return c.Control(func(fd uintptr) {
				// Set up to *mem_max
				_ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_RCVBUF, socketBufferSize)
				_ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_SNDBUF, socketBufferSize)
				// Set beyond *mem_max if CAP_NET_ADMIN
				_ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_RCVBUFFORCE, socketBufferSize)
				_ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_SNDBUFFORCE, socketBufferSize)
				// NOTE(review): the three experimental options below swallow
				// their errors — confirm REUSEPORT/UDP_GRO/UDP_SEGMENT are
				// intended to be best-effort before shipping.
				_ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_REUSEPORT, 1) //todo!!!
				_ = unix.SetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_GRO, 1) //todo!!!
				_ = unix.SetsockoptInt(int(fd), unix.SOL_UDP, unix.UDP_SEGMENT, 0xffff) //todo!!!
				//print(err.Error())
			})
		},
		// Enable receiving of the packet information (IP_PKTINFO for IPv4,
		// IPV6_PKTINFO for IPv6) that is used to implement sticky socket support.
		func(network, address string, c syscall.RawConn) error {
			var err error
			switch network {
			case "udp4":
				if runtime.GOOS != "android" {
					// NOTE(review): the error returned by c.Control itself is
					// discarded here; only the setsockopt error is surfaced.
					c.Control(func(fd uintptr) {
						err = unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_PKTINFO, 1)
					})
				}
			case "udp6":
				c.Control(func(fd uintptr) {
					if runtime.GOOS != "android" {
						err = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_RECVPKTINFO, 1)
						if err != nil {
							return
						}
					}
					// Restrict the socket to IPv6 only (no v4-mapped traffic).
					err = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_V6ONLY, 1)
				})
			default:
				err = fmt.Errorf("unhandled network: %s: %w", network, unix.EINVAL)
			}
			return err
		},
	)
}

9
wgstack/conn/default.go Normal file
View File

@@ -0,0 +1,9 @@
//go:build !windows
// SPDX-License-Identifier: MIT
//
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
package conn
func NewDefaultBind() Bind { return NewStdNetBind() }

View File

@@ -0,0 +1,12 @@
//go:build !linux
/* SPDX-License-Identifier: MIT
*
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
*/
package conn
// errShouldDisableUDPGSO reports whether err indicates UDP GSO must be
// disabled. Non-Linux platforms never use UDP GSO, so this is always false.
func errShouldDisableUDPGSO(err error) bool {
	return false
}

View File

@@ -0,0 +1,26 @@
/* SPDX-License-Identifier: MIT
*
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
*/
package conn
import (
"errors"
"os"
"golang.org/x/sys/unix"
)
// errShouldDisableUDPGSO reports whether err is the specific syscall failure
// (EIO) that udp_send_skb() returns when the device driver lacks tx
// checksumming, a hard requirement of UDP_SEGMENT. See:
// https://git.kernel.org/pub/scm/docs/man-pages/man-pages.git/tree/man7/udp.7?id=806eabd74910447f21005160e90957bde4db0183#n228
// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/net/ipv4/udp.c?h=v6.2&id=c9c3395d5e3dcc6daee66c6908354d47bf98cb0c#n942
func errShouldDisableUDPGSO(err error) bool {
	var sysErr *os.SyscallError
	if !errors.As(err, &sysErr) {
		return false
	}
	return sysErr.Err == unix.EIO
}

View File

@@ -0,0 +1,15 @@
//go:build !linux
// +build !linux
/* SPDX-License-Identifier: MIT
*
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
*/
package conn
import "net"
// supportsUDPOffload reports whether conn supports UDP segmentation (tx) and
// receive (rx) offload. Always (false, false) on platforms without
// UDP_SEGMENT/UDP_GRO.
func supportsUDPOffload(conn *net.UDPConn) (txOffload, rxOffload bool) {
	return
}

View File

@@ -0,0 +1,33 @@
/* SPDX-License-Identifier: MIT
*
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
*/
package conn
import (
"fmt"
"net"
"golang.org/x/sys/unix"
)
// supportsUDPOffload probes conn for UDP_SEGMENT (tx) and UDP_GRO (rx)
// support by reading the socket options back off the raw fd.
// NOTE(review): the fmt.Printf below looks like leftover debug output —
// confirm whether it can be removed (kept here to preserve behavior exactly).
func supportsUDPOffload(conn *net.UDPConn) (txOffload, rxOffload bool) {
	rawConn, err := conn.SyscallConn()
	if err != nil {
		return
	}
	segment := 0
	err = rawConn.Control(func(fd uintptr) {
		segment, err = unix.GetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_SEGMENT)
		txOffload = err == nil
		gro, groErr := unix.GetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_GRO)
		rxOffload = groErr == nil && gro == 1
	})
	fmt.Printf("%d", segment)
	if err != nil {
		return false, false
	}
	return txOffload, rxOffload
}

View File

@@ -0,0 +1,21 @@
//go:build !linux
/* SPDX-License-Identifier: MIT
*
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
*/
package conn
// getGSOSize parses control for UDP_GRO and if found returns its GSO size data.
// Platforms without UDP_GRO have no such cmsg, so this always returns (0, nil).
func getGSOSize(control []byte) (int, error) {
	return 0, nil
}
// setGSOSize sets a UDP_SEGMENT in control based on gsoSize. It is a no-op on
// platforms without UDP_SEGMENT support.
func setGSOSize(control *[]byte, gsoSize uint16) {
}
// gsoControlSize returns the recommended buffer size for pooling sticky and UDP
// offloading control data.
const gsoControlSize = 0

65
wgstack/conn/gso_linux.go Normal file
View File

@@ -0,0 +1,65 @@
//go:build linux
/* SPDX-License-Identifier: MIT
*
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
*/
package conn
import (
"fmt"
"unsafe"
"golang.org/x/sys/unix"
)
const (
sizeOfGSOData = 2
)
// getGSOSize walks the socket control messages in control looking for a
// UDP_GRO cmsg and, when present, returns the native-endian uint16 GSO size
// it carries. A missing cmsg yields (0, nil); a malformed cmsg chain yields
// an error.
func getGSOSize(control []byte) (int, error) {
	remaining := control
	for len(remaining) > unix.SizeofCmsghdr {
		hdr, data, next, err := unix.ParseOneSocketControlMessage(remaining)
		if err != nil {
			return 0, fmt.Errorf("error parsing socket control message: %w", err)
		}
		if hdr.Level == unix.SOL_UDP && hdr.Type == unix.UDP_GRO && len(data) >= sizeOfGSOData {
			var gso uint16
			copy(unsafe.Slice((*byte)(unsafe.Pointer(&gso)), sizeOfGSOData), data[:sizeOfGSOData])
			return int(gso), nil
		}
		remaining = next
	}
	return 0, nil
}
// setGSOSize sets a UDP_SEGMENT in control based on gsoSize. It leaves existing
// data in control untouched.
func setGSOSize(control *[]byte, gsoSize uint16) {
	existingLen := len(*control)
	avail := cap(*control) - existingLen
	space := unix.CmsgSpace(sizeOfGSOData)
	// If the caller did not reserve room for the cmsg, silently skip it; the
	// datagram is then sent without UDP_SEGMENT.
	if avail < space {
		return
	}
	// Extend into the unused capacity and build the cmsg header in place.
	*control = (*control)[:cap(*control)]
	gsoControl := (*control)[existingLen:]
	hdr := (*unix.Cmsghdr)(unsafe.Pointer(&(gsoControl)[0]))
	hdr.Level = unix.SOL_UDP
	hdr.Type = unix.UDP_SEGMENT
	hdr.SetLen(unix.CmsgLen(sizeOfGSOData))
	// The payload is the raw native-endian uint16 GSO size.
	copy((gsoControl)[unix.CmsgLen(0):], unsafe.Slice((*byte)(unsafe.Pointer(&gsoSize)), sizeOfGSOData))
	*control = (*control)[:existingLen+space]
}
// gsoControlSize returns the recommended buffer size for pooling UDP
// offloading control data.
var gsoControlSize = unix.CmsgSpace(sizeOfGSOData)

64
wgstack/conn/mark_unix.go Normal file
View File

@@ -0,0 +1,64 @@
//go:build linux || openbsd || freebsd
// SPDX-License-Identifier: MIT
//
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
package conn
import (
"runtime"
"golang.org/x/sys/unix"
)
var fwmarkIoctl int
// init selects the per-OS socket option number used by SetMark to tag
// sockets with a routing mark. A zero value means the platform has no such
// option and SetMark becomes a no-op.
func init() {
	switch runtime.GOOS {
	case "linux", "android":
		fwmarkIoctl = 36 /* unix.SO_MARK */
	case "freebsd":
		fwmarkIoctl = 0x1015 /* unix.SO_USER_COOKIE */
	case "openbsd":
		fwmarkIoctl = 0x1021 /* unix.SO_RTABLE */
	}
}
// SetMark applies the platform routing-mark socket option (SO_MARK on Linux,
// SO_USER_COOKIE on FreeBSD, SO_RTABLE on OpenBSD) to whichever of the IPv4
// and IPv6 sockets exist. It is a no-op on platforms with no mark option.
func (s *StdNetBind) SetMark(mark uint32) error {
	if fwmarkIoctl == 0 {
		return nil
	}
	var sockoptErr error
	if s.ipv4 != nil {
		rc, err := s.ipv4.SyscallConn()
		if err != nil {
			return err
		}
		ctrlErr := rc.Control(func(fd uintptr) {
			sockoptErr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, fwmarkIoctl, int(mark))
		})
		if ctrlErr != nil {
			return ctrlErr
		}
		if sockoptErr != nil {
			return sockoptErr
		}
	}
	if s.ipv6 != nil {
		rc, err := s.ipv6.SyscallConn()
		if err != nil {
			return err
		}
		ctrlErr := rc.Control(func(fd uintptr) {
			sockoptErr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, fwmarkIoctl, int(mark))
		})
		if ctrlErr != nil {
			return ctrlErr
		}
		if sockoptErr != nil {
			return sockoptErr
		}
	}
	return nil
}

View File

@@ -0,0 +1,42 @@
//go:build !linux || android
/* SPDX-License-Identifier: MIT
*
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
*/
package conn
import "net/netip"
// SrcIP returns the sticky source address; always the zero Addr on platforms
// without sticky socket support.
func (e *StdNetEndpoint) SrcIP() netip.Addr {
	return netip.Addr{}
}
// SrcIfidx returns the sticky source interface index; always 0 on platforms
// without sticky socket support.
func (e *StdNetEndpoint) SrcIfidx() int32 {
	return 0
}
// SrcToString renders the sticky source address; always empty on platforms
// without sticky socket support.
func (e *StdNetEndpoint) SrcToString() string {
	return ""
}
// TODO: macOS, FreeBSD and other BSDs likely do support the sticky sockets
// {get,set}srcControl feature set, but use alternatively named flags and need
// ports and require testing.
// getSrcFromControl parses the control for PKTINFO and if found updates ep with
// the source information found. No-op on platforms without sticky sockets.
func getSrcFromControl(control []byte, ep *StdNetEndpoint) {
}
// setSrcControl writes PKTINFO source information from ep into control.
// No-op on platforms without sticky sockets.
func setSrcControl(control *[]byte, ep *StdNetEndpoint) {
}
// stickyControlSize returns the recommended buffer size for pooling sticky
// offloading control data.
const stickyControlSize = 0
const StdNetSupportsStickySockets = false

View File

@@ -0,0 +1,105 @@
package conn
import (
"net/netip"
"unsafe"
"golang.org/x/sys/unix"
)
// SrcIP returns the local source address held in e.src, which stores a raw
// IP_PKTINFO / IPV6_PKTINFO control message. The address family is inferred
// purely from the serialized cmsg length; any other length yields the zero
// Addr.
func (e *StdNetEndpoint) SrcIP() netip.Addr {
	switch len(e.src) {
	case unix.CmsgSpace(unix.SizeofInet4Pktinfo):
		info := (*unix.Inet4Pktinfo)(unsafe.Pointer(&e.src[unix.CmsgLen(0)]))
		return netip.AddrFrom4(info.Spec_dst)
	case unix.CmsgSpace(unix.SizeofInet6Pktinfo):
		info := (*unix.Inet6Pktinfo)(unsafe.Pointer(&e.src[unix.CmsgLen(0)]))
		// TODO: set zone. in order to do so we need to check if the address is
		// link local, and if it is perform a syscall to turn the ifindex into a
		// zone string because netip uses string zones.
		return netip.AddrFrom16(info.Addr)
	}
	return netip.Addr{}
}
// SrcIfidx returns the interface index recorded in the pktinfo cmsg held in
// e.src (family inferred from cmsg length, as in SrcIP), or 0 when no source
// information is recorded.
func (e *StdNetEndpoint) SrcIfidx() int32 {
	switch len(e.src) {
	case unix.CmsgSpace(unix.SizeofInet4Pktinfo):
		info := (*unix.Inet4Pktinfo)(unsafe.Pointer(&e.src[unix.CmsgLen(0)]))
		return info.Ifindex
	case unix.CmsgSpace(unix.SizeofInet6Pktinfo):
		info := (*unix.Inet6Pktinfo)(unsafe.Pointer(&e.src[unix.CmsgLen(0)]))
		return int32(info.Ifindex)
	}
	return 0
}
// SrcToString returns the string form of the recorded source IP.
func (e *StdNetEndpoint) SrcToString() string {
	return e.SrcIP().String()
}
// getSrcFromControl parses the control for PKTINFO and if found updates ep with
// the source information found. The raw cmsg (header + payload) is copied into
// ep.src so that SrcIP/SrcIfidx can later re-read it, and so setSrcControl can
// replay it verbatim on send. Parse errors are treated as "no source info".
func getSrcFromControl(control []byte, ep *StdNetEndpoint) {
	ep.ClearSrc()
	var (
		hdr  unix.Cmsghdr
		data []byte
		rem  []byte = control
		err  error
	)
	for len(rem) > unix.SizeofCmsghdr {
		hdr, data, rem, err = unix.ParseOneSocketControlMessage(rem)
		if err != nil {
			return
		}
		if hdr.Level == unix.IPPROTO_IP &&
			hdr.Type == unix.IP_PKTINFO {
			// Reuse ep.src's backing storage when it is big enough.
			if ep.src == nil || cap(ep.src) < unix.CmsgSpace(unix.SizeofInet4Pktinfo) {
				ep.src = make([]byte, 0, unix.CmsgSpace(unix.SizeofInet4Pktinfo))
			}
			ep.src = ep.src[:unix.CmsgSpace(unix.SizeofInet4Pktinfo)]
			hdrBuf := unsafe.Slice((*byte)(unsafe.Pointer(&hdr)), unix.SizeofCmsghdr)
			copy(ep.src, hdrBuf)
			copy(ep.src[unix.CmsgLen(0):], data)
			return
		}
		if hdr.Level == unix.IPPROTO_IPV6 &&
			hdr.Type == unix.IPV6_PKTINFO {
			// Same as above for the IPv6 pktinfo shape.
			if ep.src == nil || cap(ep.src) < unix.CmsgSpace(unix.SizeofInet6Pktinfo) {
				ep.src = make([]byte, 0, unix.CmsgSpace(unix.SizeofInet6Pktinfo))
			}
			ep.src = ep.src[:unix.CmsgSpace(unix.SizeofInet6Pktinfo)]
			hdrBuf := unsafe.Slice((*byte)(unsafe.Pointer(&hdr)), unix.SizeofCmsghdr)
			copy(ep.src, hdrBuf)
			copy(ep.src[unix.CmsgLen(0):], data)
			return
		}
	}
}
// setSrcControl sets an IP{V6}_PKTINFO in control based on the source address
// and source ifindex found in ep. control's len will be set to 0 in the event
// that ep is a default value.
func setSrcControl(control *[]byte, ep *StdNetEndpoint) {
	src := ep.src
	if cap(*control) < len(src) {
		return
	}
	*control = append((*control)[:0], src...)
}
// stickyControlSize returns the recommended buffer size for pooling sticky
// offloading control data.
var stickyControlSize = unix.CmsgSpace(unix.SizeofInet6Pktinfo)
const StdNetSupportsStickySockets = true

42
wgstack/tun/checksum.go Normal file
View File

@@ -0,0 +1,42 @@
package tun
import "encoding/binary"
// TODO: Explore SIMD and/or other assembly optimizations.

// checksumNoFold accumulates the ones-complement sum of b into initial
// without folding carries back into 16 bits. Bytes are consumed big-endian,
// four at a time while possible, then two, and a trailing odd byte counts as
// the high octet of a final 16-bit word.
func checksumNoFold(b []byte, initial uint64) uint64 {
	sum := initial
	for len(b) >= 4 {
		sum += uint64(binary.BigEndian.Uint32(b[:4]))
		b = b[4:]
	}
	for len(b) >= 2 {
		sum += uint64(binary.BigEndian.Uint16(b[:2]))
		b = b[2:]
	}
	if len(b) == 1 {
		sum += uint64(b[0]) << 8
	}
	return sum
}
// checksum folds the 64-bit accumulator from checksumNoFold down to a 16-bit
// ones-complement checksum. Four fold rounds are enough to absorb any carry a
// 64-bit sum can produce.
func checksum(b []byte, initial uint64) uint16 {
	acc := checksumNoFold(b, initial)
	for i := 0; i < 4; i++ {
		acc = (acc >> 16) + (acc & 0xffff)
	}
	return uint16(acc)
}
// pseudoHeaderChecksumNoFold computes the unfolded checksum of a TCP/UDP
// pseudo header: source address, destination address, a zero byte plus the
// protocol number, and the big-endian payload length.
func pseudoHeaderChecksumNoFold(protocol uint8, srcAddr, dstAddr []byte, totalLen uint16) uint64 {
	acc := checksumNoFold(srcAddr, 0)
	acc = checksumNoFold(dstAddr, acc)
	acc = checksumNoFold([]byte{0, protocol}, acc)
	var lenField [2]byte
	binary.BigEndian.PutUint16(lenField[:], totalLen)
	return checksumNoFold(lenField[:], acc)
}

3
wgstack/tun/export.go Normal file
View File

@@ -0,0 +1,3 @@
package tun
const VirtioNetHdrLen = virtioNetHdrLen

View File

@@ -0,0 +1,630 @@
//go:build linux
// SPDX-License-Identifier: MIT
//
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
package tun
import (
"bytes"
"encoding/binary"
"errors"
"io"
"unsafe"
wgconn "github.com/slackhq/nebula/wgstack/conn"
"golang.org/x/sys/unix"
)
var ErrTooManySegments = errors.New("tun: too many segments for TSO")
const tcpFlagsOffset = 13
const (
tcpFlagFIN uint8 = 0x01
tcpFlagPSH uint8 = 0x08
tcpFlagACK uint8 = 0x10
)
// virtioNetHdr is defined in the kernel in include/uapi/linux/virtio_net.h. The
// kernel symbol is virtio_net_hdr.
type virtioNetHdr struct {
flags uint8
gsoType uint8
hdrLen uint16
gsoSize uint16
csumStart uint16
csumOffset uint16
}
// decode deserializes the first virtioNetHdrLen bytes of b into v by direct
// memory copy (the struct layout matches the kernel's C ABI). Returns
// io.ErrShortBuffer when b is too small.
func (v *virtioNetHdr) decode(b []byte) error {
	if len(b) < virtioNetHdrLen {
		return io.ErrShortBuffer
	}
	copy(unsafe.Slice((*byte)(unsafe.Pointer(v)), virtioNetHdrLen), b[:virtioNetHdrLen])
	return nil
}
// encode serializes v into the first virtioNetHdrLen bytes of b by direct
// memory copy. Returns io.ErrShortBuffer when b is too small.
func (v *virtioNetHdr) encode(b []byte) error {
	if len(b) < virtioNetHdrLen {
		return io.ErrShortBuffer
	}
	copy(b[:virtioNetHdrLen], unsafe.Slice((*byte)(unsafe.Pointer(v)), virtioNetHdrLen))
	return nil
}
const (
// virtioNetHdrLen is the length in bytes of virtioNetHdr. This matches the
// shape of the C ABI for its kernel counterpart -- sizeof(virtio_net_hdr).
virtioNetHdrLen = int(unsafe.Sizeof(virtioNetHdr{}))
)
// flowKey represents the key for a flow.
type flowKey struct {
srcAddr, dstAddr [16]byte
srcPort, dstPort uint16
rxAck uint32 // varying ack values should not be coalesced. Treat them as separate flows.
}
// tcpGROTable holds flow and coalescing information for the purposes of GRO.
type tcpGROTable struct {
itemsByFlow map[flowKey][]tcpGROItem
itemsPool [][]tcpGROItem
}
// newTCPGROTable allocates a tcpGROTable with its flow map and per-flow item
// slices presized to the ideal batch size.
func newTCPGROTable() *tcpGROTable {
	table := &tcpGROTable{
		itemsByFlow: make(map[flowKey][]tcpGROItem, wgconn.IdealBatchSize),
		itemsPool:   make([][]tcpGROItem, wgconn.IdealBatchSize),
	}
	for i := 0; i < len(table.itemsPool); i++ {
		table.itemsPool[i] = make([]tcpGROItem, 0, wgconn.IdealBatchSize)
	}
	return table
}
// newFlowKey builds the flow lookup key for pkt from the byte offsets of its
// source address, destination address, and TCP header.
func newFlowKey(pkt []byte, srcAddr, dstAddr, tcphOffset int) flowKey {
	addrSize := dstAddr - srcAddr
	k := flowKey{
		srcPort: binary.BigEndian.Uint16(pkt[tcphOffset:]),
		dstPort: binary.BigEndian.Uint16(pkt[tcphOffset+2:]),
		rxAck:   binary.BigEndian.Uint32(pkt[tcphOffset+8:]),
	}
	copy(k.srcAddr[:], pkt[srcAddr:dstAddr])
	copy(k.dstAddr[:], pkt[dstAddr:dstAddr+addrSize])
	return k
}
// lookupOrInsert looks up a flow for the provided packet and metadata,
// returning the packets found for the flow, or inserting a new one if none
// is found.
func (t *tcpGROTable) lookupOrInsert(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex int) ([]tcpGROItem, bool) {
	key := newFlowKey(pkt, srcAddrOffset, dstAddrOffset, tcphOffset)
	if items, ok := t.itemsByFlow[key]; ok {
		return items, true
	}
	// TODO: insert() performs another map lookup. This could be rearranged to avoid.
	t.insert(pkt, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex)
	return nil, false
}
// insert records bookkeeping for pkt under its flow key, drawing the item
// slice from the pool when this is the first packet of the flow.
func (t *tcpGROTable) insert(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex int) {
	key := newFlowKey(pkt, srcAddrOffset, dstAddrOffset, tcphOffset)
	newItem := tcpGROItem{
		key:       key,
		bufsIndex: uint16(bufsIndex),
		gsoSize:   uint16(len(pkt[tcphOffset+tcphLen:])),
		iphLen:    uint8(tcphOffset),
		tcphLen:   uint8(tcphLen),
		sentSeq:   binary.BigEndian.Uint32(pkt[tcphOffset+4:]),
		pshSet:    pkt[tcphOffset+tcpFlagsOffset]&tcpFlagPSH != 0,
	}
	items, ok := t.itemsByFlow[key]
	if !ok {
		items = t.newItems()
	}
	t.itemsByFlow[key] = append(items, newItem)
}
// updateAt overwrites the i-th bookkeeping entry stored for item's flow.
func (t *tcpGROTable) updateAt(item tcpGROItem, i int) {
	t.itemsByFlow[item.key][i] = item
}
// deleteAt removes the i-th bookkeeping entry for the flow identified by key.
func (t *tcpGROTable) deleteAt(key flowKey, i int) {
	items := t.itemsByFlow[key]
	t.itemsByFlow[key] = append(items[:i], items[i+1:]...)
}
// tcpGROItem represents bookkeeping data for a TCP packet during the lifetime
// of a GRO evaluation across a vector of packets.
type tcpGROItem struct {
key flowKey
sentSeq uint32 // the sequence number
bufsIndex uint16 // the index into the original bufs slice
numMerged uint16 // the number of packets merged into this item
gsoSize uint16 // payload size
iphLen uint8 // ip header len
tcphLen uint8 // tcp header len
pshSet bool // psh flag is set
}
// newItems pops a reusable, zero-length item slice from the pool.
func (t *tcpGROTable) newItems() []tcpGROItem {
	last := len(t.itemsPool) - 1
	items := t.itemsPool[last]
	t.itemsPool = t.itemsPool[:last]
	return items
}
// reset returns every per-flow slice (truncated, capacity kept) to the pool
// and clears the flow map for the next batch.
func (t *tcpGROTable) reset() {
	for key, items := range t.itemsByFlow {
		t.itemsPool = append(t.itemsPool, items[:0])
		delete(t.itemsByFlow, key)
	}
}
// canCoalesce represents the outcome of checking if two TCP packets are
// candidates for coalescing.
type canCoalesce int
const (
coalescePrepend canCoalesce = -1
coalesceUnavailable canCoalesce = 0
coalesceAppend canCoalesce = 1
)
// tcpPacketsCanCoalesce evaluates if pkt can be coalesced with the packet
// described by item. This function makes considerations that match the kernel's
// GRO self tests, which can be found in tools/testing/selftests/net/gro.c.
func tcpPacketsCanCoalesce(pkt []byte, iphLen, tcphLen uint8, seq uint32, pshSet bool, gsoSize uint16, item tcpGROItem, bufs [][]byte, bufsOffset int) canCoalesce {
	pktTarget := bufs[item.bufsIndex][bufsOffset:]
	if tcphLen != item.tcphLen {
		// cannot coalesce with unequal tcp options len
		return coalesceUnavailable
	}
	if tcphLen > 20 {
		// Bound the target's options with its own header lengths. The original
		// used the candidate's iphLen+tcphLen here, which only coincides with
		// item's bounds because v4 coalescing is gated to no-options headers;
		// upstream wireguard-go uses item.iphLen+item.tcphLen.
		if !bytes.Equal(pkt[iphLen+20:iphLen+tcphLen], pktTarget[item.iphLen+20:item.iphLen+item.tcphLen]) {
			// cannot coalesce with unequal tcp options
			return coalesceUnavailable
		}
	}
	if pkt[0]>>4 == 6 {
		if pkt[0] != pktTarget[0] || pkt[1]>>4 != pktTarget[1]>>4 {
			// cannot coalesce with unequal Traffic class values
			return coalesceUnavailable
		}
		if pkt[7] != pktTarget[7] {
			// cannot coalesce with unequal Hop limit values
			return coalesceUnavailable
		}
	} else {
		if pkt[1] != pktTarget[1] {
			// cannot coalesce with unequal ToS values
			return coalesceUnavailable
		}
		if pkt[6]>>5 != pktTarget[6]>>5 {
			// cannot coalesce with unequal DF or reserved bits. MF is checked
			// further up the stack.
			return coalesceUnavailable
		}
		if pkt[8] != pktTarget[8] {
			// cannot coalesce with unequal TTL values
			return coalesceUnavailable
		}
	}
	// seq adjacency
	lhsLen := item.gsoSize
	lhsLen += item.numMerged * item.gsoSize
	if seq == item.sentSeq+uint32(lhsLen) { // pkt aligns following item from a seq num perspective
		if item.pshSet {
			// We cannot append to a segment that has the PSH flag set, PSH
			// can only be set on the final segment in a reassembled group.
			return coalesceUnavailable
		}
		if len(pktTarget[iphLen+tcphLen:])%int(item.gsoSize) != 0 {
			// A smaller than gsoSize packet has been appended previously.
			// Nothing can come after a smaller packet on the end.
			return coalesceUnavailable
		}
		if gsoSize > item.gsoSize {
			// We cannot have a larger packet following a smaller one.
			return coalesceUnavailable
		}
		return coalesceAppend
	} else if seq+uint32(gsoSize) == item.sentSeq { // pkt aligns in front of item from a seq num perspective
		if pshSet {
			// We cannot prepend with a segment that has the PSH flag set, PSH
			// can only be set on the final segment in a reassembled group.
			return coalesceUnavailable
		}
		if gsoSize < item.gsoSize {
			// We cannot have a larger packet following a smaller one.
			return coalesceUnavailable
		}
		if gsoSize > item.gsoSize && item.numMerged > 0 {
			// There's at least one previous merge, and we're larger than all
			// previous. This would put multiple smaller packets on the end.
			return coalesceUnavailable
		}
		return coalescePrepend
	}
	return coalesceUnavailable
}
// tcpChecksumValid reports whether the TCP checksum of pkt (an IP header of
// iphLen bytes followed by the TCP segment) verifies against its pseudo
// header.
func tcpChecksumValid(pkt []byte, iphLen uint8, isV6 bool) bool {
	addrOffset, addrSize := ipv4SrcAddrOffset, 4
	if isV6 {
		addrOffset, addrSize = ipv6SrcAddrOffset, 16
	}
	payloadLen := uint16(len(pkt) - int(iphLen))
	pseudo := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, pkt[addrOffset:addrOffset+addrSize], pkt[addrOffset+addrSize:addrOffset+addrSize*2], payloadLen)
	return ^checksum(pkt[iphLen:], pseudo) == 0
}
// coalesceResult represents the result of attempting to coalesce two TCP
// packets.
type coalesceResult int
const (
coalesceInsufficientCap coalesceResult = 0
coalescePSHEnding coalesceResult = 1
coalesceItemInvalidCSum coalesceResult = 2
coalescePktInvalidCSum coalesceResult = 3
coalesceSuccess coalesceResult = 4
)
// coalesceTCPPackets attempts to coalesce pkt with the packet described by
// item, returning the outcome. This function may swap bufs elements in the
// event of a prepend as item's bufs index is already being tracked for writing
// to a Device.
func coalesceTCPPackets(mode canCoalesce, pkt []byte, pktBuffsIndex int, gsoSize uint16, seq uint32, pshSet bool, item *tcpGROItem, bufs [][]byte, bufsOffset int, isV6 bool) coalesceResult {
	var pktHead []byte // the packet that will end up at the front
	headersLen := item.iphLen + item.tcphLen
	coalescedLen := len(bufs[item.bufsIndex][bufsOffset:]) + len(pkt) - int(headersLen)
	// Copy data
	if mode == coalescePrepend {
		pktHead = pkt
		if cap(pkt)-bufsOffset < coalescedLen {
			// We don't want to allocate a new underlying array if capacity is
			// too small.
			return coalesceInsufficientCap
		}
		if pshSet {
			return coalescePSHEnding
		}
		// Checksums are validated lazily: the item's own packet is only
		// checked the first time it participates in a merge.
		if item.numMerged == 0 {
			if !tcpChecksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, isV6) {
				return coalesceItemInvalidCSum
			}
		}
		if !tcpChecksumValid(pkt, item.iphLen, isV6) {
			return coalescePktInvalidCSum
		}
		item.sentSeq = seq
		extendBy := coalescedLen - len(pktHead)
		bufs[pktBuffsIndex] = append(bufs[pktBuffsIndex], make([]byte, extendBy)...)
		copy(bufs[pktBuffsIndex][bufsOffset+len(pkt):], bufs[item.bufsIndex][bufsOffset+int(headersLen):])
		// Flip the slice headers in bufs as part of prepend. The index of item
		// is already being tracked for writing.
		bufs[item.bufsIndex], bufs[pktBuffsIndex] = bufs[pktBuffsIndex], bufs[item.bufsIndex]
	} else {
		pktHead = bufs[item.bufsIndex][bufsOffset:]
		if cap(pktHead)-bufsOffset < coalescedLen {
			// We don't want to allocate a new underlying array if capacity is
			// too small.
			return coalesceInsufficientCap
		}
		if item.numMerged == 0 {
			if !tcpChecksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, isV6) {
				return coalesceItemInvalidCSum
			}
		}
		if !tcpChecksumValid(pkt, item.iphLen, isV6) {
			return coalescePktInvalidCSum
		}
		if pshSet {
			// We are appending a segment with PSH set.
			item.pshSet = pshSet
			pktHead[item.iphLen+tcpFlagsOffset] |= tcpFlagPSH
		}
		extendBy := len(pkt) - int(headersLen)
		bufs[item.bufsIndex] = append(bufs[item.bufsIndex], make([]byte, extendBy)...)
		copy(bufs[item.bufsIndex][bufsOffset+len(pktHead):], pkt[headersLen:])
	}
	if gsoSize > item.gsoSize {
		item.gsoSize = gsoSize
	}
	// 16 is the offset of the checksum field within the TCP header.
	hdr := virtioNetHdr{
		flags:      unix.VIRTIO_NET_HDR_F_NEEDS_CSUM, // this turns into CHECKSUM_PARTIAL in the skb
		hdrLen:     uint16(headersLen),
		gsoSize:    uint16(item.gsoSize),
		csumStart:  uint16(item.iphLen),
		csumOffset: 16,
	}
	// Recalculate the total len (IPv4) or payload len (IPv6). Recalculate the
	// (IPv4) header checksum.
	if isV6 {
		hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV6
		binary.BigEndian.PutUint16(pktHead[4:], uint16(coalescedLen)-uint16(item.iphLen)) // set new payload len
	} else {
		hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV4
		pktHead[10], pktHead[11] = 0, 0 // clear checksum field
		binary.BigEndian.PutUint16(pktHead[2:], uint16(coalescedLen)) // set new total length
		iphCSum := ^checksum(pktHead[:item.iphLen], 0) // compute checksum
		binary.BigEndian.PutUint16(pktHead[10:], iphCSum) // set checksum field
	}
	// Write the updated virtio header in front of the coalesced packet.
	hdr.encode(bufs[item.bufsIndex][bufsOffset-virtioNetHdrLen:])
	// Calculate the pseudo header checksum and place it at the TCP checksum
	// offset. Downstream checksum offloading will combine this with computation
	// of the tcp header and payload checksum.
	addrLen := 4
	addrOffset := ipv4SrcAddrOffset
	if isV6 {
		addrLen = 16
		addrOffset = ipv6SrcAddrOffset
	}
	srcAddrAt := bufsOffset + addrOffset
	srcAddr := bufs[item.bufsIndex][srcAddrAt : srcAddrAt+addrLen]
	dstAddr := bufs[item.bufsIndex][srcAddrAt+addrLen : srcAddrAt+addrLen*2]
	psum := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, srcAddr, dstAddr, uint16(coalescedLen-int(item.iphLen)))
	binary.BigEndian.PutUint16(pktHead[hdr.csumStart+hdr.csumOffset:], checksum([]byte{}, psum))
	item.numMerged++
	return coalesceSuccess
}
const (
ipv4FlagMoreFragments uint8 = 0x20
)
const (
ipv4SrcAddrOffset = 12
ipv6SrcAddrOffset = 8
maxUint16 = 1<<16 - 1
)
// tcpGRO evaluates the TCP packet at pktI in bufs for coalescing with
// existing packets tracked in table. It will return false when pktI is not
// coalesced, otherwise true. This indicates to the caller if bufs[pktI]
// should be written to the Device.
func tcpGRO(bufs [][]byte, offset int, pktI int, table *tcpGROTable, isV6 bool) (pktCoalesced bool) {
	pkt := bufs[pktI][offset:]
	if len(pkt) > maxUint16 {
		// A valid IPv4 or IPv6 packet will never exceed this.
		return false
	}
	// IHL is in 32-bit words; for IPv6 the header is a fixed 40 bytes.
	iphLen := int((pkt[0] & 0x0F) * 4)
	if isV6 {
		iphLen = 40
		ipv6HPayloadLen := int(binary.BigEndian.Uint16(pkt[4:]))
		if ipv6HPayloadLen != len(pkt)-iphLen {
			return false
		}
	} else {
		totalLen := int(binary.BigEndian.Uint16(pkt[2:]))
		if totalLen != len(pkt) {
			return false
		}
	}
	if len(pkt) < iphLen {
		return false
	}
	tcphLen := int((pkt[iphLen+12] >> 4) * 4)
	if tcphLen < 20 || tcphLen > 60 {
		return false
	}
	if len(pkt) < iphLen+tcphLen {
		return false
	}
	if !isV6 {
		// Reject any fragment: MF set or a nonzero fragment offset.
		if pkt[6]&ipv4FlagMoreFragments != 0 || pkt[6]<<3 != 0 || pkt[7] != 0 {
			// no GRO support for fragmented segments for now
			return false
		}
	}
	tcpFlags := pkt[iphLen+tcpFlagsOffset]
	var pshSet bool
	// not a candidate if any non-ACK flags (except PSH+ACK) are set
	if tcpFlags != tcpFlagACK {
		if pkt[iphLen+tcpFlagsOffset] != tcpFlagACK|tcpFlagPSH {
			return false
		}
		pshSet = true
	}
	gsoSize := uint16(len(pkt) - tcphLen - iphLen)
	// not a candidate if payload len is 0
	if gsoSize < 1 {
		return false
	}
	seq := binary.BigEndian.Uint32(pkt[iphLen+4:])
	srcAddrOffset := ipv4SrcAddrOffset
	addrLen := 4
	if isV6 {
		srcAddrOffset = ipv6SrcAddrOffset
		addrLen = 16
	}
	items, existing := table.lookupOrInsert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI)
	if !existing {
		return false
	}
	for i := len(items) - 1; i >= 0; i-- {
		// In the best case of packets arriving in order iterating in reverse is
		// more efficient if there are multiple items for a given flow. This
		// also enables a natural table.deleteAt() in the
		// coalesceItemInvalidCSum case without the need for index tracking.
		// This algorithm makes a best effort to coalesce in the event of
		// unordered packets, where pkt may land anywhere in items from a
		// sequence number perspective, however once an item is inserted into
		// the table it is never compared across other items later.
		item := items[i]
		can := tcpPacketsCanCoalesce(pkt, uint8(iphLen), uint8(tcphLen), seq, pshSet, gsoSize, item, bufs, offset)
		if can != coalesceUnavailable {
			result := coalesceTCPPackets(can, pkt, pktI, gsoSize, seq, pshSet, &item, bufs, offset, isV6)
			switch result {
			case coalesceSuccess:
				table.updateAt(item, i)
				return true
			case coalesceItemInvalidCSum:
				// delete the item with an invalid csum
				table.deleteAt(item.key, i)
			case coalescePktInvalidCSum:
				// no point in inserting an item that we can't coalesce
				return false
			default:
			}
		}
	}
	// failed to coalesce with any other packets; store the item in the flow
	table.insert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI)
	return false
}
// isTCP4NoIPOptions reports whether b begins with an IPv4 header that carries
// no IP options (IHL == 5) and whose protocol is TCP, with at least enough
// bytes for a maximal (60-byte header) TCP segment start.
func isTCP4NoIPOptions(b []byte) bool {
	return len(b) >= 40 &&
		b[0]>>4 == 4 &&
		b[0]&0x0F == 5 &&
		b[9] == unix.IPPROTO_TCP
}
// isTCP6NoEH reports whether b begins with an IPv6 header whose Next Header
// is TCP directly (no extension headers), with enough bytes for the fixed
// IPv6 header plus a maximal TCP header.
func isTCP6NoEH(b []byte) bool {
	return len(b) >= 60 &&
		b[0]>>4 == 6 &&
		b[6] == unix.IPPROTO_TCP
}
// handleGRO evaluates bufs for GRO, and writes the indices of the resulting
// packets into toWrite. toWrite, tcp4Table, and tcp6Table should initially be
// empty (but non-nil), and are passed in to save allocs as the caller may reset
// and recycle them across vectors of packets.
func handleGRO(bufs [][]byte, offset int, tcp4Table, tcp6Table *tcpGROTable, toWrite *[]int) error {
	for i := range bufs {
		if offset < virtioNetHdrLen || offset > len(bufs[i])-1 {
			return errors.New("invalid offset")
		}
		pkt := bufs[i][offset:]
		coalesced := false
		switch {
		case isTCP4NoIPOptions(pkt): // ipv4 packets w/IP options do not coalesce
			coalesced = tcpGRO(bufs, offset, i, tcp4Table, false)
		case isTCP6NoEH(pkt): // ipv6 packets w/extension headers do not coalesce
			coalesced = tcpGRO(bufs, offset, i, tcp6Table, true)
		}
		if coalesced {
			continue
		}
		// Not coalesced: prepend a zeroed virtio header and queue for write.
		var hdr virtioNetHdr
		if err := hdr.encode(bufs[i][offset-virtioNetHdrLen:]); err != nil {
			return err
		}
		*toWrite = append(*toWrite, i)
	}
	return nil
}
// tcpTSO splits packets from in into outBuffs, writing the size of each
// element into sizes. It returns the number of buffers populated, and/or an
// error.
func tcpTSO(in []byte, hdr virtioNetHdr, outBuffs [][]byte, sizes []int, outOffset int) (int, error) {
	iphLen := int(hdr.csumStart)
	srcAddrOffset := ipv6SrcAddrOffset
	addrLen := 16
	if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_TCPV4 {
		in[10], in[11] = 0, 0 // clear ipv4 header checksum
		srcAddrOffset = ipv4SrcAddrOffset
		addrLen = 4
	}
	tcpCSumAt := int(hdr.csumStart + hdr.csumOffset)
	in[tcpCSumAt], in[tcpCSumAt+1] = 0, 0 // clear tcp checksum
	firstTCPSeqNum := binary.BigEndian.Uint32(in[hdr.csumStart+4:])
	nextSegmentDataAt := int(hdr.hdrLen)
	i := 0
	for ; nextSegmentDataAt < len(in); i++ {
		if i == len(outBuffs) {
			return i - 1, ErrTooManySegments
		}
		// Each segment carries up to gsoSize payload bytes; the last segment
		// carries whatever remains.
		nextSegmentEnd := nextSegmentDataAt + int(hdr.gsoSize)
		if nextSegmentEnd > len(in) {
			nextSegmentEnd = len(in)
		}
		segmentDataLen := nextSegmentEnd - nextSegmentDataAt
		totalLen := int(hdr.hdrLen) + segmentDataLen
		sizes[i] = totalLen
		out := outBuffs[i][outOffset:]
		// Copy the original IP header, then patch it per segment below.
		copy(out, in[:iphLen])
		if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_TCPV4 {
			// For IPv4 we are responsible for incrementing the ID field,
			// updating the total len field, and recalculating the header
			// checksum.
			if i > 0 {
				id := binary.BigEndian.Uint16(out[4:])
				id += uint16(i)
				binary.BigEndian.PutUint16(out[4:], id)
			}
			binary.BigEndian.PutUint16(out[2:], uint16(totalLen))
			ipv4CSum := ^checksum(out[:iphLen], 0)
			binary.BigEndian.PutUint16(out[10:], ipv4CSum)
		} else {
			// For IPv6 we are responsible for updating the payload length field.
			binary.BigEndian.PutUint16(out[4:], uint16(totalLen-iphLen))
		}
		// TCP header: copy, then advance the sequence number by gsoSize per
		// segment. (The uint16 multiply cannot overflow: i*gsoSize is bounded
		// by len(in), itself bounded by the 64KiB read buffer.)
		copy(out[hdr.csumStart:hdr.hdrLen], in[hdr.csumStart:hdr.hdrLen])
		tcpSeq := firstTCPSeqNum + uint32(hdr.gsoSize*uint16(i))
		binary.BigEndian.PutUint32(out[hdr.csumStart+4:], tcpSeq)
		if nextSegmentEnd != len(in) {
			// FIN and PSH should only be set on last segment
			clearFlags := tcpFlagFIN | tcpFlagPSH
			out[hdr.csumStart+tcpFlagsOffset] &^= clearFlags
		}
		// payload
		copy(out[hdr.hdrLen:], in[nextSegmentDataAt:nextSegmentEnd])
		// TCP checksum over the pseudo-header plus this segment's header+data.
		tcpHLen := int(hdr.hdrLen - hdr.csumStart)
		tcpLenForPseudo := uint16(tcpHLen + segmentDataLen)
		tcpCSumNoFold := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, in[srcAddrOffset:srcAddrOffset+addrLen], in[srcAddrOffset+addrLen:srcAddrOffset+addrLen*2], tcpLenForPseudo)
		tcpCSum := ^checksum(out[hdr.csumStart:totalLen], tcpCSumNoFold)
		binary.BigEndian.PutUint16(out[hdr.csumStart+hdr.csumOffset:], tcpCSum)
		nextSegmentDataAt += int(hdr.gsoSize)
	}
	return i, nil
}
// gsoNoneChecksum finishes the transport checksum of a GSO_NONE packet. The
// 16-bit value already stored at the checksum offset (typically the
// pseudo-header checksum) is used as the seed of the computation, and the
// folded, complemented result is written back in its place.
func gsoNoneChecksum(in []byte, cSumStart, cSumOffset uint16) error {
	at := cSumStart + cSumOffset
	seed := uint64(binary.BigEndian.Uint16(in[at:]))
	in[at] = 0
	in[at+1] = 0
	binary.BigEndian.PutUint16(in[at:], ^checksum(in[cSumStart:], seed))
	return nil
}

52
wgstack/tun/tun.go Normal file
View File

@@ -0,0 +1,52 @@
// SPDX-License-Identifier: MIT
//
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
package tun
import (
"os"
)
// Event is a device state-change notification delivered on the channel
// returned by Device.Events.
type Event int

// Event bit flags.
const (
	EventUp = 1 << iota
	EventDown
	EventMTUUpdate
)
// Device abstracts a TUN network device supporting batched packet I/O.
type Device interface {
	// File returns the file descriptor of the device.
	File() *os.File

	// Read one or more packets from the Device (without any additional headers).
	// On a successful read it returns the number of packets read, and sets
	// packet lengths within the sizes slice. len(sizes) must be >= len(bufs).
	// A nonzero offset can be used to instruct the Device on where to begin
	// reading into each element of the bufs slice.
	Read(bufs [][]byte, sizes []int, offset int) (n int, err error)

	// Write one or more packets to the device (without any additional headers).
	// On a successful write it returns the number of packets written. A nonzero
	// offset can be used to instruct the Device on where to begin writing from
	// each packet contained within the bufs slice.
	Write(bufs [][]byte, offset int) (int, error)

	// MTU returns the MTU of the Device.
	MTU() (int, error)

	// Name returns the current name of the Device.
	Name() (string, error)

	// Events returns a channel of type Event, which is fed Device events.
	Events() <-chan Event

	// Close stops the Device and closes the Event channel.
	Close() error

	// BatchSize returns the preferred/max number of packets that can be read or
	// written in a single read/write call. BatchSize must not change over the
	// lifetime of a Device.
	BatchSize() int
}

664
wgstack/tun/tun_linux.go Normal file
View File

@@ -0,0 +1,664 @@
//go:build linux
// SPDX-License-Identifier: MIT
//
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
package tun
/* Implementation of the TUN device interface for linux
*/
import (
"errors"
"fmt"
"os"
"sync"
"syscall"
"time"
"unsafe"
wgconn "github.com/slackhq/nebula/wgstack/conn"
"golang.org/x/sys/unix"
"golang.zx2c4.com/wireguard/rwcancel"
)
const (
	// cloneDevicePath is the tun clone device opened by CreateTUN.
	cloneDevicePath = "/dev/net/tun"
	// ifReqSize is a buffer large enough to hold a struct ifreq: the
	// IFNAMSIZ-byte name followed by a generously padded union payload.
	ifReqSize = unix.IFNAMSIZ + 64
)
// NativeTun is the Linux implementation of the Device interface, backed by a
// file descriptor for /dev/net/tun.
type NativeTun struct {
	tunFile                 *os.File
	index                   int32      // if index
	errors                  chan error // async error handling
	events                  chan Event // device related events
	netlinkSock             int
	netlinkCancel           *rwcancel.RWCancel
	hackListenerClosed      sync.Mutex // held while routineHackListener runs; taken before closing events
	statusListenersShutdown chan struct{}
	batchSize               int
	vnetHdr                 bool // true when IFF_VNET_HDR is set: packets are framed with a virtioNetHdr

	closeOnce sync.Once

	nameOnce  sync.Once // guards calling initNameCache, which sets following fields
	nameCache string    // name of interface
	nameErr   error

	readOpMu sync.Mutex                    // readOpMu guards readBuff
	readBuff [virtioNetHdrLen + 65535]byte // if vnetHdr every read() is prefixed by virtioNetHdr

	writeOpMu sync.Mutex // writeOpMu guards toWrite, tcp4GROTable, tcp6GROTable
	toWrite   []int
	tcp4GROTable, tcp6GROTable *tcpGROTable
}
// File returns the underlying tun device file.
func (tun *NativeTun) File() *os.File {
	return tun.tunFile
}
// routineHackListener infers interface up/down state by issuing zero-length
// writes to the tun fd once per second: EINVAL means writes are accepted (up),
// EIO means no I/O is possible (down). It exits when statusListenersShutdown
// closes or the fd becomes unusable, releasing hackListenerClosed on exit.
// NOTE(review): nothing visible in this chunk starts this goroutine or
// pre-locks hackListenerClosed — confirm at the call site.
func (tun *NativeTun) routineHackListener() {
	defer tun.hackListenerClosed.Unlock()
	/* This is needed for the detection to work across network namespaces
	 * If you are reading this and know a better method, please get in touch.
	 */
	last := 0
	const (
		up   = 1
		down = 2
	)
	for {
		sysconn, err := tun.tunFile.SyscallConn()
		if err != nil {
			return
		}
		err2 := sysconn.Control(func(fd uintptr) {
			_, err = unix.Write(int(fd), nil)
		})
		if err2 != nil {
			return
		}
		switch err {
		case unix.EINVAL:
			if last != up {
				// If the tunnel is up, it reports that write() is
				// allowed but we provided invalid data.
				tun.events <- EventUp
				last = up
			}
		case unix.EIO:
			if last != down {
				// If the tunnel is down, it reports that no I/O
				// is possible, without checking our provided data.
				tun.events <- EventDown
				last = down
			}
		default:
			return
		}
		select {
		case <-time.After(time.Second):
			// nothing
		case <-tun.statusListenersShutdown:
			return
		}
	}
}
// createNetlinkSocket opens a raw NETLINK_ROUTE socket subscribed to link and
// IPv4/IPv6 address change notifications and returns its descriptor.
func createNetlinkSocket() (int, error) {
	sock, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW|unix.SOCK_CLOEXEC, unix.NETLINK_ROUTE)
	if err != nil {
		return -1, err
	}
	saddr := &unix.SockaddrNetlink{
		Family: unix.AF_NETLINK,
		Groups: unix.RTMGRP_LINK | unix.RTMGRP_IPV4_IFADDR | unix.RTMGRP_IPV6_IFADDR,
	}
	if err = unix.Bind(sock, saddr); err != nil {
		// Close the socket on bind failure so the descriptor is not leaked.
		unix.Close(sock)
		return -1, err
	}
	return sock, nil
}
// routineNetlinkListener consumes RTM_NEWLINK netlink messages and translates
// IFF_RUNNING flag changes for this interface's index into EventUp/EventDown,
// plus an EventMTUUpdate per link message. On exit it closes the netlink
// socket, waits for the hack listener to finish (by taking
// hackListenerClosed), closes tun.events, and releases the cancel handle.
func (tun *NativeTun) routineNetlinkListener() {
	defer func() {
		unix.Close(tun.netlinkSock)
		tun.hackListenerClosed.Lock()
		close(tun.events)
		tun.netlinkCancel.Close()
	}()
	for msg := make([]byte, 1<<16); ; {
		var err error
		var msgn int
		for {
			msgn, _, _, _, err = unix.Recvmsg(tun.netlinkSock, msg[:], nil, 0)
			if err == nil || !rwcancel.RetryAfterError(err) {
				break
			}
			// Recv would block: wait until readable or cancelled by Close.
			if !tun.netlinkCancel.ReadyRead() {
				tun.errors <- fmt.Errorf("netlink socket closed: %w", err)
				return
			}
		}
		if err != nil {
			tun.errors <- fmt.Errorf("failed to receive netlink message: %w", err)
			return
		}
		// Bail out promptly if the device is shutting down.
		select {
		case <-tun.statusListenersShutdown:
			return
		default:
		}
		wasEverUp := false
		// Walk the chain of netlink message headers in this datagram.
		for remain := msg[:msgn]; len(remain) >= unix.SizeofNlMsghdr; {
			hdr := *(*unix.NlMsghdr)(unsafe.Pointer(&remain[0]))
			if int(hdr.Len) > len(remain) {
				break
			}
			switch hdr.Type {
			case unix.NLMSG_DONE:
				remain = []byte{}
			case unix.RTM_NEWLINK:
				info := *(*unix.IfInfomsg)(unsafe.Pointer(&remain[unix.SizeofNlMsghdr]))
				remain = remain[hdr.Len:]
				if info.Index != tun.index {
					// not our interface
					continue
				}
				if info.Flags&unix.IFF_RUNNING != 0 {
					tun.events <- EventUp
					wasEverUp = true
				}
				if info.Flags&unix.IFF_RUNNING == 0 {
					// Don't emit EventDown before we've ever emitted EventUp.
					// This avoids a startup race with HackListener, which
					// might detect Up before we have finished reporting Down.
					if wasEverUp {
						tun.events <- EventDown
					}
				}
				tun.events <- EventMTUUpdate
			default:
				remain = remain[hdr.Len:]
			}
		}
	}
}
// getIFIndex resolves the interface index for name via the SIOCGIFINDEX ioctl
// issued on a throwaway AF_INET datagram socket.
func getIFIndex(name string) (int32, error) {
	fd, err := unix.Socket(
		unix.AF_INET,
		unix.SOCK_DGRAM|unix.SOCK_CLOEXEC,
		0,
	)
	if err != nil {
		return 0, err
	}
	defer unix.Close(fd)
	var ifr [ifReqSize]byte
	copy(ifr[:], name)
	_, _, errno := unix.Syscall(
		unix.SYS_IOCTL,
		uintptr(fd),
		uintptr(unix.SIOCGIFINDEX),
		uintptr(unsafe.Pointer(&ifr[0])),
	)
	if errno != 0 {
		return 0, errno
	}
	// The kernel writes the index into the ifreq union, which starts right
	// after the IFNAMSIZ-byte name field.
	return *(*int32)(unsafe.Pointer(&ifr[unix.IFNAMSIZ])), nil
}
// setMTU sets the interface MTU to n via the SIOCSIFMTU ioctl on a throwaway
// AF_INET datagram socket.
func (tun *NativeTun) setMTU(n int) error {
	name, err := tun.Name()
	if err != nil {
		return err
	}
	// open datagram socket
	fd, err := unix.Socket(
		unix.AF_INET,
		unix.SOCK_DGRAM|unix.SOCK_CLOEXEC,
		0,
	)
	if err != nil {
		return err
	}
	defer unix.Close(fd)
	var ifr [ifReqSize]byte
	copy(ifr[:], name)
	// Place the MTU in the ifreq union, immediately after the name field.
	*(*uint32)(unsafe.Pointer(&ifr[unix.IFNAMSIZ])) = uint32(n)
	_, _, errno := unix.Syscall(
		unix.SYS_IOCTL,
		uintptr(fd),
		uintptr(unix.SIOCSIFMTU),
		uintptr(unsafe.Pointer(&ifr[0])),
	)
	if errno != 0 {
		return errno
	}
	return nil
}
// routineNetlinkRead is a near-verbatim duplicate of routineNetlinkListener,
// differing only in that it does not check statusListenersShutdown between
// messages.
// NOTE(review): nothing in this file starts this goroutine — routineNetlink
// launches routineNetlinkListener — so this appears to be dead code. Confirm
// and remove: if both ever ran, their deferred cleanups would double-close
// tun.events and the netlink socket.
func (tun *NativeTun) routineNetlinkRead() {
	defer func() {
		unix.Close(tun.netlinkSock)
		tun.hackListenerClosed.Lock()
		close(tun.events)
		tun.netlinkCancel.Close()
	}()
	for msg := make([]byte, 1<<16); ; {
		var err error
		var msgn int
		for {
			msgn, _, _, _, err = unix.Recvmsg(tun.netlinkSock, msg[:], nil, 0)
			if err == nil || !rwcancel.RetryAfterError(err) {
				break
			}
			if !tun.netlinkCancel.ReadyRead() {
				tun.errors <- fmt.Errorf("netlink socket closed: %w", err)
				return
			}
		}
		if err != nil {
			tun.errors <- fmt.Errorf("failed to receive netlink message: %w", err)
			return
		}
		wasEverUp := false
		for remain := msg[:msgn]; len(remain) >= unix.SizeofNlMsghdr; {
			hdr := *(*unix.NlMsghdr)(unsafe.Pointer(&remain[0]))
			if int(hdr.Len) > len(remain) {
				break
			}
			switch hdr.Type {
			case unix.NLMSG_DONE:
				remain = []byte{}
			case unix.RTM_NEWLINK:
				info := *(*unix.IfInfomsg)(unsafe.Pointer(&remain[unix.SizeofNlMsghdr]))
				remain = remain[hdr.Len:]
				if info.Index != tun.index {
					continue
				}
				if info.Flags&unix.IFF_RUNNING != 0 {
					tun.events <- EventUp
					wasEverUp = true
				}
				if info.Flags&unix.IFF_RUNNING == 0 {
					if wasEverUp {
						tun.events <- EventDown
					}
				}
				tun.events <- EventMTUUpdate
			default:
				remain = remain[hdr.Len:]
			}
		}
	}
}
// routineNetlink creates the netlink socket and its cancellation handle, then
// starts the listener goroutine that turns netlink messages into Events.
// Setup errors are reported on tun.errors rather than returned.
func (tun *NativeTun) routineNetlink() {
	var err error
	tun.netlinkSock, err = createNetlinkSocket()
	if err != nil {
		tun.errors <- fmt.Errorf("failed to create netlink socket: %w", err)
		return
	}
	tun.netlinkCancel, err = rwcancel.NewRWCancel(tun.netlinkSock)
	if err != nil {
		// The listener goroutine (which normally closes the socket on exit)
		// will never start, so close it here to avoid leaking the descriptor.
		unix.Close(tun.netlinkSock)
		tun.errors <- fmt.Errorf("failed to create netlink cancel: %w", err)
		return
	}
	go tun.routineNetlinkListener()
}
// Close stops the device, shuts down the listener goroutines, and closes the
// tun file. It is safe to call multiple times; only the first call acts.
func (tun *NativeTun) Close() error {
	var err1, err2 error
	tun.closeOnce.Do(func() {
		if tun.statusListenersShutdown != nil {
			close(tun.statusListenersShutdown)
			if tun.netlinkCancel != nil {
				// Unblock the netlink listener; it closes tun.events on exit.
				err1 = tun.netlinkCancel.Cancel()
			}
		} else if tun.events != nil {
			// No listener goroutines were started; close events ourselves.
			close(tun.events)
		}
		err2 = tun.tunFile.Close()
	})
	if err1 != nil {
		return err1
	}
	return err2
}
// BatchSize returns the preferred number of packets per Read/Write call, as
// established by initFromFlags (IdealBatchSize with vnet-hdr, otherwise 1).
func (tun *NativeTun) BatchSize() int {
	return tun.batchSize
}
const (
	// tunOffloads is the offload set passed to TUNSETOFFLOAD: checksum
	// offload plus TCPv4/TCPv6 segmentation offload.
	// TODO: support TSO with ECN bits
	tunOffloads = unix.TUN_F_CSUM | unix.TUN_F_TSO4 | unix.TUN_F_TSO6
)
// initFromFlags queries the device flags via TUNGETIFF. If IFF_VNET_HDR is
// set it enables checksum/TSO offloads (TUNSETOFFLOAD) and switches to
// batched, virtio-net-header-framed I/O; otherwise batchSize is 1.
func (tun *NativeTun) initFromFlags(name string) error {
	sc, err := tun.tunFile.SyscallConn()
	if err != nil {
		return err
	}
	if e := sc.Control(func(fd uintptr) {
		var (
			ifr *unix.Ifreq
		)
		ifr, err = unix.NewIfreq(name)
		if err != nil {
			return
		}
		err = unix.IoctlIfreq(int(fd), unix.TUNGETIFF, ifr)
		if err != nil {
			return
		}
		got := ifr.Uint16()
		if got&unix.IFF_VNET_HDR != 0 {
			err = unix.IoctlSetInt(int(fd), unix.TUNSETOFFLOAD, tunOffloads)
			if err != nil {
				return
			}
			tun.vnetHdr = true
			tun.batchSize = wgconn.IdealBatchSize
		} else {
			tun.batchSize = 1
		}
	}); e != nil {
		return e
	}
	// err may have been set inside the Control closure above.
	return err
}
// CreateTUN creates a Device with the provided name and MTU by opening the
// tun clone device and configuring the resulting descriptor.
func CreateTUN(name string, mtu int) (Device, error) {
	nfd, err := unix.Open(cloneDevicePath, unix.O_RDWR|unix.O_CLOEXEC, 0)
	if err != nil {
		// Wrap the real error: the open can fail for reasons other than a
		// missing clone device (e.g. permissions).
		return nil, fmt.Errorf("CreateTUN(%q) failed to open %s: %w", name, cloneDevicePath, err)
	}
	fd := os.NewFile(uintptr(nfd), cloneDevicePath)
	tun, err := CreateTUNFromFile(fd, mtu)
	if err != nil {
		// CreateTUNFromFile does not take ownership on failure; close the
		// descriptor here so it is not leaked.
		fd.Close()
		return nil, err
	}
	if name != "tun" {
		if err := tun.(*NativeTun).initFromFlags(name); err != nil {
			tun.Close()
			return nil, fmt.Errorf("CreateTUN(%q) failed to set flags: %w", name, err)
		}
	}
	return tun, nil
}
// CreateTUNFromFile creates a Device from an os.File with the provided MTU.
// It resolves the interface name, configures offloads and batching, sets the
// MTU, and starts the netlink event listener.
func CreateTUNFromFile(file *os.File, mtu int) (Device, error) {
	tun := &NativeTun{
		tunFile: file,
		errors:  make(chan error, 5),
		events:  make(chan Event, 5),
	}
	name, err := tun.Name()
	if err != nil {
		return nil, fmt.Errorf("failed to determine TUN name: %w", err)
	}
	if err := tun.initFromFlags(name); err != nil {
		return nil, fmt.Errorf("failed to query TUN flags: %w", err)
	}
	// Fall back to unbatched I/O if initFromFlags left batchSize unset.
	// (Previously this check was duplicated after setMTU; once is enough.)
	if tun.batchSize == 0 {
		tun.batchSize = 1
	}
	tun.index, err = getIFIndex(name)
	if err != nil {
		return nil, fmt.Errorf("failed to get TUN index: %w", err)
	}
	if err = tun.setMTU(mtu); err != nil {
		return nil, fmt.Errorf("failed to set MTU: %w", err)
	}
	tun.statusListenersShutdown = make(chan struct{})
	go tun.routineNetlink()
	tun.tcp4GROTable = newTCPGROTable()
	tun.tcp6GROTable = newTCPGROTable()
	return tun, nil
}
// Name returns the interface name, resolving it once via initNameCache and
// caching the result (and any error) for subsequent calls.
func (tun *NativeTun) Name() (string, error) {
	tun.nameOnce.Do(tun.initNameCache)
	return tun.nameCache, tun.nameErr
}
// initNameCache populates nameCache/nameErr by issuing TUNGETIFF on the tun
// fd and extracting the NUL-terminated interface name. Called exactly once
// via nameOnce from Name.
func (tun *NativeTun) initNameCache() {
	sysconn, err := tun.tunFile.SyscallConn()
	if err != nil {
		tun.nameErr = err
		return
	}
	err = sysconn.Control(func(fd uintptr) {
		var ifr [ifReqSize]byte
		_, _, errno := unix.Syscall(
			unix.SYS_IOCTL,
			fd,
			uintptr(unix.TUNGETIFF),
			uintptr(unsafe.Pointer(&ifr[0])),
		)
		if errno != 0 {
			tun.nameErr = errno
			return
		}
		tun.nameCache = unix.ByteSliceToString(ifr[:])
	})
	// Don't clobber a more specific error set inside the closure.
	if err != nil && tun.nameErr == nil {
		tun.nameErr = err
	}
}
// MTU reads the interface MTU via the SIOCGIFMTU ioctl on a throwaway
// AF_INET datagram socket.
func (tun *NativeTun) MTU() (int, error) {
	name, err := tun.Name()
	if err != nil {
		return 0, err
	}
	// open datagram socket
	fd, err := unix.Socket(
		unix.AF_INET,
		unix.SOCK_DGRAM|unix.SOCK_CLOEXEC,
		0,
	)
	if err != nil {
		return 0, err
	}
	defer unix.Close(fd)
	var ifr [ifReqSize]byte
	copy(ifr[:], name)
	_, _, errno := unix.Syscall(
		unix.SYS_IOCTL,
		uintptr(fd),
		uintptr(unix.SIOCGIFMTU),
		uintptr(unsafe.Pointer(&ifr[0])),
	)
	if errno != 0 {
		return 0, errno
	}
	// The kernel writes the MTU into the ifreq union after the name field.
	return int(*(*uint32)(unsafe.Pointer(&ifr[unix.IFNAMSIZ]))), nil
}
// Events returns the channel on which device events (up/down/MTU changes)
// are delivered. The channel is closed when the device shuts down.
func (tun *NativeTun) Events() <-chan Event {
	return tun.events
}
// Write writes one or more packets to the device. With vnetHdr enabled, the
// packets are first run through GRO coalescing (handleGRO encodes a
// virtio-net header into the region before offset for each surviving packet);
// otherwise every packet is written as-is. Serialized by writeOpMu, which
// also guards toWrite and the GRO tables.
// NOTE(review): total accumulates the byte counts returned by tunFile.Write,
// while the Device interface documents a packet count — confirm intent.
func (tun *NativeTun) Write(bufs [][]byte, offset int) (int, error) {
	tun.writeOpMu.Lock()
	defer func() {
		tun.tcp4GROTable.reset()
		tun.tcp6GROTable.reset()
		tun.writeOpMu.Unlock()
	}()
	var (
		errs  error
		total int
	)
	tun.toWrite = tun.toWrite[:0]
	if tun.vnetHdr {
		err := handleGRO(bufs, offset, tun.tcp4GROTable, tun.tcp6GROTable, &tun.toWrite)
		if err != nil {
			return 0, err
		}
		// Include the virtio-net header written by handleGRO in each write.
		offset -= virtioNetHdrLen
	} else {
		for i := range bufs {
			tun.toWrite = append(tun.toWrite, i)
		}
	}
	for _, bufsI := range tun.toWrite {
		n, err := tun.tunFile.Write(bufs[bufsI][offset:])
		if errors.Is(err, syscall.EBADFD) {
			// The fd was closed underneath us; surface the canonical error.
			return total, os.ErrClosed
		}
		if err != nil {
			errs = errors.Join(errs, err)
		} else {
			total += n
		}
	}
	return total, errs
}
// handleVirtioRead splits in into bufs, leaving offset bytes at the front of
// each buffer. It mutates sizes to reflect the size of each element of bufs,
// and returns the number of packets read.
func handleVirtioRead(in []byte, bufs [][]byte, sizes []int, offset int) (int, error) {
	var hdr virtioNetHdr
	if err := hdr.decode(in); err != nil {
		return 0, err
	}
	in = in[virtioNetHdrLen:]
	if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_NONE {
		if hdr.flags&unix.VIRTIO_NET_HDR_F_NEEDS_CSUM != 0 {
			// The kernel left checksum completion to us; finish it in place.
			if err := gsoNoneChecksum(in, hdr.csumStart, hdr.csumOffset); err != nil {
				return 0, err
			}
		}
		if len(in) > len(bufs[0][offset:]) {
			return 0, fmt.Errorf("read len %d overflows bufs element len %d", len(in), len(bufs[0][offset:]))
		}
		// Not a GSO frame: deliver as a single packet.
		n := copy(bufs[0][offset:], in)
		sizes[0] = n
		return 1, nil
	}
	if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 {
		return 0, fmt.Errorf("unsupported virtio GSO type: %d", hdr.gsoType)
	}
	// The GSO type must agree with the packet's IP version.
	ipVersion := in[0] >> 4
	switch ipVersion {
	case 4:
		if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 {
			return 0, fmt.Errorf("ip header version: %d, GSO type: %d", ipVersion, hdr.gsoType)
		}
	case 6:
		if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 {
			return 0, fmt.Errorf("ip header version: %d, GSO type: %d", ipVersion, hdr.gsoType)
		}
	default:
		return 0, fmt.Errorf("invalid ip header version: %d", ipVersion)
	}
	if len(in) <= int(hdr.csumStart+12) {
		return 0, errors.New("packet is too short")
	}
	// TCP header length from the data-offset field (upper nibble, in 32-bit
	// words). Note `>> 4 * 4` parses as `(x >> 4) * 4` in Go.
	tcpHLen := uint16(in[hdr.csumStart+12] >> 4 * 4)
	if tcpHLen < 20 || tcpHLen > 60 {
		return 0, fmt.Errorf("tcp header len is invalid: %d", tcpHLen)
	}
	hdr.hdrLen = hdr.csumStart + tcpHLen
	if len(in) < int(hdr.hdrLen) {
		return 0, fmt.Errorf("length of packet (%d) < virtioNetHdr.hdrLen (%d)", len(in), hdr.hdrLen)
	}
	if hdr.hdrLen < hdr.csumStart {
		return 0, fmt.Errorf("virtioNetHdr.hdrLen (%d) < virtioNetHdr.csumStart (%d)", hdr.hdrLen, hdr.csumStart)
	}
	cSumAt := int(hdr.csumStart + hdr.csumOffset)
	if cSumAt+1 >= len(in) {
		return 0, fmt.Errorf("end of checksum offset (%d) exceeds packet length (%d)", cSumAt+1, len(in))
	}
	// Split the oversized frame into MTU-sized segments.
	return tcpTSO(in, hdr, bufs, sizes, offset)
}
// Read reads one or more packets from the device into bufs at offset, setting
// sizes per packet and returning the packet count. Serialized by readOpMu
// because readBuff is shared scratch space.
func (tun *NativeTun) Read(bufs [][]byte, sizes []int, offset int) (int, error) {
	tun.readOpMu.Lock()
	defer tun.readOpMu.Unlock()
	select {
	case err := <-tun.errors:
		// Surface any asynchronous error (e.g. from the netlink listener).
		return 0, err
	default:
		readInto := bufs[0][offset:]
		if tun.vnetHdr {
			// Read the virtio-net header plus up to a full 64KiB GSO frame
			// into scratch, then split it into bufs below.
			readInto = tun.readBuff[:]
		}
		n, err := tun.tunFile.Read(readInto)
		if errors.Is(err, syscall.EBADFD) {
			err = os.ErrClosed
		}
		if err != nil {
			return 0, err
		}
		if tun.vnetHdr {
			return handleVirtioRead(readInto[:n], bufs, sizes, offset)
		}
		sizes[0] = n
		return 1, nil
	}
}