mirror of
https://github.com/slackhq/nebula.git
synced 2026-05-16 04:47:38 +02:00
no allocs
This commit is contained in:
@@ -10,6 +10,7 @@ import (
|
|||||||
"net"
|
"net"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
@@ -44,8 +45,14 @@ type tunFile struct {
|
|||||||
segBuf []byte // backing store for segmented output
|
segBuf []byte // backing store for segmented output
|
||||||
pending [][]byte // segments waiting to be drained by Read
|
pending [][]byte // segments waiting to be drained by Read
|
||||||
pendingIdx int
|
pendingIdx int
|
||||||
|
writeIovs [2]unix.Iovec // preallocated iovecs for vnetHdr writes; writeIovs[0] is fixed to zeroVnetHdr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// zeroVnetHdr is the 10-byte virtio_net_hdr we prepend to every TUN write when
|
||||||
|
// IFF_VNET_HDR is active. All-zero signals "no GSO, no checksum offload"; the
|
||||||
|
// kernel accepts the packet as-is.
|
||||||
|
var zeroVnetHdr [virtioNetHdrLen]byte
|
||||||
|
|
||||||
// newFriend makes a tunFile for a MultiQueueReader that copies the shutdown eventfd from the parent tun
|
// newFriend makes a tunFile for a MultiQueueReader that copies the shutdown eventfd from the parent tun
|
||||||
func (r *tunFile) newFriend(fd int) (*tunFile, error) {
|
func (r *tunFile) newFriend(fd int) (*tunFile, error) {
|
||||||
if err := unix.SetNonblock(fd, true); err != nil {
|
if err := unix.SetNonblock(fd, true); err != nil {
|
||||||
@@ -67,6 +74,8 @@ func (r *tunFile) newFriend(fd int) (*tunFile, error) {
|
|||||||
if r.vnetHdr {
|
if r.vnetHdr {
|
||||||
out.readBuf = make([]byte, tunReadBufSize)
|
out.readBuf = make([]byte, tunReadBufSize)
|
||||||
out.segBuf = make([]byte, tunSegBufSize)
|
out.segBuf = make([]byte, tunSegBufSize)
|
||||||
|
out.writeIovs[0].Base = &zeroVnetHdr[0]
|
||||||
|
out.writeIovs[0].SetLen(virtioNetHdrLen)
|
||||||
}
|
}
|
||||||
return out, nil
|
return out, nil
|
||||||
}
|
}
|
||||||
@@ -98,6 +107,8 @@ func newTunFd(fd int, vnetHdr bool) (*tunFile, error) {
|
|||||||
if vnetHdr {
|
if vnetHdr {
|
||||||
out.readBuf = make([]byte, tunReadBufSize)
|
out.readBuf = make([]byte, tunReadBufSize)
|
||||||
out.segBuf = make([]byte, tunSegBufSize)
|
out.segBuf = make([]byte, tunSegBufSize)
|
||||||
|
out.writeIovs[0].Base = &zeroVnetHdr[0]
|
||||||
|
out.writeIovs[0].SetLen(virtioNetHdrLen)
|
||||||
}
|
}
|
||||||
|
|
||||||
return out, nil
|
return out, nil
|
||||||
@@ -203,10 +214,6 @@ func (r *tunFile) Read(buf []byte) (int, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// zeroVnetHdr is the prefix we prepend to every write when IFF_VNET_HDR is
|
|
||||||
// active and we have no offload info to convey.
|
|
||||||
var zeroVnetHdr [virtioNetHdrLen]byte
|
|
||||||
|
|
||||||
func (r *tunFile) Write(buf []byte) (int, error) {
|
func (r *tunFile) Write(buf []byte) (int, error) {
|
||||||
if !r.vnetHdr {
|
if !r.vnetHdr {
|
||||||
for {
|
for {
|
||||||
@@ -225,25 +232,34 @@ func (r *tunFile) Write(buf []byte) (int, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
iovs := [][]byte{zeroVnetHdr[:], buf}
|
if len(buf) == 0 {
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
// Point the payload iovec at the caller's buffer. writeIovs[0] is pre-wired
|
||||||
|
// to zeroVnetHdr during tunFile construction so we don't rebuild it here.
|
||||||
|
r.writeIovs[1].Base = &buf[0]
|
||||||
|
r.writeIovs[1].SetLen(len(buf))
|
||||||
|
iovPtr := uintptr(unsafe.Pointer(&r.writeIovs[0]))
|
||||||
for {
|
for {
|
||||||
n, err := unix.Writev(r.fd, iovs)
|
n, _, errno := unix.Syscall(unix.SYS_WRITEV, uintptr(r.fd), iovPtr, 2)
|
||||||
if err == nil {
|
if errno == 0 {
|
||||||
if n < virtioNetHdrLen {
|
runtime.KeepAlive(buf)
|
||||||
|
if int(n) < virtioNetHdrLen {
|
||||||
return 0, io.ErrShortWrite
|
return 0, io.ErrShortWrite
|
||||||
}
|
}
|
||||||
return n - virtioNetHdrLen, nil
|
return int(n) - virtioNetHdrLen, nil
|
||||||
}
|
}
|
||||||
if err == unix.EAGAIN {
|
if errno == unix.EAGAIN {
|
||||||
if err = r.blockOnWrite(); err != nil {
|
if err := r.blockOnWrite(); err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if err == unix.EINTR {
|
if errno == unix.EINTR {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
return 0, err
|
runtime.KeepAlive(buf)
|
||||||
|
return 0, errno
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ package overlay
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
@@ -245,3 +246,32 @@ func TestSegmentRejectsUDP(t *testing.T) {
|
|||||||
t.Fatalf("expected rejection for UDP GSO")
|
t.Fatalf("expected rejection for UDP GSO")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestTunFileWriteVnetHdrNoAlloc verifies the IFF_VNET_HDR fast-path write is
|
||||||
|
// allocation-free. We write to /dev/null so every call succeeds synchronously.
|
||||||
|
func TestTunFileWriteVnetHdrNoAlloc(t *testing.T) {
|
||||||
|
fd, err := unix.Open("/dev/null", os.O_WRONLY, 0)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("open /dev/null: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { _ = unix.Close(fd) })
|
||||||
|
|
||||||
|
tf := &tunFile{fd: fd, vnetHdr: true}
|
||||||
|
tf.writeIovs[0].Base = &zeroVnetHdr[0]
|
||||||
|
tf.writeIovs[0].SetLen(virtioNetHdrLen)
|
||||||
|
|
||||||
|
payload := make([]byte, 1400)
|
||||||
|
// Warm up (first call may trigger one-time internal allocations elsewhere).
|
||||||
|
if _, err := tf.Write(payload); err != nil {
|
||||||
|
t.Fatalf("Write: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
allocs := testing.AllocsPerRun(1000, func() {
|
||||||
|
if _, err := tf.Write(payload); err != nil {
|
||||||
|
t.Fatalf("Write: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
if allocs != 0 {
|
||||||
|
t.Fatalf("Write allocated %.1f times per call, want 0", allocs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user