mirror of
https://github.com/slackhq/nebula.git
synced 2026-05-16 04:47:38 +02:00
holy crap 2x
This commit is contained in:
@@ -30,3 +30,22 @@ type Device interface {
|
||||
SupportsMultiqueue() bool
|
||||
NewMultiQueueReader() (Queue, error)
|
||||
}
|
||||
|
||||
// GSOWriter is the optional capability of a Queue that can hand the kernel a
// TCP TSO superpacket in one writev (virtio_net_hdr followed by the packet)
// and let the kernel do the segmentation on egress.
//
// Callers discover it with a type assertion. A backend that cannot do GSO
// reports false from GSOSupported, in which case all coalescing logic is
// skipped.
//
// Contract for WriteGSO:
//   - pkt holds the IPv4/IPv6 + TCP header followed by the concatenated
//     coalesced payload.
//   - hdrLen is the total L3+L4 header length, i.e. the offset at which the
//     payload starts.
//   - csumStart is the byte offset at which the TCP header begins (equal to
//     the IP header length).
//   - gsoSize is the MSS: every segment except possibly the last must be
//     exactly this many payload bytes.
//   - isV6 selects GSO_TCPV6 rather than GSO_TCPV4.
//   - The TCP checksum field inside pkt must already contain the
//     pseudo-header partial sum (single-fold, not inverted), as required by
//     virtio NEEDS_CSUM semantics.
type GSOWriter interface {
	// GSOSupported reports whether WriteGSO may be used on this queue.
	GSOSupported() bool

	// WriteGSO writes pkt as one TSO superpacket; see the type comment for
	// the meaning of each argument.
	WriteGSO(pkt []byte, gsoSize uint16, isV6 bool, hdrLen, csumStart uint16) error
}
|
||||
|
||||
@@ -48,6 +48,12 @@ type tunFile struct {
|
||||
pending [][]byte // segments waiting to be drained by Read
|
||||
pendingIdx int
|
||||
writeIovs [2]unix.Iovec // preallocated iovecs for vnetHdr writes; iovs[0] is fixed to zeroVnetHdr
|
||||
|
||||
// gsoHdrBuf is a per-queue 10-byte scratch for the virtio_net_hdr emitted
|
||||
// by WriteGSO. Separate from zeroVnetHdr so a concurrent non-GSO Write on
|
||||
// another queue never observes a half-written header.
|
||||
gsoHdrBuf [virtioNetHdrLen]byte
|
||||
gsoIovs [2]unix.Iovec
|
||||
}
|
||||
|
||||
// zeroVnetHdr is the 10-byte virtio_net_hdr we prepend to every TUN write when
|
||||
@@ -78,6 +84,8 @@ func (r *tunFile) newFriend(fd int) (*tunFile, error) {
|
||||
out.segBuf = make([]byte, tunSegBufCap)
|
||||
out.writeIovs[0].Base = &zeroVnetHdr[0]
|
||||
out.writeIovs[0].SetLen(virtioNetHdrLen)
|
||||
out.gsoIovs[0].Base = &out.gsoHdrBuf[0]
|
||||
out.gsoIovs[0].SetLen(virtioNetHdrLen)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
@@ -111,6 +119,8 @@ func newTunFd(fd int, vnetHdr bool) (*tunFile, error) {
|
||||
out.segBuf = make([]byte, tunSegBufCap)
|
||||
out.writeIovs[0].Base = &zeroVnetHdr[0]
|
||||
out.writeIovs[0].SetLen(virtioNetHdrLen)
|
||||
out.gsoIovs[0].Base = &out.gsoHdrBuf[0]
|
||||
out.gsoIovs[0].SetLen(virtioNetHdrLen)
|
||||
}
|
||||
|
||||
return out, nil
|
||||
@@ -331,6 +341,64 @@ func (r *tunFile) Write(buf []byte) (int, error) {
|
||||
}
|
||||
}
|
||||
|
||||
// GSOSupported reports whether this queue was opened with IFF_VNET_HDR and
|
||||
// can accept WriteGSO. When false, callers should fall back to per-segment
|
||||
// Write calls.
|
||||
func (r *tunFile) GSOSupported() bool { return r.vnetHdr }
|
||||
|
||||
// WriteGSO emits pkt as a single TCP TSO superpacket via writev. pkt must
|
||||
// contain a full IPv4/IPv6 + TCP header prefix followed by the concatenated
|
||||
// coalesced payload. The TCP checksum field must already hold the
|
||||
// pseudo-header partial (NEEDS_CSUM semantics). gsoSize is the MSS; every
|
||||
// segment except the last must be exactly that many payload bytes.
|
||||
func (r *tunFile) WriteGSO(pkt []byte, gsoSize uint16, isV6 bool, hdrLen, csumStart uint16) error {
|
||||
if !r.vnetHdr {
|
||||
return fmt.Errorf("WriteGSO called on tun without IFF_VNET_HDR")
|
||||
}
|
||||
if len(pkt) == 0 {
|
||||
return nil
|
||||
}
|
||||
hdr := virtioNetHdr{
|
||||
Flags: unix.VIRTIO_NET_HDR_F_NEEDS_CSUM,
|
||||
HdrLen: hdrLen,
|
||||
GSOSize: gsoSize,
|
||||
CsumStart: csumStart,
|
||||
CsumOffset: 16, // TCP checksum field lives 16 bytes into the TCP header
|
||||
}
|
||||
if isV6 {
|
||||
hdr.GSOType = unix.VIRTIO_NET_HDR_GSO_TCPV6
|
||||
} else {
|
||||
hdr.GSOType = unix.VIRTIO_NET_HDR_GSO_TCPV4
|
||||
}
|
||||
hdr.encode(r.gsoHdrBuf[:])
|
||||
|
||||
r.gsoIovs[1].Base = &pkt[0]
|
||||
r.gsoIovs[1].SetLen(len(pkt))
|
||||
iovPtr := uintptr(unsafe.Pointer(&r.gsoIovs[0]))
|
||||
for {
|
||||
n, _, errno := syscall.RawSyscall(unix.SYS_WRITEV, uintptr(r.fd), iovPtr, 2)
|
||||
if errno == 0 {
|
||||
runtime.KeepAlive(pkt)
|
||||
if int(n) < virtioNetHdrLen {
|
||||
return io.ErrShortWrite
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if errno == unix.EAGAIN {
|
||||
runtime.KeepAlive(pkt)
|
||||
if err := r.blockOnWrite(); err != nil {
|
||||
return err
|
||||
}
|
||||
continue
|
||||
}
|
||||
if errno == unix.EINTR {
|
||||
continue
|
||||
}
|
||||
runtime.KeepAlive(pkt)
|
||||
return errno
|
||||
}
|
||||
}
|
||||
|
||||
func (r *tunFile) wakeForShutdown() error {
|
||||
var buf [8]byte
|
||||
binary.NativeEndian.PutUint64(buf[:], 1)
|
||||
|
||||
@@ -54,6 +54,18 @@ func (h *virtioNetHdr) decode(b []byte) {
|
||||
h.CsumOffset = binary.NativeEndian.Uint16(b[8:10])
|
||||
}
|
||||
|
||||
// encode is the inverse of decode: writes the virtio_net_hdr fields into b
|
||||
// (must be at least virtioNetHdrLen bytes). Used to emit a TSO superpacket
|
||||
// on egress.
|
||||
func (h *virtioNetHdr) encode(b []byte) {
|
||||
b[0] = h.Flags
|
||||
b[1] = h.GSOType
|
||||
binary.NativeEndian.PutUint16(b[2:4], h.HdrLen)
|
||||
binary.NativeEndian.PutUint16(b[4:6], h.GSOSize)
|
||||
binary.NativeEndian.PutUint16(b[6:8], h.CsumStart)
|
||||
binary.NativeEndian.PutUint16(b[8:10], h.CsumOffset)
|
||||
}
|
||||
|
||||
// segmentInto splits a TUN-side packet described by hdr into one or more
|
||||
// IP packets, each appended to *out as a slice of scratch. scratch must be
|
||||
// sized to hold every segment (including replicated headers).
|
||||
|
||||
Reference in New Issue
Block a user