mirror of
https://github.com/slackhq/nebula.git
synced 2026-05-16 12:57:38 +02:00
pretty spicy
This commit is contained in:
170
udp/udp_linux.go
170
udp/udp_linux.go
@@ -38,6 +38,18 @@ type StdConn struct {
|
||||
writeSent int
|
||||
writeErrno syscall.Errno
|
||||
writeFunc func(fd uintptr) bool
|
||||
|
||||
// UDP GSO (sendmsg with UDP_SEGMENT cmsg) support. gsoSupported is
|
||||
// probed once at socket creation. When true, WriteSegmented takes a
|
||||
// single-syscall GSO path; otherwise it falls back to a WriteTo loop.
|
||||
gsoSupported bool
|
||||
gsoMsg msghdr
|
||||
gsoIovs []iovec
|
||||
gsoName []byte // SizeofSockaddrInet6
|
||||
gsoCmsg []byte // CmsgSpace(2)
|
||||
gsoSent int
|
||||
gsoErrno syscall.Errno
|
||||
gsoFunc func(fd uintptr) bool
|
||||
}
|
||||
|
||||
func setReusePort(network, address string, c syscall.RawConn) error {
|
||||
@@ -87,9 +99,58 @@ func NewListener(l *logrus.Logger, ip netip.Addr, port int, multi bool, batch in
|
||||
out.prepareWriteMessages(MaxWriteBatch)
|
||||
out.writeFunc = out.sendmmsgRawWrite
|
||||
|
||||
out.prepareGSO()
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
const (
	// maxGSOSegments is the upper bound on datagrams folded into a single
	// sendmsg() call. Older Linux kernels cap UDP_MAX_SEGMENTS at 64
	// (newer ones raise it to 128); we keep the conservative value so the
	// same code works everywhere.
	maxGSOSegments = 64

	// maxGSOBytes limits the total payload handed to one sendmsg() when
	// UDP_SEGMENT is set. The kernel stitches all iovecs into a single skb
	// whose length must fit the UDP length field and respect
	// sk_gso_max_size (65536 on most devices); 65535 keeps ciphertext plus
	// headers inside both limits and avoids EMSGSIZE on large TSO
	// superpackets.
	maxGSOBytes = 65535
)
|
||||
|
||||
// prepareGSO probes UDP_SEGMENT support and, on success, sets up the
|
||||
// reusable sendmsg scratch (iovecs, sockaddr, cmsg) plus the preallocated
|
||||
// raw-write closure used to avoid heap allocations on the hot path.
|
||||
func (u *StdConn) prepareGSO() {
|
||||
var probeErr error
|
||||
if err := u.rawConn.Control(func(fd uintptr) {
|
||||
probeErr = unix.SetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_SEGMENT, 0)
|
||||
}); err != nil {
|
||||
return
|
||||
}
|
||||
if probeErr != nil {
|
||||
return
|
||||
}
|
||||
u.gsoSupported = true
|
||||
u.gsoIovs = make([]iovec, maxGSOSegments)
|
||||
u.gsoName = make([]byte, unix.SizeofSockaddrInet6)
|
||||
u.gsoCmsg = make([]byte, unix.CmsgSpace(2))
|
||||
|
||||
// Wire up the static pieces of gsoMsg. Iovlen / Controllen / Namelen /
|
||||
// cmsg contents get refreshed per call; Iov, Name, Control pointers are
|
||||
// fixed because the scratch slices never move.
|
||||
u.gsoMsg.Iov = &u.gsoIovs[0]
|
||||
u.gsoMsg.Name = &u.gsoName[0]
|
||||
u.gsoMsg.Control = &u.gsoCmsg[0]
|
||||
|
||||
// Prepopulate the cmsg header. Len/Level/Type are constant for our use;
|
||||
// only the 2-byte gso_size payload changes per call.
|
||||
cmsghdr := (*unix.Cmsghdr)(unsafe.Pointer(&u.gsoCmsg[0]))
|
||||
cmsghdr.Level = unix.SOL_UDP
|
||||
cmsghdr.Type = unix.UDP_SEGMENT
|
||||
setCmsgLen(cmsghdr, unix.CmsgLen(2))
|
||||
|
||||
u.gsoFunc = u.sendmsgRawWriteGSO
|
||||
}
|
||||
|
||||
func (u *StdConn) SupportsMultipleReaders() bool {
|
||||
return true
|
||||
}
|
||||
@@ -331,6 +392,115 @@ func (u *StdConn) sendmmsgRawWrite(fd uintptr) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (u *StdConn) SupportsGSO() bool {
|
||||
return u.gsoSupported
|
||||
}
|
||||
|
||||
// WriteSegmented sends bufs to addr as a UDP GSO superpacket. The kernel
|
||||
// emits one datagram per iovec on the wire; all iovecs except the last must
|
||||
// be exactly segSize bytes. Non-GSO kernels hit the WriteTo fallback.
|
||||
// Called with len(bufs) >= 1. len(bufs) > maxGSOSegments is chunked.
|
||||
func (u *StdConn) WriteSegmented(bufs [][]byte, addr netip.AddrPort, segSize int) error {
|
||||
if len(bufs) == 0 {
|
||||
return nil
|
||||
}
|
||||
if !u.gsoSupported {
|
||||
for _, b := range bufs {
|
||||
if err := u.WriteTo(b, addr); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
nlen, err := writeSockaddr(u.gsoName, addr, u.isV4)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
u.gsoMsg.Namelen = uint32(nlen)
|
||||
setMsgControllen(&u.gsoMsg, unix.CmsgSpace(2))
|
||||
|
||||
// Cap the per-syscall fan-out by both segment count and total bytes.
|
||||
// Kernel rejects sendmsg with EMSGSIZE when segCount*segSize would
|
||||
// exceed sk_gso_max_size (typically 65536). For segSize > maxGSOBytes
|
||||
// we can't use GSO at all and must fall back per-packet.
|
||||
segsByBytes := maxGSOBytes / segSize
|
||||
if segsByBytes == 0 {
|
||||
for _, b := range bufs {
|
||||
if werr := u.WriteTo(b, addr); werr != nil {
|
||||
return werr
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
maxChunk := maxGSOSegments
|
||||
if segsByBytes < maxChunk {
|
||||
maxChunk = segsByBytes
|
||||
}
|
||||
|
||||
i := 0
|
||||
for i < len(bufs) {
|
||||
chunk := len(bufs) - i
|
||||
if chunk > maxChunk {
|
||||
chunk = maxChunk
|
||||
}
|
||||
for k := 0; k < chunk; k++ {
|
||||
b := bufs[i+k]
|
||||
if len(b) == 0 {
|
||||
u.gsoIovs[k].Base = nil
|
||||
setIovLen(&u.gsoIovs[k], 0)
|
||||
} else {
|
||||
u.gsoIovs[k].Base = &b[0]
|
||||
setIovLen(&u.gsoIovs[k], len(b))
|
||||
}
|
||||
}
|
||||
setMsgIovlen(&u.gsoMsg, chunk)
|
||||
binary.NativeEndian.PutUint16(u.gsoCmsg[unix.CmsgLen(0):unix.CmsgLen(0)+2], uint16(segSize))
|
||||
|
||||
if serr := u.sendmsgGSO(); serr != nil {
|
||||
// Fall back to a per-packet loop for the remainder of the
|
||||
// batch. Dropping the GSO call entirely is safer than
|
||||
// returning mid-superpacket and losing bytes.
|
||||
for k := 0; k < chunk; k++ {
|
||||
if werr := u.WriteTo(bufs[i+k], addr); werr != nil {
|
||||
return werr
|
||||
}
|
||||
}
|
||||
}
|
||||
i += chunk
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// sendmsgRawWriteGSO is the preallocated rawConn.Write callback for the GSO
|
||||
// path. Reads the prebuilt u.gsoMsg and writes u.gsoSent / u.gsoErrno.
|
||||
func (u *StdConn) sendmsgRawWriteGSO(fd uintptr) bool {
|
||||
r1, _, errno := unix.Syscall(
|
||||
unix.SYS_SENDMSG,
|
||||
fd,
|
||||
uintptr(unsafe.Pointer(&u.gsoMsg)),
|
||||
0,
|
||||
)
|
||||
if errno == syscall.EAGAIN || errno == syscall.EWOULDBLOCK {
|
||||
return false
|
||||
}
|
||||
u.gsoSent = int(r1)
|
||||
u.gsoErrno = errno
|
||||
return true
|
||||
}
|
||||
|
||||
func (u *StdConn) sendmsgGSO() error {
|
||||
u.gsoSent = 0
|
||||
u.gsoErrno = 0
|
||||
if err := u.rawConn.Write(u.gsoFunc); err != nil {
|
||||
return err
|
||||
}
|
||||
if u.gsoErrno != 0 {
|
||||
return &net.OpError{Op: "sendmsg", Err: u.gsoErrno}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (u *StdConn) sendmmsg(n int) (int, error) {
|
||||
u.writeChunk = n
|
||||
u.writeSent = 0
|
||||
|
||||
Reference in New Issue
Block a user