//go:build linux && !android && !e2e_testing // +build linux,!android,!e2e_testing package tio import ( "encoding/binary" "os" "testing" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/tcpip/checksum" "github.com/slackhq/nebula/overlay/tio/virtio" ) // testSegScratchSize is a generous segmentation scratch sized to fit any // of the synthetic TSO/USO superpackets these tests generate (one // worst-case 64 KiB superpacket plus replicated per-segment headers). const testSegScratchSize = 192 * 1024 // verifyChecksum confirms that the one's-complement sum across `b`, seeded // with a folded pseudo-header sum, equals all-ones (valid). func verifyChecksum(b []byte, pseudo uint16) bool { return checksum.Checksum(b, pseudo) == 0xffff } // segmentForTest is the test-only counterpart to the production // SegmentSuperpacket path. It handles GSO_NONE (with optional // finishChecksum) inline and dispatches GSO superpackets through // SegmentSuperpacket, draining each yielded segment into a // freshly-copied [][]byte slot so callers can iterate after the call // returns. Tests pre-set hdr.HdrLen correctly, so correctHdrLen is not // invoked here. func segmentForTest(pkt []byte, hdr virtio.Hdr, out *[][]byte, scratch []byte) error { if hdr.GSOType == unix.VIRTIO_NET_HDR_GSO_NONE { cp := append([]byte(nil), pkt...) if hdr.Flags&unix.VIRTIO_NET_HDR_F_NEEDS_CSUM != 0 { if err := virtio.FinishChecksum(cp, hdr); err != nil { return err } } *out = append(*out, cp) return nil } proto, err := protoFromGSOType(hdr.GSOType) if err != nil { return err } gso := GSOInfo{ Size: hdr.GSOSize, HdrLen: hdr.HdrLen, CsumStart: hdr.CsumStart, Proto: proto, } return SegmentSuperpacket(Packet{Bytes: pkt, GSO: gso}, func(seg []byte) error { *out = append(*out, append([]byte(nil), seg...)) return nil }) } // pseudoHeaderIPv4 returns the folded pseudo-header sum used to verify a // TCP/UDP segment's checksum in tests. src/dst are 4 bytes each. 
func pseudoHeaderIPv4(src, dst []byte, proto byte, l4Len int) uint16 {
	// Sum the two addresses as 16-bit big-endian words, then add the
	// protocol and L4 length fields of the IPv4 pseudo-header.
	s := uint32(checksum.Checksum(src, 0)) + uint32(checksum.Checksum(dst, 0))
	s += uint32(proto) + uint32(l4Len)
	// Fold twice: the first fold can itself produce a carry.
	s = (s & 0xffff) + (s >> 16)
	s = (s & 0xffff) + (s >> 16)
	return uint16(s)
}

// pseudoHeaderIPv6 returns the folded pseudo-header sum used to verify a
// TCP/UDP segment's checksum in tests. src/dst are 16 bytes each.
func pseudoHeaderIPv6(src, dst []byte, proto byte, l4Len int) uint16 {
	s := uint32(checksum.Checksum(src, 0)) + uint32(checksum.Checksum(dst, 0))
	// The IPv6 pseudo-header length field is 32 bits wide, so feed both
	// halves of l4Len into the sum.
	s += uint32(l4Len>>16) + uint32(l4Len&0xffff) + uint32(proto)
	s = (s & 0xffff) + (s >> 16)
	s = (s & 0xffff) + (s >> 16)
	return uint16(s)
}

// buildTSOv4 builds a synthetic IPv4/TCP TSO superpacket with a payload of
// `payLen` bytes split at `mss`. The returned virtio header carries the
// matching NEEDS_CSUM/TCPV4 metadata (CsumOffset 16 = TCP checksum field).
func buildTSOv4(t *testing.T, payLen, mss int) ([]byte, virtio.Hdr) {
	t.Helper()
	const ipLen = 20
	const tcpLen = 20
	pkt := make([]byte, ipLen+tcpLen+payLen)
	// IPv4 header
	pkt[0] = 0x45 // version 4, IHL 5
	// total length is meaningless for TSO but set it anyway
	binary.BigEndian.PutUint16(pkt[2:4], uint16(ipLen+tcpLen+payLen))
	binary.BigEndian.PutUint16(pkt[4:6], 0x4242) // original ID
	pkt[8] = 64                                  // TTL
	pkt[9] = unix.IPPROTO_TCP
	copy(pkt[12:16], []byte{10, 0, 0, 1}) // src
	copy(pkt[16:20], []byte{10, 0, 0, 2}) // dst
	// TCP header
	binary.BigEndian.PutUint16(pkt[20:22], 12345) // sport
	binary.BigEndian.PutUint16(pkt[22:24], 80)    // dport
	binary.BigEndian.PutUint32(pkt[24:28], 10000) // seq
	binary.BigEndian.PutUint32(pkt[28:32], 20000) // ack
	pkt[32] = 0x50                                // data offset 5 words
	pkt[33] = 0x18                                // ACK | PSH
	binary.BigEndian.PutUint16(pkt[34:36], 65535) // window
	// payload
	for i := 0; i < payLen; i++ {
		pkt[ipLen+tcpLen+i] = byte(i & 0xff)
	}
	return pkt, virtio.Hdr{
		Flags:      unix.VIRTIO_NET_HDR_F_NEEDS_CSUM,
		GSOType:    unix.VIRTIO_NET_HDR_GSO_TCPV4,
		HdrLen:     uint16(ipLen + tcpLen),
		GSOSize:    uint16(mss),
		CsumStart:  uint16(ipLen),
		CsumOffset: 16,
	}
}

// TestSegmentTCPv4 exercises the even-split IPv4 TSO path: segment count,
// per-segment IP total_length, incrementing IP ID, advancing TCP sequence
// numbers, PSH preserved only on the last segment, and valid IP header and
// TCP checksums on every segment.
func TestSegmentTCPv4(t *testing.T) {
	const mss = 100
	const numSeg = 3
	pkt, hdr := buildTSOv4(t, mss*numSeg, mss)
	scratch := make([]byte, testSegScratchSize)
	var out [][]byte
	if err := segmentForTest(pkt, hdr, &out, scratch); err != nil {
		t.Fatalf("segmentForTest: %v", err)
	}
	if len(out) != numSeg {
		t.Fatalf("expected %d segments, got %d", numSeg, len(out))
	}
	for i, seg := range out {
		// 40 = 20-byte IPv4 header + 20-byte TCP header.
		if len(seg) != 40+mss {
			t.Errorf("seg %d: unexpected len %d", i, len(seg))
		}
		totalLen := binary.BigEndian.Uint16(seg[2:4])
		if totalLen != uint16(40+mss) {
			t.Errorf("seg %d: total_len=%d want %d", i, totalLen, 40+mss)
		}
		// IP ID must increment from the seed's 0x4242 per segment.
		id := binary.BigEndian.Uint16(seg[4:6])
		if id != 0x4242+uint16(i) {
			t.Errorf("seg %d: ip id=%#x want %#x", i, id, 0x4242+uint16(i))
		}
		seq := binary.BigEndian.Uint32(seg[24:28])
		wantSeq := uint32(10000 + i*mss)
		if seq != wantSeq {
			t.Errorf("seg %d: seq=%d want %d", i, seq, wantSeq)
		}
		flags := seg[33]
		wantFlags := byte(0x10) // ACK only, PSH cleared
		if i == numSeg-1 {
			wantFlags = 0x18 // ACK | PSH preserved on last
		}
		if flags != wantFlags {
			t.Errorf("seg %d: flags=%#x want %#x", i, flags, wantFlags)
		}
		// IPv4 header checksum must verify against itself.
		if !verifyChecksum(seg[:20], 0) {
			t.Errorf("seg %d: bad IPv4 header checksum", i)
		}
		// TCP checksum must verify against the pseudo-header.
		psum := pseudoHeaderIPv4(seg[12:16], seg[16:20], unix.IPPROTO_TCP, 20+mss)
		if !verifyChecksum(seg[20:], psum) {
			t.Errorf("seg %d: bad TCP checksum", i)
		}
	}
}

// TestSegmentTCPv4OddTail covers the uneven-split case: the final segment
// carries the short remainder and its length fields/checksums must match.
func TestSegmentTCPv4OddTail(t *testing.T) {
	// Payload of 250 bytes with MSS 100 → segments of 100, 100, 50.
	pkt, hdr := buildTSOv4(t, 250, 100)
	scratch := make([]byte, testSegScratchSize)
	var out [][]byte
	if err := segmentForTest(pkt, hdr, &out, scratch); err != nil {
		t.Fatalf("segmentForTest: %v", err)
	}
	if len(out) != 3 {
		t.Fatalf("want 3 segments, got %d", len(out))
	}
	wantPayLens := []int{100, 100, 50}
	for i, seg := range out {
		if len(seg)-40 != wantPayLens[i] {
			t.Errorf("seg %d: pay len %d want %d", i, len(seg)-40, wantPayLens[i])
		}
		if !verifyChecksum(seg[:20], 0) {
			t.Errorf("seg %d: bad IPv4 header checksum", i)
		}
		psum := pseudoHeaderIPv4(seg[12:16], seg[16:20], unix.IPPROTO_TCP, 20+wantPayLens[i])
		if !verifyChecksum(seg[20:], psum) {
			t.Errorf("seg %d: bad TCP checksum", i)
		}
	}
}

// TestSegmentTCPv6 builds an IPv6/TCP superpacket inline and checks the v6
// path: payload_length per segment, sequence advance, FIN/PSH deferred to
// the last segment, and TCP checksums against the v6 pseudo-header.
func TestSegmentTCPv6(t *testing.T) {
	const ipLen = 40
	const tcpLen = 20
	const mss = 120
	const numSeg = 2
	payLen := mss * numSeg
	pkt := make([]byte, ipLen+tcpLen+payLen)
	// IPv6 header
	pkt[0] = 0x60 // version 6
	binary.BigEndian.PutUint16(pkt[4:6], uint16(tcpLen+payLen))
	pkt[6] = unix.IPPROTO_TCP
	pkt[7] = 64
	// src/dst fe80::1 / fe80::2
	pkt[8] = 0xfe
	pkt[9] = 0x80
	pkt[23] = 1
	pkt[24] = 0xfe
	pkt[25] = 0x80
	pkt[39] = 2
	// TCP header
	binary.BigEndian.PutUint16(pkt[40:42], 12345)
	binary.BigEndian.PutUint16(pkt[42:44], 80)
	binary.BigEndian.PutUint32(pkt[44:48], 7)
	binary.BigEndian.PutUint32(pkt[48:52], 99)
	pkt[52] = 0x50
	pkt[53] = 0x19 // FIN | ACK | PSH — exercise FIN clearing too
	binary.BigEndian.PutUint16(pkt[54:56], 65535)
	for i := 0; i < payLen; i++ {
		pkt[ipLen+tcpLen+i] = byte(i)
	}
	hdr := virtio.Hdr{
		Flags:      unix.VIRTIO_NET_HDR_F_NEEDS_CSUM,
		GSOType:    unix.VIRTIO_NET_HDR_GSO_TCPV6,
		HdrLen:     uint16(ipLen + tcpLen),
		GSOSize:    uint16(mss),
		CsumStart:  uint16(ipLen),
		CsumOffset: 16,
	}
	scratch := make([]byte, testSegScratchSize)
	var out [][]byte
	if err := segmentForTest(pkt, hdr, &out, scratch); err != nil {
		t.Fatalf("segmentForTest: %v", err)
	}
	if len(out) != numSeg {
		t.Fatalf("want %d segments, got %d", numSeg, len(out))
	}
	for i, seg := range out {
		if len(seg) != ipLen+tcpLen+mss {
			t.Errorf("seg %d: len %d want %d", i, len(seg), ipLen+tcpLen+mss)
		}
		pl := binary.BigEndian.Uint16(seg[4:6])
		if pl != uint16(tcpLen+mss) {
			t.Errorf("seg %d: payload_length=%d want %d", i, pl, tcpLen+mss)
		}
		seq := binary.BigEndian.Uint32(seg[44:48])
		if seq != uint32(7+i*mss) {
			t.Errorf("seg %d: seq=%d want %d", i, seq, 7+i*mss)
		}
		flags := seg[53]
		// Original flags = 0x19 (FIN|ACK|PSH). FIN(0x01)+PSH(0x08) should be
		// cleared on all but the last; ACK(0x10) always preserved.
		wantFlags := byte(0x10)
		if i == numSeg-1 {
			wantFlags = 0x19
		}
		if flags != wantFlags {
			t.Errorf("seg %d: flags=%#x want %#x", i, flags, wantFlags)
		}
		psum := pseudoHeaderIPv6(seg[8:24], seg[24:40], unix.IPPROTO_TCP, tcpLen+mss)
		if !verifyChecksum(seg[ipLen:], psum) {
			t.Errorf("seg %d: bad TCP checksum", i)
		}
	}
}

// TestSegmentGSONonePassesThrough verifies a GSO_NONE packet with no
// NEEDS_CSUM flag is emitted as exactly one untouched copy.
func TestSegmentGSONonePassesThrough(t *testing.T) {
	pkt, hdr := buildTSOv4(t, 100, 100)
	hdr.GSOType = unix.VIRTIO_NET_HDR_GSO_NONE
	hdr.Flags = 0 // no NEEDS_CSUM, leave packet untouched
	scratch := make([]byte, testSegScratchSize)
	var out [][]byte
	if err := segmentForTest(pkt, hdr, &out, scratch); err != nil {
		t.Fatalf("segmentForTest: %v", err)
	}
	if len(out) != 1 {
		t.Fatalf("want 1 segment, got %d", len(out))
	}
	if len(out[0]) != len(pkt) {
		t.Fatalf("unexpected length: %d vs %d", len(out[0]), len(pkt))
	}
}

// TestSegmentRejectsLegacyUDPGSO ensures the legacy GSO_UDP (UFO) marker is
// still rejected; only modern GSO_UDP_L4 (USO) is supported.
func TestSegmentRejectsLegacyUDPGSO(t *testing.T) {
	hdr := virtio.Hdr{GSOType: unix.VIRTIO_NET_HDR_GSO_UDP}
	var out [][]byte
	if err := segmentForTest(nil, hdr, &out, nil); err == nil {
		t.Fatalf("expected rejection for legacy UDP GSO")
	}
}

// buildUSOv4 builds a synthetic IPv4/UDP USO superpacket with payload of
// payLen bytes, segmented at gsoSize.
func buildUSOv4(t *testing.T, payLen, gsoSize int) ([]byte, virtio.Hdr) {
	t.Helper()
	const ipLen = 20
	const udpLen = 8
	pkt := make([]byte, ipLen+udpLen+payLen)
	// IPv4 header
	pkt[0] = 0x45 // version 4, IHL 5
	binary.BigEndian.PutUint16(pkt[2:4], uint16(ipLen+udpLen+payLen))
	binary.BigEndian.PutUint16(pkt[4:6], 0x4242)
	pkt[8] = 64
	pkt[9] = unix.IPPROTO_UDP
	copy(pkt[12:16], []byte{10, 0, 0, 1})
	copy(pkt[16:20], []byte{10, 0, 0, 2})
	// UDP header (length + checksum filled in per segment by segmentUDPYield)
	binary.BigEndian.PutUint16(pkt[20:22], 12345) // sport
	binary.BigEndian.PutUint16(pkt[22:24], 53)    // dport
	for i := 0; i < payLen; i++ {
		pkt[ipLen+udpLen+i] = byte(i & 0xff)
	}
	// CsumOffset 6 = UDP checksum field within the UDP header.
	return pkt, virtio.Hdr{
		Flags:      unix.VIRTIO_NET_HDR_F_NEEDS_CSUM,
		GSOType:    unix.VIRTIO_NET_HDR_GSO_UDP_L4,
		HdrLen:     uint16(ipLen + udpLen),
		GSOSize:    uint16(gsoSize),
		CsumStart:  uint16(ipLen),
		CsumOffset: 6,
	}
}

// TestSegmentUDPv4 exercises the even-split IPv4 USO path: segment count,
// IP total_length, constant IP ID, per-segment UDP length, and valid
// IP/UDP checksums.
func TestSegmentUDPv4(t *testing.T) {
	const gso = 100
	const numSeg = 3
	pkt, hdr := buildUSOv4(t, gso*numSeg, gso)
	scratch := make([]byte, testSegScratchSize)
	var out [][]byte
	if err := segmentForTest(pkt, hdr, &out, scratch); err != nil {
		t.Fatalf("segmentForTest: %v", err)
	}
	if len(out) != numSeg {
		t.Fatalf("expected %d segments, got %d", numSeg, len(out))
	}
	for i, seg := range out {
		// 28 = 20-byte IPv4 header + 8-byte UDP header.
		if len(seg) != 28+gso {
			t.Errorf("seg %d: len %d want %d", i, len(seg), 28+gso)
		}
		totalLen := binary.BigEndian.Uint16(seg[2:4])
		if totalLen != uint16(28+gso) {
			t.Errorf("seg %d: total_len=%d want %d", i, totalLen, 28+gso)
		}
		// kernel UDP-GSO does NOT bump the IPv4 ID across segments; every
		// segment carries the same ID as the seed.
		id := binary.BigEndian.Uint16(seg[4:6])
		if id != 0x4242 {
			t.Errorf("seg %d: ip id=%#x want %#x", i, id, 0x4242)
		}
		udpLen := binary.BigEndian.Uint16(seg[24:26])
		if udpLen != uint16(8+gso) {
			t.Errorf("seg %d: udp len=%d want %d", i, udpLen, 8+gso)
		}
		if !verifyChecksum(seg[:20], 0) {
			t.Errorf("seg %d: bad IPv4 header checksum", i)
		}
		psum := pseudoHeaderIPv4(seg[12:16], seg[16:20], unix.IPPROTO_UDP, 8+gso)
		if !verifyChecksum(seg[20:], psum) {
			t.Errorf("seg %d: bad UDP checksum", i)
		}
	}
}

// TestSegmentUDPv4OddTail covers the uneven USO split: the short final
// segment's UDP length and checksums must match its actual payload.
func TestSegmentUDPv4OddTail(t *testing.T) {
	// 250 bytes payload, gsoSize=100 → segments of 100, 100, 50.
	pkt, hdr := buildUSOv4(t, 250, 100)
	scratch := make([]byte, testSegScratchSize)
	var out [][]byte
	if err := segmentForTest(pkt, hdr, &out, scratch); err != nil {
		t.Fatalf("segmentForTest: %v", err)
	}
	if len(out) != 3 {
		t.Fatalf("want 3 segments, got %d", len(out))
	}
	wantPay := []int{100, 100, 50}
	for i, seg := range out {
		if len(seg)-28 != wantPay[i] {
			t.Errorf("seg %d: pay len %d want %d", i, len(seg)-28, wantPay[i])
		}
		udpLen := binary.BigEndian.Uint16(seg[24:26])
		if udpLen != uint16(8+wantPay[i]) {
			t.Errorf("seg %d: udp len=%d want %d", i, udpLen, 8+wantPay[i])
		}
		if !verifyChecksum(seg[:20], 0) {
			t.Errorf("seg %d: bad IPv4 header checksum", i)
		}
		psum := pseudoHeaderIPv4(seg[12:16], seg[16:20], unix.IPPROTO_UDP, 8+wantPay[i])
		if !verifyChecksum(seg[20:], psum) {
			t.Errorf("seg %d: bad UDP checksum", i)
		}
	}
}

// TestSegmentUDPv6 builds an IPv6/UDP superpacket inline and checks the v6
// USO path: IPv6 payload_length, per-segment UDP length, and UDP checksums
// against the v6 pseudo-header.
func TestSegmentUDPv6(t *testing.T) {
	const ipLen = 40
	const udpLen = 8
	const gso = 120
	const numSeg = 2
	payLen := gso * numSeg
	pkt := make([]byte, ipLen+udpLen+payLen)
	// IPv6 header
	pkt[0] = 0x60
	binary.BigEndian.PutUint16(pkt[4:6], uint16(udpLen+payLen))
	pkt[6] = unix.IPPROTO_UDP
	pkt[7] = 64
	pkt[8] = 0xfe
	pkt[9] = 0x80
	pkt[23] = 1
	pkt[24] = 0xfe
	pkt[25] = 0x80
	pkt[39] = 2
	binary.BigEndian.PutUint16(pkt[40:42], 12345)
	binary.BigEndian.PutUint16(pkt[42:44], 53)
	for i := 0; i < payLen; i++ {
		pkt[ipLen+udpLen+i] = byte(i)
	}
	hdr := virtio.Hdr{
		Flags:      unix.VIRTIO_NET_HDR_F_NEEDS_CSUM,
		GSOType:    unix.VIRTIO_NET_HDR_GSO_UDP_L4,
		HdrLen:     uint16(ipLen + udpLen),
		GSOSize:    uint16(gso),
		CsumStart:  uint16(ipLen),
		CsumOffset: 6,
	}
	scratch := make([]byte, testSegScratchSize)
	var out [][]byte
	if err := segmentForTest(pkt, hdr, &out, scratch); err != nil {
		t.Fatalf("segmentForTest: %v", err)
	}
	if len(out) != numSeg {
		t.Fatalf("want %d segments, got %d", numSeg, len(out))
	}
	for i, seg := range out {
		if len(seg) != ipLen+udpLen+gso {
			t.Errorf("seg %d: len %d want %d", i, len(seg), ipLen+udpLen+gso)
		}
		pl := binary.BigEndian.Uint16(seg[4:6])
		if pl != uint16(udpLen+gso) {
			t.Errorf("seg %d: payload_length=%d want %d", i, pl, udpLen+gso)
		}
		ul := binary.BigEndian.Uint16(seg[ipLen+4 : ipLen+6])
		if ul != uint16(udpLen+gso) {
			t.Errorf("seg %d: udp len=%d want %d", i, ul, udpLen+gso)
		}
		psum := pseudoHeaderIPv6(seg[8:24], seg[24:40], unix.IPPROTO_UDP, udpLen+gso)
		if !verifyChecksum(seg[ipLen:], psum) {
			t.Errorf("seg %d: bad UDP checksum", i)
		}
	}
}

// TestSegmentUDPCEPropagates confirms IP-level CE marks on the seed appear on
// every segment. UDP has no transport-level CWR/ECE: the IP TOS/TC byte is
// copied verbatim into every segment by the segment-prefix copy.
func TestSegmentUDPCEPropagates(t *testing.T) {
	pkt, hdr := buildUSOv4(t, 200, 100)
	pkt[1] = 0x03 // CE codepoint in IP-ECN
	scratch := make([]byte, testSegScratchSize)
	var out [][]byte
	if err := segmentForTest(pkt, hdr, &out, scratch); err != nil {
		t.Fatalf("segmentForTest: %v", err)
	}
	if len(out) != 2 {
		t.Fatalf("want 2 segments, got %d", len(out))
	}
	for i, seg := range out {
		if seg[1]&0x03 != 0x03 {
			t.Errorf("seg %d: CE missing (tos=%#x)", i, seg[1])
		}
		// The IP header checksum must still be valid with the CE-marked TOS.
		if !verifyChecksum(seg[:20], 0) {
			t.Errorf("seg %d: bad IPv4 header checksum", i)
		}
	}
}

// TestSegmentTCPCwrFirstSegmentOnly confirms RFC 3168 §6.1.2: when a TSO
// burst's seed has CWR set, only the first emitted segment carries CWR.
// ECE is preserved on every segment (different signal, persistent state).
func TestSegmentTCPCwrFirstSegmentOnly(t *testing.T) {
	const mss = 100
	const numSeg = 3
	pkt, hdr := buildTSOv4(t, mss*numSeg, mss)
	// Seed flags: CWR | ECE | ACK | PSH.
	pkt[33] = 0x80 | 0x40 | 0x10 | 0x08
	scratch := make([]byte, testSegScratchSize)
	var out [][]byte
	if err := segmentForTest(pkt, hdr, &out, scratch); err != nil {
		t.Fatalf("segmentForTest: %v", err)
	}
	if len(out) != numSeg {
		t.Fatalf("expected %d segments, got %d", numSeg, len(out))
	}
	for i, seg := range out {
		flags := seg[33]
		hasCwr := flags&0x80 != 0
		hasEce := flags&0x40 != 0
		hasPsh := flags&0x08 != 0
		wantCwr := i == 0         // CWR only on the first segment
		wantPsh := i == numSeg-1 // PSH only on the last segment
		if hasCwr != wantCwr {
			t.Errorf("seg %d: CWR=%v want %v (flags=%#x)", i, hasCwr, wantCwr, flags)
		}
		if !hasEce {
			t.Errorf("seg %d: ECE missing (flags=%#x)", i, flags)
		}
		if hasPsh != wantPsh {
			t.Errorf("seg %d: PSH=%v want %v (flags=%#x)", i, hasPsh, wantPsh, flags)
		}
		// IP and TCP checksums must still verify after the flag rewrite.
		if !verifyChecksum(seg[:20], 0) {
			t.Errorf("seg %d: bad IPv4 header checksum", i)
		}
		psum := pseudoHeaderIPv4(seg[12:16], seg[16:20], unix.IPPROTO_TCP, 20+mss)
		if !verifyChecksum(seg[20:], psum) {
			t.Errorf("seg %d: bad TCP checksum", i)
		}
	}
}

// BenchmarkSegmentTCPv4 measures segmentation throughput for several
// superpacket sizes at a typical 1460-byte MSS. b.SetBytes reports
// throughput per input superpacket byte.
func BenchmarkSegmentTCPv4(b *testing.B) {
	sizes := []struct {
		name   string
		payLen int
		mss    int
	}{
		{"64KiB_MSS1460", 65000, 1460},
		{"16KiB_MSS1460", 16384, 1460},
		{"4KiB_MSS1460", 4096, 1460},
	}
	for _, sz := range sizes {
		b.Run(sz.name, func(b *testing.B) {
			// Build the same synthetic IPv4/TCP TSO seed as buildTSOv4,
			// inlined here because the helper requires a *testing.T.
			const ipLen = 20
			const tcpLen = 20
			pkt := make([]byte, ipLen+tcpLen+sz.payLen)
			pkt[0] = 0x45
			binary.BigEndian.PutUint16(pkt[2:4], uint16(ipLen+tcpLen+sz.payLen))
			binary.BigEndian.PutUint16(pkt[4:6], 0x4242)
			pkt[8] = 64
			pkt[9] = unix.IPPROTO_TCP
			copy(pkt[12:16], []byte{10, 0, 0, 1})
			copy(pkt[16:20], []byte{10, 0, 0, 2})
			binary.BigEndian.PutUint16(pkt[20:22], 12345)
			binary.BigEndian.PutUint16(pkt[22:24], 80)
			binary.BigEndian.PutUint32(pkt[24:28], 10000)
			binary.BigEndian.PutUint32(pkt[28:32], 20000)
			pkt[32] = 0x50
			pkt[33] = 0x18
			binary.BigEndian.PutUint16(pkt[34:36], 65535)
			for i := 0; i < sz.payLen; i++ {
				pkt[ipLen+tcpLen+i] = byte(i)
			}
			hdr := virtio.Hdr{
				Flags:      unix.VIRTIO_NET_HDR_F_NEEDS_CSUM,
				GSOType:    unix.VIRTIO_NET_HDR_GSO_TCPV4,
				HdrLen:     uint16(ipLen + tcpLen),
				GSOSize:    uint16(sz.mss),
				CsumStart:  uint16(ipLen),
				CsumOffset: 16,
			}
			scratch := make([]byte, testSegScratchSize)
			out := make([][]byte, 0, 64)
			// SegmentSuperpacket consumes its input destructively; restore
			// pkt from a master copy each iteration. The restore mirrors the
			// kernel→userspace copy that hands a fresh GSO blob to the
			// segmenter in production, so it's representative cost rather
			// than bench overhead.
			master := append([]byte(nil), pkt...)
			work := make([]byte, len(pkt))
			b.SetBytes(int64(len(pkt)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				copy(work, master)
				out = out[:0]
				if err := segmentForTest(work, hdr, &out, scratch); err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}

// TestTunFileWriteVnetHdrNoAlloc verifies the IFF_VNET_HDR fast-path write is
// allocation-free. We write to /dev/null so every call succeeds synchronously.
func TestTunFileWriteVnetHdrNoAlloc(t *testing.T) {
	// NOTE(review): os.O_WRONLY is passed to unix.Open — the values match on
	// Linux, but unix.O_WRONLY would be the consistent flag namespace here;
	// changing it would orphan the os import, so it is only flagged.
	fd, err := unix.Open("/dev/null", os.O_WRONLY, 0)
	if err != nil {
		t.Fatalf("open /dev/null: %v", err)
	}
	t.Cleanup(func() { _ = unix.Close(fd) })
	tf := &Offload{fd: fd}
	payload := make([]byte, 1400)
	// Warm up (first call may trigger one-time internal allocations elsewhere).
	if _, err := tf.Write(payload); err != nil {
		t.Fatalf("Write: %v", err)
	}
	allocs := testing.AllocsPerRun(1000, func() {
		if _, err := tf.Write(payload); err != nil {
			t.Fatalf("Write: %v", err)
		}
	})
	if allocs != 0 {
		t.Fatalf("Write allocated %.1f times per call, want 0", allocs)
	}
}

// buildTSOv6 builds a synthetic IPv6/TCP TSO superpacket with payLen bytes
// of payload, segmented at gso. Returns the packet bytes only; the
// virtio_net_hdr is the caller's responsibility.
func buildTSOv6(payLen, gso int) []byte {
	const ipLen = 40
	const tcpLen = 20
	pkt := make([]byte, ipLen+tcpLen+payLen)
	pkt[0] = 0x60 // version 6
	binary.BigEndian.PutUint16(pkt[4:6], uint16(tcpLen+payLen))
	pkt[6] = unix.IPPROTO_TCP
	pkt[7] = 64
	// src/dst fe80::1 / fe80::2
	pkt[8] = 0xfe
	pkt[9] = 0x80
	pkt[23] = 1
	pkt[24] = 0xfe
	pkt[25] = 0x80
	pkt[39] = 2
	// TCP header
	binary.BigEndian.PutUint16(pkt[40:42], 12345)
	binary.BigEndian.PutUint16(pkt[42:44], 80)
	binary.BigEndian.PutUint32(pkt[44:48], 7)
	binary.BigEndian.PutUint32(pkt[48:52], 99)
	pkt[52] = 0x50
	pkt[53] = 0x10 // ACK only
	binary.BigEndian.PutUint16(pkt[54:56], 65535)
	for i := 0; i < payLen; i++ {
		pkt[ipLen+tcpLen+i] = byte(i)
	}
	return pkt
}

// TestDecodeReadFitsMaxTSOAtDrainThreshold proves the rxBuf sizing is
// correct: when rxOff is at the maximum value the drain headroom check
// allows, decodeRead must still be able to absorb a worst-case 64KiB
// TSO superpacket without dropping the burst. With segmentation deferred
// to encrypt time, decodeRead writes only the kernel-supplied bytes into
// rxBuf, so the size requirement is just "fit one worst-case input."
//
// Regression history: in a prior layout the rx buffer doubled as the
// segmentation output, a near-threshold drain read returned "scratch too
// small", the whole 45-segment TSO burst was dropped, and the remote's TCP
// fast-retransmit collapsed cwnd. Keeping this test in the new layout
// guards against re-introducing a drain headroom shortfall.
func TestDecodeReadFitsMaxTSOAtDrainThreshold(t *testing.T) {
	const ipv6HdrLen = 40
	const tcpHdrLen = 20
	const headerLen = ipv6HdrLen + tcpHdrLen
	// Maximum TUN read body. The tunReadBufSize cap on readv's body iovec
	// is what bounds the kernel's superpacket length.
	pktLen := tunReadBufSize
	payLen := pktLen - headerLen
	// Pick a gso size that yields roughly targetSegs segments (round up so
	// the whole payload is covered).
	const targetSegs = 64
	gsoSize := (payLen + targetSegs - 1) / targetSegs
	pkt := buildTSOv6(payLen, gsoSize)
	if len(pkt) != pktLen {
		t.Fatalf("buildTSOv6 produced %d bytes, want %d", len(pkt), pktLen)
	}
	o := &Offload{
		rxBuf: make([]byte, tunRxBufCap),
	}
	// rxOff at the maximum value the drain headroom check permits before
	// it would refuse another read. Any drain-time read up to this
	// threshold MUST still process correctly.
	o.rxOff = tunRxBufCap - tunRxBufSize
	// Stage the body in rxBuf as if readv(2) just placed it there.
	copy(o.rxBuf[o.rxOff:], pkt)
	// Encode the matching virtio_net_hdr.
	hdr := virtio.Hdr{
		Flags:      unix.VIRTIO_NET_HDR_F_NEEDS_CSUM,
		GSOType:    unix.VIRTIO_NET_HDR_GSO_TCPV6,
		HdrLen:     uint16(headerLen),
		GSOSize:    uint16(gsoSize),
		CsumStart:  uint16(ipv6HdrLen),
		CsumOffset: 16,
	}
	hdr.Encode(o.readVnetScratch[:])
	startRxOff := o.rxOff
	if err := o.decodeRead(pktLen); err != nil {
		t.Fatalf("decodeRead at drain threshold returned %v — rxBuf sizing regression: "+
			"tunRxBufSize=%d must hold one worst-case input (%d)", err, tunRxBufSize, pktLen)
	}
	if len(o.pending) != 1 {
		t.Fatalf("got %d packets, want 1 superpacket entry", len(o.pending))
	}
	// The pending entry must carry the GSO metadata decoded from the
	// virtio_net_hdr, with the packet bytes left unsegmented.
	got := o.pending[0]
	if !got.GSO.IsSuperpacket() {
		t.Fatalf("expected superpacket GSO metadata, got %+v", got.GSO)
	}
	if got.GSO.Proto != GSOProtoTCP {
		t.Errorf("GSO.Proto=%d want TCP", got.GSO.Proto)
	}
	if got.GSO.Size != uint16(gsoSize) {
		t.Errorf("GSO.Size=%d want %d", got.GSO.Size, gsoSize)
	}
	if got.GSO.HdrLen != uint16(headerLen) {
		t.Errorf("GSO.HdrLen=%d want %d", got.GSO.HdrLen, headerLen)
	}
	if got.GSO.CsumStart != uint16(ipv6HdrLen) {
		t.Errorf("GSO.CsumStart=%d want %d", got.GSO.CsumStart, ipv6HdrLen)
	}
	if len(got.Bytes) != pktLen {
		t.Errorf("len(Bytes)=%d want %d", len(got.Bytes), pktLen)
	}
	// rxOff advances exactly by the kernel-supplied body length — no
	// segmentation output to account for any more.
	if o.rxOff != startRxOff+pktLen {
		t.Errorf("rxOff=%d want %d", o.rxOff, startRxOff+pktLen)
	}
	if o.rxOff > tunRxBufCap {
		t.Fatalf("rxOff=%d overran rxBuf (cap=%d)", o.rxOff, tunRxBufCap)
	}
	// Validate that segmenting the returned superpacket reproduces the
	// expected per-segment IPv6 payload length and TCP checksum.
	wantSegs := (payLen + gsoSize - 1) / gsoSize
	gotSegs := 0
	if err := SegmentSuperpacket(got, func(seg []byte) error {
		defer func() { gotSegs++ }()
		if len(seg) < headerLen+1 {
			t.Errorf("seg %d too short: %d", gotSegs, len(seg))
			return nil
		}
		if seg[0]>>4 != 6 {
			t.Errorf("seg %d: bad IP version %#x", gotSegs, seg[0])
		}
		segPay := len(seg) - headerLen
		gotPL := binary.BigEndian.Uint16(seg[4:6])
		if gotPL != uint16(tcpHdrLen+segPay) {
			t.Errorf("seg %d: payload_len=%d want %d", gotSegs, gotPL, tcpHdrLen+segPay)
		}
		psum := pseudoHeaderIPv6(seg[8:24], seg[24:40], unix.IPPROTO_TCP, tcpHdrLen+segPay)
		if !verifyChecksum(seg[ipv6HdrLen:], psum) {
			t.Errorf("seg %d: bad TCP checksum", gotSegs)
		}
		return nil
	}); err != nil {
		t.Fatalf("SegmentSuperpacket: %v", err)
	}
	if gotSegs != wantSegs {
		t.Fatalf("got %d segments, want %d", gotSegs, wantSegs)
	}
}