This commit is contained in:
JackDoan
2025-11-07 16:50:43 -06:00
parent 6e22bfeeb1
commit e3be0943fd
13 changed files with 1469 additions and 8 deletions

232
overlay/tuntap/device.go Normal file
View File

@@ -0,0 +1,232 @@
package tuntap
import (
"fmt"
"net"
"os"
"time"
"unsafe"
"github.com/hetznercloud/virtio-go/virtio"
"golang.org/x/sys/unix"
)
// Documentation:
// https://docs.kernel.org/networking/tuntap.html
// Also worth a read:
// https://blog.cloudflare.com/virtual-networking-101-understanding-tap/
// Device represents a TUN/TAP device.
//
// Instances are created by [NewDevice]. All accessor and I/O methods panic
// when the file field is nil, which is the case for a zero value and after a
// successful [Device.Close].
type Device struct {
// name is the interface name that the TUNSETIFF ioctl reported back.
name string
// ifindex is the interface index obtained via the SIOCGIFINDEX ioctl.
ifindex uint32
// mac is the hardware address obtained via the SIOCGIFHWADDR ioctl.
mac net.HardwareAddr
// file wraps the file descriptor of the device. It is reset to nil by a
// successful Close.
file *os.File
}
// NewDevice creates a new TUN/TAP device, brings it up, and returns a [Device]
// instance providing access to it.
//
// There are multiple options that can be passed to this constructor to
// influence device creation:
//   - [WithName]
//   - [WithDeviceType]
//   - [WithVirtioNetHdr]
//   - [WithOffloads]
//   - [WithInterfaceFlags]
//
// An error is returned when the options are invalid or when any of the
// system calls needed to create and configure the device fails. In that case
// the partially created device is closed again before returning.
//
// Remember to call [Device.Close] after use to free up resources.
func NewDevice(options ...Option) (_ *Device, err error) {
	opts := optionDefaults
	opts.apply(options)
	if err = opts.validate(); err != nil {
		return nil, fmt.Errorf("invalid options: %w", err)
	}

	// Get a file descriptor. The device will exist as long as we keep this
	// file descriptor open.
	// O_CLOEXEC makes sure the descriptor is not leaked into child processes
	// that are spawned while the device is in use. The mode argument is only
	// relevant when creating files, so it is left at zero.
	fd, err := unix.Open("/dev/net/tun", unix.O_RDWR|unix.O_CLOEXEC, 0)
	if err != nil {
		return nil, fmt.Errorf("access tuntap driver: %w", err)
	}

	// Create an interface request. When the name is empty, the kernel will
	// auto-select one.
	ifreq, err := unix.NewIfreq(opts.name)
	if err != nil {
		_ = unix.Close(fd)
		return nil, fmt.Errorf("new ifreq: %w", err)
	}

	// Create the new device.
	ifreq.SetUint16(opts.ifreqFlags())
	if err = unix.IoctlIfreq(fd, unix.TUNSETIFF, ifreq); err != nil {
		_ = unix.Close(fd)
		return nil, fmt.Errorf("create device: %w", err)
	}

	dev := Device{
		// The TUNSETIFF ioctl writes the actual name that was chosen for the
		// device back to the request, so use that.
		name: ifreq.Name(),
	}

	// Make the file descriptor of the device non-blocking. This enables us to
	// cancel reads after a timeout when no packets are arriving.
	// This, and the call to NewFile has to happen after creating the device:
	// https://github.com/golang/go/issues/30426#issuecomment-470330742
	// NewFile will recognize that the file descriptor is non-blocking and will
	// configure polling for it.
	if err = unix.SetNonblock(fd, true); err != nil {
		_ = unix.Close(fd)
		return nil, fmt.Errorf("make file descriptor non-blocking: %w", err)
	}

	// By wrapping the file descriptor as an os.File, we not only have a
	// convenient way to read and write, but also register a finalizer that
	// closes the file descriptor when it's being garbage collected.
	dev.file = os.NewFile(uintptr(fd), dev.name)

	// Make sure the device is removed when one of the following initialization
	// steps fails. From here on the descriptor is owned by dev.file, so it
	// must no longer be closed directly.
	defer func() {
		if err != nil {
			_ = dev.Close()
		}
	}()

	if opts.virtioNetHdr {
		// Tell the device which size we use for our virtio_net_hdr.
		err = unix.IoctlSetPointerInt(fd, unix.TUNSETVNETHDRSZ, virtio.NetHdrSize)
		if err != nil {
			return nil, fmt.Errorf("set vnethdr size: %w", err)
		}
	}

	// Tell the device which offloads are supported.
	err = unix.IoctlSetInt(fd, unix.TUNSETOFFLOAD, opts.offloads)
	if err != nil {
		return nil, fmt.Errorf("set offloads: %w", err)
	}

	// For the following ioctls we need just any AF_INET socket, so create one.
	// It is short-lived, but still marked close-on-exec so that it cannot leak
	// into a child process forked in the meantime.
	inet, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM|unix.SOCK_CLOEXEC, 0)
	if err != nil {
		return nil, fmt.Errorf("open inet socket: %w", err)
	}
	defer func() { _ = unix.Close(inet) }()

	// Set the interface flags to bring it up.
	ifreq.SetUint16(unix.IFF_UP | opts.interfaceFlags)
	if err = unix.IoctlIfreq(inet, unix.SIOCSIFFLAGS, ifreq); err != nil {
		return nil, fmt.Errorf("set interface flags: %w", err)
	}

	// Get the interface index.
	if err = unix.IoctlIfreq(inet, unix.SIOCGIFINDEX, ifreq); err != nil {
		return nil, fmt.Errorf("get interface index: %w", err)
	}
	dev.ifindex = ifreq.Uint32()

	// Get the MAC address.
	// This ioctl writes a sockaddr into the data ifru section of the interface
	// request struct. The MAC address is in the beginning of the
	// sockaddr.sa_data section.
	if err = unix.IoctlIfreq(inet, unix.SIOCGIFHWADDR, ifreq); err != nil {
		return nil, fmt.Errorf("get mac address: %w", err)
	}
	// Copy the six address bytes out of the request instead of slicing into
	// it, so that the device neither aliases nor keeps alive the request
	// memory and the resulting slice has no spare capacity pointing into it.
	raw := unsafe.Slice((*byte)(unsafe.Pointer(ifreq)), 32)
	dev.mac = append(net.HardwareAddr(nil), raw[16+2:16+8]...)

	return &dev, nil
}
// Close releases the file descriptor that backs this device, which in turn
// makes the TUN/TAP device go away.
//
// When closing the underlying file fails, the wrapped error is returned and
// the device keeps its file reference. On success the reference is cleared.
func (dev *Device) Close() error {
	closeErr := dev.file.Close()
	if closeErr != nil {
		return fmt.Errorf("close file descriptor: %w", closeErr)
	}

	// Mark the device as no longer usable for the guarded methods.
	dev.file = nil
	return nil
}
// Name reports the interface name of this device.
// It panics when the device is not initialized.
func (dev *Device) Name() string {
	dev.ensureInitialized()

	return dev.name
}
// Ifindex reports the interface index of this device.
// It panics when the device is not initialized.
func (dev *Device) Ifindex() uint32 {
	dev.ensureInitialized()

	return dev.ifindex
}
// MAC reports the hardware address of this device.
// The returned slice is the one stored in the device, not a copy.
// It panics when the device is not initialized.
func (dev *Device) MAC() net.HardwareAddr {
	dev.ensureInitialized()

	return dev.mac
}
// File exposes the [os.File] through which this device is read and written.
// Callers that use it directly must take care not to interfere with this
// implementation.
// It panics when the device is not initialized.
func (dev *Device) File() *os.File {
	dev.ensureInitialized()

	return dev.file
}
// WritePacket sends the given packet to the TUN/TAP device with a single
// write on the underlying file.
//
// If the [WithVirtioNetHdr] option was enabled, the caller has to prepend the
// packet with a [virtio.NetHdr] itself.
//
// It panics when the device is not initialized.
func (dev *Device) WritePacket(packet []byte) error {
	dev.ensureInitialized()

	if _, err := dev.file.Write(packet); err != nil {
		return fmt.Errorf("write %d bytes: %w", len(packet), err)
	}
	return nil
}
// ReadPacket fetches the next available packet from the TUN/TAP device and
// stores it in buf. It returns the number of bytes that were read. The buffer
// should be large enough for a whole packet, otherwise only a part of the
// packet may be read.
//
// If the [WithVirtioNetHdr] option was enabled, the packet is preceded by a
// [virtio.NetHdr] which the caller has to handle.
//
// The timeout bounds how long the call may block. When no packet arrives in
// time, the read is canceled and the returned error wraps
// [os.ErrDeadlineExceeded]. A timeout that is not positive removes the
// deadline, so the call blocks until a packet arrives.
//
// It panics when the device is not initialized.
func (dev *Device) ReadPacket(buf []byte, timeout time.Duration) (int, error) {
	dev.ensureInitialized()

	// The zero time clears the read deadline; a positive timeout replaces it
	// with an absolute point in time. Deadlines only work because the file
	// supports polling.
	deadline := time.Time{}
	if timeout > 0 {
		deadline = time.Now().Add(timeout)
	}
	if err := dev.file.SetReadDeadline(deadline); err != nil {
		return 0, fmt.Errorf("set deadline: %w", err)
	}

	read, err := dev.file.Read(buf)
	if err == nil {
		return read, nil
	}
	return read, fmt.Errorf("read up to %d bytes: %w", len(buf), err)
}
// ensureInitialized is used as a guard to prevent methods from being called
// on an uninitialized instance. It panics when the receiver is nil or when
// the device has no file, which is the case for a zero value and after a
// successful Close.
func (dev *Device) ensureInitialized() {
	// Checking the receiver first turns a call on a nil *Device into the same
	// descriptive panic instead of a bare nil pointer dereference.
	if dev == nil || dev.file == nil {
		panic("device is not initialized")
	}
}

View File

@@ -0,0 +1,132 @@
package tuntap_test
import (
"net"
"os"
"testing"
"time"
"github.com/gopacket/gopacket/afpacket"
"github.com/hetznercloud/virtio-go/internal/testsupport"
"github.com/hetznercloud/virtio-go/tuntap"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/sys/unix"
)
// TestNewDevice creates TAP devices with a static and with an auto-selected
// name and verifies that name, interface index and MAC address reported by
// the device match what the net package reports for the interface.
func TestNewDevice(t *testing.T) {
	testsupport.VirtrunOnly(t)

	t.Run("with static name", func(t *testing.T) {
		const name = "test42"
		dev, err := tuntap.NewDevice(
			tuntap.WithDeviceType(tuntap.DeviceTypeTAP),
			tuntap.WithName(name),
		)
		require.NoError(t, err)
		t.Cleanup(func() {
			assert.NoError(t, dev.Close())
		})

		assert.Equal(t, name, dev.Name())

		// The lookup must succeed before iface is dereferenced, otherwise the
		// test would panic on a nil pointer instead of failing cleanly.
		iface, err := net.InterfaceByIndex(int(dev.Ifindex()))
		require.NoError(t, err)
		assert.Equal(t, name, iface.Name)
		assert.Equal(t, dev.MAC(), iface.HardwareAddr)
	})

	t.Run("with auto selected name", func(t *testing.T) {
		dev, err := tuntap.NewDevice(
			tuntap.WithDeviceType(tuntap.DeviceTypeTAP),
		)
		require.NoError(t, err)
		t.Cleanup(func() {
			assert.NoError(t, dev.Close())
		})

		assert.Contains(t, dev.Name(), "tap")

		// See above: stop here when the interface cannot be looked up.
		iface, err := net.InterfaceByIndex(int(dev.Ifindex()))
		require.NoError(t, err)
		assert.Equal(t, dev.Name(), iface.Name)
		assert.Equal(t, dev.MAC(), iface.HardwareAddr)
	})
}
// TestDevice_WritePacket writes a test packet to the TAP device and expects
// the very same bytes to show up on the RAW socket bound to the interface.
func TestDevice_WritePacket(t *testing.T) {
	testsupport.VirtrunOnly(t)

	dev, sock := setupTestDevice(t)

	// Hand a test packet to the TAP device.
	_, frame := testsupport.TestPacket(t, dev.MAC(), 64)
	writeErr := dev.WritePacket(frame)
	assert.NoError(t, writeErr)

	// The RAW socket should now deliver that packet.
	received, _, readErr := sock.ReadPacketData()
	assert.NoError(t, readErr)
	assert.Equal(t, frame, received)
}
// TestDevice_ReadPacket writes a test packet to the RAW socket bound to the
// interface and expects the very same bytes to be readable from the TAP
// device within one second.
func TestDevice_ReadPacket(t *testing.T) {
	testsupport.VirtrunOnly(t)

	dev, sock := setupTestDevice(t)

	// Inject a test packet through the RAW socket.
	_, frame := testsupport.TestPacket(t, dev.MAC(), 64)
	injectErr := sock.WritePacketData(frame)
	assert.NoError(t, injectErr)

	// The TAP device should now deliver that packet.
	buf := make([]byte, 1024)
	got, readErr := dev.ReadPacket(buf, time.Second)
	assert.NoError(t, readErr)
	assert.Equal(t, len(frame), got)
	assert.Equal(t, frame, buf[:got])
}
// TestDevice_ReadPacket_Timeout verifies that reading from a TAP device that
// receives no packets fails with [os.ErrDeadlineExceeded] once the timeout
// has passed.
func TestDevice_ReadPacket_Timeout(t *testing.T) {
	testsupport.VirtrunOnly(t)

	dev, _ := setupTestDevice(t)

	// Nothing was sent, so this read is expected to run into the deadline.
	const wait = 500 * time.Millisecond
	buf := make([]byte, 1024)
	_, readErr := dev.ReadPacket(buf, wait)
	assert.ErrorIs(t, readErr, os.ErrDeadlineExceeded)
}
// setupTestDevice creates a TAP device together with a RAW packet socket that
// is bound to it. Both are closed automatically when the test finishes.
func setupTestDevice(t *testing.T) (*tuntap.Device, *afpacket.TPacket) {
	t.Helper()
	testsupport.VirtrunOnly(t)

	// Router solicitations sent by the Linux kernel may interfere with these
	// tests, so IPv6 is turned off before the interface comes up.
	testsupport.SetSysctl(t, "net.ipv6.conf.all.disable_ipv6", "1")

	// The TAP device under test. IFF_NOARP helps to stop the Linux kernel
	// from sending packets on this interface.
	deviceOpts := []tuntap.Option{
		tuntap.WithDeviceType(tuntap.DeviceTypeTAP),
		tuntap.WithInterfaceFlags(unix.IFF_NOARP),
	}
	dev, err := tuntap.NewDevice(deviceOpts...)
	require.NoError(t, err)
	t.Cleanup(func() {
		assert.NoError(t, dev.Close())
	})

	// The RAW socket is used to capture packets arriving at the TAP device
	// and to write packets to it.
	sock, err := afpacket.NewTPacket(
		afpacket.SocketRaw,
		afpacket.TPacketVersion3,
		afpacket.OptInterface(dev.Name()),
	)
	require.NoError(t, err)
	t.Cleanup(sock.Close)

	return dev, sock
}

3
overlay/tuntap/doc.go Normal file
View File

@@ -0,0 +1,3 @@
// Package tuntap provides methods to create TUN/TAP devices and send and
// receive packets on them.
package tuntap

116
overlay/tuntap/options.go Normal file
View File

@@ -0,0 +1,116 @@
package tuntap
import (
"errors"
"golang.org/x/sys/unix"
)
// DeviceType is the TUN/TAP device type. Its valid values are the matching
// interface request flags, see [DeviceTypeTUN] and [DeviceTypeTAP].
type DeviceType int
const (
// DeviceTypeTUN can be used to create TUN devices that operate on layer 3.
// Packets that are transported over TUN devices do not have an Ethernet
// header.
DeviceTypeTUN DeviceType = unix.IFF_TUN
// DeviceTypeTAP can be used to create TAP devices that operate on layer 2.
// Packets that are transported over TAP devices do have an Ethernet header.
DeviceTypeTAP DeviceType = unix.IFF_TAP
)
// optionValues collects the settings that the [Option] functions write to and
// that [NewDevice] reads when creating a device.
type optionValues struct {
// name is the requested interface name; empty lets the kernel pick one.
name string
// deviceType selects between TUN and TAP and is checked by validate.
deviceType DeviceType
// virtioNetHdr adds IFF_VNET_HDR to the flags used for device creation.
virtioNetHdr bool
// offloads is the value passed to the TUNSETOFFLOAD ioctl.
offloads int
// interfaceFlags are combined with IFF_UP when bringing the interface up.
interfaceFlags uint16
}
// apply runs the given options in order against these option values. Later
// options therefore overwrite what earlier ones have set.
func (o *optionValues) apply(options []Option) {
	for i := range options {
		options[i](o)
	}
}
// validate checks that the option values can be used to create a device.
// It returns an error when the name does not fit into an interface name
// buffer or when the device type is neither [DeviceTypeTUN] nor
// [DeviceTypeTAP], and nil otherwise.
func (o *optionValues) validate() error {
	// The name has to be shorter than unix.IFNAMSIZ bytes.
	if len(o.name) >= unix.IFNAMSIZ {
		return errors.New("name must not be longer than 15 characters")
	}

	switch o.deviceType {
	case DeviceTypeTUN, DeviceTypeTAP:
		return nil
	default:
		// This also covers the default value, which is deliberately invalid.
		return errors.New("device type is required and must be either TUN or TAP")
	}
}
// ifreqFlags assembles the flags for the interface request that creates the
// device, based on the device type and the virtio_net_hdr setting.
func (o *optionValues) ifreqFlags() uint16 {
	// IFF_NO_PI disables the packet information prefix. IFF_TUN_EXCL ensures
	// the ioctl fails when a device with the same name already exists.
	base := uint16(o.deviceType) | unix.IFF_NO_PI | unix.IFF_TUN_EXCL
	if !o.virtioNetHdr {
		return base
	}

	// IFF_VNET_HDR also requires the TUNSETVNETHDRSZ ioctl at a later time.
	return base | unix.IFF_VNET_HDR
}
// optionDefaults holds the option values that [NewDevice] starts with before
// the passed options are applied.
var optionDefaults = optionValues{
// Let the kernel auto-select a name.
name: "",
// Required. The default is deliberately neither TUN nor TAP, so that
// validation fails unless a device type was set explicitly.
deviceType: -1,
// Don't enable it by default to avoid surprises.
virtioNetHdr: false,
// Optional. No offload support advertised by default.
offloads: 0,
// Optional. IFF_UP will always be set.
interfaceFlags: 0,
}
// Option can be passed to [NewDevice] to influence device creation.
// An Option is a function that modifies the option values it is given.
type Option func(*optionValues)
// WithName returns an [Option] that sets the name of the device that is going
// to be created.
// This is optional. Without a name, the kernel auto-selects one using the
// scheme "tunX" or "tapX".
func WithName(name string) Option {
	return func(values *optionValues) {
		values.name = name
	}
}
// WithDeviceType returns an [Option] that selects which type of device gets
// created.
// This is required.
func WithDeviceType(deviceType DeviceType) Option {
	return func(values *optionValues) {
		values.deviceType = deviceType
	}
}
// WithVirtioNetHdr returns an [Option] that controls whether packets that are
// transported over the device are prepended with a [virtio.NetHdr].
// This is optional and disabled by default.
func WithVirtioNetHdr(enable bool) Option {
	return func(values *optionValues) {
		values.virtioNetHdr = enable
	}
}
// WithOffloads returns an [Option] that sets which offloads the device should
// advertise as supported. It tells the kernel which offloads the owner of the
// device can deal with ([unix.TUN_F_CSUM] for example).
// This is optional. By default, no offloads are supported.
// When offloads are configured, [WithVirtioNetHdr] should be enabled as well.
func WithOffloads(offloads int) Option {
	return func(values *optionValues) {
		values.offloads = offloads
	}
}
// WithInterfaceFlags returns an [Option] that sets the flags which are used
// when the created interface is brought up.
// This is optional. The [unix.IFF_UP] flag will always be set.
// The [unix.IFF_NOARP] flag may be useful in some scenarios to keep packets
// from the Linux networking stack from interfering with your application.
func WithInterfaceFlags(flags uint16) Option {
	return func(values *optionValues) {
		values.interfaceFlags = flags
	}
}

View File

@@ -0,0 +1,79 @@
package tuntap
import (
"testing"
"github.com/stretchr/testify/assert"
"golang.org/x/sys/unix"
)
func TestOptionValues_Apply(t *testing.T) {
opts := optionDefaults
opts.apply([]Option{
WithName("name"),
WithDeviceType(DeviceTypeTAP),
WithVirtioNetHdr(true),
WithOffloads(unix.TUN_F_CSUM),
WithInterfaceFlags(unix.IFF_NOARP),
})
assert.Equal(t, optionValues{
name: "name",
deviceType: DeviceTypeTAP,
virtioNetHdr: true,
offloads: unix.TUN_F_CSUM,
interfaceFlags: unix.IFF_NOARP,
}, opts)
}
// TestOptionValues_Validate runs validate against a table of option values
// and checks for each entry whether an error is reported or not.
func TestOptionValues_Validate(t *testing.T) {
	type testCase struct {
		name   string
		values optionValues
		check  assert.ErrorAssertionFunc
	}

	cases := []testCase{
		{
			name: "name too long",
			values: optionValues{
				name:       "thisisaverylongname",
				deviceType: DeviceTypeTAP,
			},
			check: assert.Error,
		},
		{
			// The zero value carries no valid device type.
			name:   "device type missing",
			values: optionValues{},
			check:  assert.Error,
		},
		{
			name:   "invalid device type",
			values: optionValues{deviceType: 999},
			check:  assert.Error,
		},
		{
			name:   "valid minimal",
			values: optionValues{deviceType: DeviceTypeTAP},
			check:  assert.NoError,
		},
		{
			name: "valid full",
			values: optionValues{
				name:           "name",
				deviceType:     DeviceTypeTAP,
				virtioNetHdr:   true,
				offloads:       unix.TUN_F_CSUM,
				interfaceFlags: unix.IFF_NOARP,
			},
			check: assert.NoError,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			err := tc.values.validate()
			tc.check(t, err)
		})
	}
}