mirror of
https://github.com/slackhq/nebula.git
synced 2025-11-23 08:54:25 +01:00
Compare commits
9 Commits
batched-pa
...
jay.wren-w
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8d4dd26484 | ||
|
|
0a94f9f990 | ||
|
|
433c531ae4 | ||
|
|
4c0aad1b1f | ||
|
|
c8b0281736 | ||
|
|
8281b1699f | ||
|
|
0827a6f1c5 | ||
|
|
273119638d | ||
|
|
484de41b58 |
@@ -1,164 +0,0 @@
|
|||||||
package nebula
|
|
||||||
|
|
||||||
import (
|
|
||||||
"net/netip"
|
|
||||||
|
|
||||||
"github.com/slackhq/nebula/overlay"
|
|
||||||
"github.com/slackhq/nebula/udp"
|
|
||||||
)
|
|
||||||
|
|
||||||
// batchPipelines tracks whether the inside device can operate on packet batches
|
|
||||||
// and, if so, holds the shared packet pool sized for the virtio headroom and
|
|
||||||
// payload limits advertised by the device. It also owns the fan-in/fan-out
|
|
||||||
// queues between the TUN readers, encrypt/decrypt workers, and the UDP writers.
|
|
||||||
type batchPipelines struct {
|
|
||||||
enabled bool
|
|
||||||
inside overlay.BatchCapableDevice
|
|
||||||
headroom int
|
|
||||||
payloadCap int
|
|
||||||
pool *overlay.PacketPool
|
|
||||||
batchSize int
|
|
||||||
routines int
|
|
||||||
rxQueues []chan *overlay.Packet
|
|
||||||
txQueues []chan queuedDatagram
|
|
||||||
tunQueues []chan *overlay.Packet
|
|
||||||
}
|
|
||||||
|
|
||||||
type queuedDatagram struct {
|
|
||||||
packet *overlay.Packet
|
|
||||||
addr netip.AddrPort
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bp *batchPipelines) init(device overlay.Device, routines int, queueDepth int, maxSegments int) {
|
|
||||||
if device == nil || routines <= 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
bcap, ok := device.(overlay.BatchCapableDevice)
|
|
||||||
if !ok {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
headroom := bcap.BatchHeadroom()
|
|
||||||
payload := bcap.BatchPayloadCap()
|
|
||||||
if maxSegments < 1 {
|
|
||||||
maxSegments = 1
|
|
||||||
}
|
|
||||||
requiredPayload := udp.MTU * maxSegments
|
|
||||||
if payload < requiredPayload {
|
|
||||||
payload = requiredPayload
|
|
||||||
}
|
|
||||||
batchSize := bcap.BatchSize()
|
|
||||||
if headroom <= 0 || payload <= 0 || batchSize <= 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
bp.enabled = true
|
|
||||||
bp.inside = bcap
|
|
||||||
bp.headroom = headroom
|
|
||||||
bp.payloadCap = payload
|
|
||||||
bp.batchSize = batchSize
|
|
||||||
bp.routines = routines
|
|
||||||
bp.pool = overlay.NewPacketPool(headroom, payload)
|
|
||||||
queueCap := batchSize * defaultBatchQueueDepthFactor
|
|
||||||
if queueDepth > 0 {
|
|
||||||
queueCap = queueDepth
|
|
||||||
}
|
|
||||||
if queueCap < batchSize {
|
|
||||||
queueCap = batchSize
|
|
||||||
}
|
|
||||||
bp.rxQueues = make([]chan *overlay.Packet, routines)
|
|
||||||
bp.txQueues = make([]chan queuedDatagram, routines)
|
|
||||||
bp.tunQueues = make([]chan *overlay.Packet, routines)
|
|
||||||
for i := 0; i < routines; i++ {
|
|
||||||
bp.rxQueues[i] = make(chan *overlay.Packet, queueCap)
|
|
||||||
bp.txQueues[i] = make(chan queuedDatagram, queueCap)
|
|
||||||
bp.tunQueues[i] = make(chan *overlay.Packet, queueCap)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bp *batchPipelines) Pool() *overlay.PacketPool {
|
|
||||||
if bp == nil || !bp.enabled {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return bp.pool
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bp *batchPipelines) Enabled() bool {
|
|
||||||
return bp != nil && bp.enabled
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bp *batchPipelines) batchSizeHint() int {
|
|
||||||
if bp == nil || bp.batchSize <= 0 {
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
return bp.batchSize
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bp *batchPipelines) rxQueue(i int) chan *overlay.Packet {
|
|
||||||
if bp == nil || !bp.enabled || i < 0 || i >= len(bp.rxQueues) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return bp.rxQueues[i]
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bp *batchPipelines) txQueue(i int) chan queuedDatagram {
|
|
||||||
if bp == nil || !bp.enabled || i < 0 || i >= len(bp.txQueues) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return bp.txQueues[i]
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bp *batchPipelines) tunQueue(i int) chan *overlay.Packet {
|
|
||||||
if bp == nil || !bp.enabled || i < 0 || i >= len(bp.tunQueues) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return bp.tunQueues[i]
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bp *batchPipelines) txQueueLen(i int) int {
|
|
||||||
q := bp.txQueue(i)
|
|
||||||
if q == nil {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
return len(q)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bp *batchPipelines) tunQueueLen(i int) int {
|
|
||||||
q := bp.tunQueue(i)
|
|
||||||
if q == nil {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
return len(q)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bp *batchPipelines) enqueueRx(i int, pkt *overlay.Packet) bool {
|
|
||||||
q := bp.rxQueue(i)
|
|
||||||
if q == nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
q <- pkt
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bp *batchPipelines) enqueueTx(i int, pkt *overlay.Packet, addr netip.AddrPort) bool {
|
|
||||||
q := bp.txQueue(i)
|
|
||||||
if q == nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
q <- queuedDatagram{packet: pkt, addr: addr}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bp *batchPipelines) enqueueTun(i int, pkt *overlay.Packet) bool {
|
|
||||||
q := bp.tunQueue(i)
|
|
||||||
if q == nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
q <- pkt
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
func (bp *batchPipelines) newPacket() *overlay.Packet {
|
|
||||||
if bp == nil || !bp.enabled || bp.pool == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return bp.pool.Get()
|
|
||||||
}
|
|
||||||
97
cert/pem.go
97
cert/pem.go
@@ -1,10 +1,8 @@
|
|||||||
package cert
|
package cert
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/hex"
|
|
||||||
"encoding/pem"
|
"encoding/pem"
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
|
||||||
|
|
||||||
"golang.org/x/crypto/ed25519"
|
"golang.org/x/crypto/ed25519"
|
||||||
)
|
)
|
||||||
@@ -140,101 +138,6 @@ func MarshalSigningPrivateKeyToPEM(curve Curve, b []byte) []byte {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Backward compatibility functions for older API
|
|
||||||
func MarshalX25519PublicKey(b []byte) []byte {
|
|
||||||
return MarshalPublicKeyToPEM(Curve_CURVE25519, b)
|
|
||||||
}
|
|
||||||
|
|
||||||
func MarshalX25519PrivateKey(b []byte) []byte {
|
|
||||||
return MarshalPrivateKeyToPEM(Curve_CURVE25519, b)
|
|
||||||
}
|
|
||||||
|
|
||||||
func MarshalPublicKey(curve Curve, b []byte) []byte {
|
|
||||||
return MarshalPublicKeyToPEM(curve, b)
|
|
||||||
}
|
|
||||||
|
|
||||||
func MarshalPrivateKey(curve Curve, b []byte) []byte {
|
|
||||||
return MarshalPrivateKeyToPEM(curve, b)
|
|
||||||
}
|
|
||||||
|
|
||||||
// NebulaCertificate is a compatibility wrapper for the old API
|
|
||||||
type NebulaCertificate struct {
|
|
||||||
Details NebulaCertificateDetails
|
|
||||||
Signature []byte
|
|
||||||
cert Certificate
|
|
||||||
}
|
|
||||||
|
|
||||||
// NebulaCertificateDetails is a compatibility wrapper for certificate details
|
|
||||||
type NebulaCertificateDetails struct {
|
|
||||||
Name string
|
|
||||||
NotBefore time.Time
|
|
||||||
NotAfter time.Time
|
|
||||||
PublicKey []byte
|
|
||||||
IsCA bool
|
|
||||||
Issuer []byte
|
|
||||||
Curve Curve
|
|
||||||
}
|
|
||||||
|
|
||||||
// UnmarshalNebulaCertificateFromPEM provides backward compatibility with the old API
|
|
||||||
func UnmarshalNebulaCertificateFromPEM(b []byte) (*NebulaCertificate, []byte, error) {
|
|
||||||
c, rest, err := UnmarshalCertificateFromPEM(b)
|
|
||||||
if err != nil {
|
|
||||||
return nil, rest, err
|
|
||||||
}
|
|
||||||
|
|
||||||
issuerBytes, err := func() ([]byte, error) {
|
|
||||||
issuer := c.Issuer()
|
|
||||||
if issuer == "" {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
decoded, err := hex.DecodeString(issuer)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to decode issuer fingerprint: %w", err)
|
|
||||||
}
|
|
||||||
return decoded, nil
|
|
||||||
}()
|
|
||||||
if err != nil {
|
|
||||||
return nil, rest, err
|
|
||||||
}
|
|
||||||
|
|
||||||
pubKey := c.PublicKey()
|
|
||||||
if pubKey != nil {
|
|
||||||
pubKey = append([]byte(nil), pubKey...)
|
|
||||||
}
|
|
||||||
|
|
||||||
sig := c.Signature()
|
|
||||||
if sig != nil {
|
|
||||||
sig = append([]byte(nil), sig...)
|
|
||||||
}
|
|
||||||
|
|
||||||
return &NebulaCertificate{
|
|
||||||
Details: NebulaCertificateDetails{
|
|
||||||
Name: c.Name(),
|
|
||||||
NotBefore: c.NotBefore(),
|
|
||||||
NotAfter: c.NotAfter(),
|
|
||||||
PublicKey: pubKey,
|
|
||||||
IsCA: c.IsCA(),
|
|
||||||
Issuer: issuerBytes,
|
|
||||||
Curve: c.Curve(),
|
|
||||||
},
|
|
||||||
Signature: sig,
|
|
||||||
cert: c,
|
|
||||||
}, rest, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// IssuerString returns the issuer in hex format for compatibility
|
|
||||||
func (n *NebulaCertificate) IssuerString() string {
|
|
||||||
if n.Details.Issuer == nil {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
return hex.EncodeToString(n.Details.Issuer)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Certificate returns the underlying certificate (read-only)
|
|
||||||
func (n *NebulaCertificate) Certificate() Certificate {
|
|
||||||
return n.cert
|
|
||||||
}
|
|
||||||
|
|
||||||
// UnmarshalPrivateKeyFromPEM will try to unmarshal the first pem block in a byte array, returning any non
|
// UnmarshalPrivateKeyFromPEM will try to unmarshal the first pem block in a byte array, returning any non
|
||||||
// consumed data or an error on failure
|
// consumed data or an error on failure
|
||||||
func UnmarshalPrivateKeyFromPEM(b []byte) ([]byte, []byte, Curve, error) {
|
func UnmarshalPrivateKeyFromPEM(b []byte) ([]byte, []byte, Curve, error) {
|
||||||
|
|||||||
@@ -114,33 +114,6 @@ func NewTestCert(v cert.Version, curve cert.Curve, ca cert.Certificate, key []by
|
|||||||
return c, pub, cert.MarshalPrivateKeyToPEM(curve, priv), pem
|
return c, pub, cert.MarshalPrivateKeyToPEM(curve, priv), pem
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewTestCertDifferentVersion(c cert.Certificate, v cert.Version, ca cert.Certificate, key []byte) (cert.Certificate, []byte) {
|
|
||||||
nc := &cert.TBSCertificate{
|
|
||||||
Version: v,
|
|
||||||
Curve: c.Curve(),
|
|
||||||
Name: c.Name(),
|
|
||||||
Networks: c.Networks(),
|
|
||||||
UnsafeNetworks: c.UnsafeNetworks(),
|
|
||||||
Groups: c.Groups(),
|
|
||||||
NotBefore: time.Unix(c.NotBefore().Unix(), 0),
|
|
||||||
NotAfter: time.Unix(c.NotAfter().Unix(), 0),
|
|
||||||
PublicKey: c.PublicKey(),
|
|
||||||
IsCA: false,
|
|
||||||
}
|
|
||||||
|
|
||||||
c, err := nc.Sign(ca, ca.Curve(), key)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
pem, err := c.MarshalPEM()
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return c, pem
|
|
||||||
}
|
|
||||||
|
|
||||||
func X25519Keypair() ([]byte, []byte) {
|
func X25519Keypair() ([]byte, []byte) {
|
||||||
privkey := make([]byte, 32)
|
privkey := make([]byte, 32)
|
||||||
if _, err := io.ReadFull(rand.Reader, privkey); err != nil {
|
if _, err := io.ReadFull(rand.Reader, privkey); err != nil {
|
||||||
|
|||||||
@@ -354,6 +354,7 @@ func (cm *connectionManager) makeTrafficDecision(localIndex uint32, now time.Tim
|
|||||||
|
|
||||||
if mainHostInfo {
|
if mainHostInfo {
|
||||||
decision = tryRehandshake
|
decision = tryRehandshake
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
if cm.shouldSwapPrimary(hostinfo) {
|
if cm.shouldSwapPrimary(hostinfo) {
|
||||||
decision = swapPrimary
|
decision = swapPrimary
|
||||||
@@ -460,10 +461,6 @@ func (cm *connectionManager) shouldSwapPrimary(current *HostInfo) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
crt := cm.intf.pki.getCertState().getCertificate(current.ConnectionState.myCert.Version())
|
crt := cm.intf.pki.getCertState().getCertificate(current.ConnectionState.myCert.Version())
|
||||||
if crt == nil {
|
|
||||||
//my cert was reloaded away. We should definitely swap from this tunnel
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
// If this tunnel is using the latest certificate then we should swap it to primary for a bit and see if things
|
// If this tunnel is using the latest certificate then we should swap it to primary for a bit and see if things
|
||||||
// settle down.
|
// settle down.
|
||||||
return bytes.Equal(current.ConnectionState.myCert.Signature(), crt.Signature())
|
return bytes.Equal(current.ConnectionState.myCert.Signature(), crt.Signature())
|
||||||
@@ -478,34 +475,31 @@ func (cm *connectionManager) swapPrimary(current, primary *HostInfo) {
|
|||||||
cm.hostMap.Unlock()
|
cm.hostMap.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
// isInvalidCertificate decides if we should destroy a tunnel.
|
// isInvalidCertificate will check if we should destroy a tunnel if pki.disconnect_invalid is true and
|
||||||
// returns true if pki.disconnect_invalid is true and the certificate is no longer valid.
|
// the certificate is no longer valid. Block listed certificates will skip the pki.disconnect_invalid
|
||||||
// Blocklisted certificates will skip the pki.disconnect_invalid check and return true.
|
// check and return true.
|
||||||
func (cm *connectionManager) isInvalidCertificate(now time.Time, hostinfo *HostInfo) bool {
|
func (cm *connectionManager) isInvalidCertificate(now time.Time, hostinfo *HostInfo) bool {
|
||||||
remoteCert := hostinfo.GetCert()
|
remoteCert := hostinfo.GetCert()
|
||||||
if remoteCert == nil {
|
if remoteCert == nil {
|
||||||
return false //don't tear down tunnels for handshakes in progress
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
caPool := cm.intf.pki.GetCAPool()
|
caPool := cm.intf.pki.GetCAPool()
|
||||||
err := caPool.VerifyCachedCertificate(now, remoteCert)
|
err := caPool.VerifyCachedCertificate(now, remoteCert)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return false //cert is still valid! yay!
|
return false
|
||||||
} else if err == cert.ErrBlockListed { //avoiding errors.Is for speed
|
}
|
||||||
|
|
||||||
|
if !cm.intf.disconnectInvalid.Load() && err != cert.ErrBlockListed {
|
||||||
// Block listed certificates should always be disconnected
|
// Block listed certificates should always be disconnected
|
||||||
hostinfo.logger(cm.l).WithError(err).
|
return false
|
||||||
WithField("fingerprint", remoteCert.Fingerprint).
|
}
|
||||||
Info("Remote certificate is blocked, tearing down the tunnel")
|
|
||||||
return true
|
|
||||||
} else if cm.intf.disconnectInvalid.Load() {
|
|
||||||
hostinfo.logger(cm.l).WithError(err).
|
hostinfo.logger(cm.l).WithError(err).
|
||||||
WithField("fingerprint", remoteCert.Fingerprint).
|
WithField("fingerprint", remoteCert.Fingerprint).
|
||||||
Info("Remote certificate is no longer valid, tearing down the tunnel")
|
Info("Remote certificate is no longer valid, tearing down the tunnel")
|
||||||
|
|
||||||
return true
|
return true
|
||||||
} else {
|
|
||||||
//if we reach here, the cert is no longer valid, but we're configured to keep tunnels from now-invalid certs open
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cm *connectionManager) sendPunch(hostinfo *HostInfo) {
|
func (cm *connectionManager) sendPunch(hostinfo *HostInfo) {
|
||||||
@@ -536,45 +530,15 @@ func (cm *connectionManager) sendPunch(hostinfo *HostInfo) {
|
|||||||
func (cm *connectionManager) tryRehandshake(hostinfo *HostInfo) {
|
func (cm *connectionManager) tryRehandshake(hostinfo *HostInfo) {
|
||||||
cs := cm.intf.pki.getCertState()
|
cs := cm.intf.pki.getCertState()
|
||||||
curCrt := hostinfo.ConnectionState.myCert
|
curCrt := hostinfo.ConnectionState.myCert
|
||||||
curCrtVersion := curCrt.Version()
|
myCrt := cs.getCertificate(curCrt.Version())
|
||||||
myCrt := cs.getCertificate(curCrtVersion)
|
if curCrt.Version() >= cs.initiatingVersion && bytes.Equal(curCrt.Signature(), myCrt.Signature()) == true {
|
||||||
if myCrt == nil {
|
// The current tunnel is using the latest certificate and version, no need to rehandshake.
|
||||||
cm.l.WithField("vpnAddrs", hostinfo.vpnAddrs).
|
|
||||||
WithField("version", curCrtVersion).
|
|
||||||
WithField("reason", "local certificate removed").
|
|
||||||
Info("Re-handshaking with remote")
|
|
||||||
cm.intf.handshakeManager.StartHandshake(hostinfo.vpnAddrs[0], nil)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
peerCrt := hostinfo.ConnectionState.peerCert
|
|
||||||
if peerCrt != nil && curCrtVersion < peerCrt.Certificate.Version() {
|
|
||||||
// if our certificate version is less than theirs, and we have a matching version available, rehandshake?
|
|
||||||
if cs.getCertificate(peerCrt.Certificate.Version()) != nil {
|
|
||||||
cm.l.WithField("vpnAddrs", hostinfo.vpnAddrs).
|
|
||||||
WithField("version", curCrtVersion).
|
|
||||||
WithField("peerVersion", peerCrt.Certificate.Version()).
|
|
||||||
WithField("reason", "local certificate version lower than peer, attempting to correct").
|
|
||||||
Info("Re-handshaking with remote")
|
|
||||||
cm.intf.handshakeManager.StartHandshake(hostinfo.vpnAddrs[0], func(hh *HandshakeHostInfo) {
|
|
||||||
hh.initiatingVersionOverride = peerCrt.Certificate.Version()
|
|
||||||
})
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !bytes.Equal(curCrt.Signature(), myCrt.Signature()) {
|
|
||||||
cm.l.WithField("vpnAddrs", hostinfo.vpnAddrs).
|
cm.l.WithField("vpnAddrs", hostinfo.vpnAddrs).
|
||||||
WithField("reason", "local certificate is not current").
|
WithField("reason", "local certificate is not current").
|
||||||
Info("Re-handshaking with remote")
|
Info("Re-handshaking with remote")
|
||||||
|
|
||||||
cm.intf.handshakeManager.StartHandshake(hostinfo.vpnAddrs[0], nil)
|
cm.intf.handshakeManager.StartHandshake(hostinfo.vpnAddrs[0], nil)
|
||||||
return
|
|
||||||
}
|
|
||||||
if curCrtVersion < cs.initiatingVersion {
|
|
||||||
cm.l.WithField("vpnAddrs", hostinfo.vpnAddrs).
|
|
||||||
WithField("reason", "current cert version < pki.initiatingVersion").
|
|
||||||
Info("Re-handshaking with remote")
|
|
||||||
|
|
||||||
cm.intf.handshakeManager.StartHandshake(hostinfo.vpnAddrs[0], nil)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -129,109 +129,6 @@ func newSimpleServer(v cert.Version, caCrt cert.Certificate, caKey []byte, name
|
|||||||
return control, vpnNetworks, udpAddr, c
|
return control, vpnNetworks, udpAddr, c
|
||||||
}
|
}
|
||||||
|
|
||||||
// newServer creates a nebula instance with fewer assumptions
|
|
||||||
func newServer(caCrt []cert.Certificate, certs []cert.Certificate, key []byte, overrides m) (*nebula.Control, []netip.Prefix, netip.AddrPort, *config.C) {
|
|
||||||
l := NewTestLogger()
|
|
||||||
|
|
||||||
vpnNetworks := certs[len(certs)-1].Networks()
|
|
||||||
|
|
||||||
var udpAddr netip.AddrPort
|
|
||||||
if vpnNetworks[0].Addr().Is4() {
|
|
||||||
budpIp := vpnNetworks[0].Addr().As4()
|
|
||||||
budpIp[1] -= 128
|
|
||||||
udpAddr = netip.AddrPortFrom(netip.AddrFrom4(budpIp), 4242)
|
|
||||||
} else {
|
|
||||||
budpIp := vpnNetworks[0].Addr().As16()
|
|
||||||
// beef for funsies
|
|
||||||
budpIp[2] = 190
|
|
||||||
budpIp[3] = 239
|
|
||||||
udpAddr = netip.AddrPortFrom(netip.AddrFrom16(budpIp), 4242)
|
|
||||||
}
|
|
||||||
|
|
||||||
caStr := ""
|
|
||||||
for _, ca := range caCrt {
|
|
||||||
x, err := ca.MarshalPEM()
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
caStr += string(x)
|
|
||||||
}
|
|
||||||
certStr := ""
|
|
||||||
for _, c := range certs {
|
|
||||||
x, err := c.MarshalPEM()
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
certStr += string(x)
|
|
||||||
}
|
|
||||||
|
|
||||||
mc := m{
|
|
||||||
"pki": m{
|
|
||||||
"ca": caStr,
|
|
||||||
"cert": certStr,
|
|
||||||
"key": string(key),
|
|
||||||
},
|
|
||||||
//"tun": m{"disabled": true},
|
|
||||||
"firewall": m{
|
|
||||||
"outbound": []m{{
|
|
||||||
"proto": "any",
|
|
||||||
"port": "any",
|
|
||||||
"host": "any",
|
|
||||||
}},
|
|
||||||
"inbound": []m{{
|
|
||||||
"proto": "any",
|
|
||||||
"port": "any",
|
|
||||||
"host": "any",
|
|
||||||
}},
|
|
||||||
},
|
|
||||||
//"handshakes": m{
|
|
||||||
// "try_interval": "1s",
|
|
||||||
//},
|
|
||||||
"listen": m{
|
|
||||||
"host": udpAddr.Addr().String(),
|
|
||||||
"port": udpAddr.Port(),
|
|
||||||
},
|
|
||||||
"logging": m{
|
|
||||||
"timestamp_format": fmt.Sprintf("%v 15:04:05.000000", certs[0].Name()),
|
|
||||||
"level": l.Level.String(),
|
|
||||||
},
|
|
||||||
"timers": m{
|
|
||||||
"pending_deletion_interval": 2,
|
|
||||||
"connection_alive_interval": 2,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
if overrides != nil {
|
|
||||||
final := m{}
|
|
||||||
err := mergo.Merge(&final, overrides, mergo.WithAppendSlice)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
err = mergo.Merge(&final, mc, mergo.WithAppendSlice)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
mc = final
|
|
||||||
}
|
|
||||||
|
|
||||||
cb, err := yaml.Marshal(mc)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
c := config.NewC(l)
|
|
||||||
cStr := string(cb)
|
|
||||||
c.LoadString(cStr)
|
|
||||||
|
|
||||||
control, err := nebula.Main(c, false, "e2e-test", l, nil)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return control, vpnNetworks, udpAddr, c
|
|
||||||
}
|
|
||||||
|
|
||||||
type doneCb func()
|
type doneCb func()
|
||||||
|
|
||||||
func deadline(t *testing.T, seconds time.Duration) doneCb {
|
func deadline(t *testing.T, seconds time.Duration) doneCb {
|
||||||
|
|||||||
@@ -4,16 +4,12 @@
|
|||||||
package e2e
|
package e2e
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"net/netip"
|
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/slackhq/nebula/cert"
|
"github.com/slackhq/nebula/cert"
|
||||||
"github.com/slackhq/nebula/cert_test"
|
"github.com/slackhq/nebula/cert_test"
|
||||||
"github.com/slackhq/nebula/e2e/router"
|
"github.com/slackhq/nebula/e2e/router"
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"gopkg.in/yaml.v3"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestDropInactiveTunnels(t *testing.T) {
|
func TestDropInactiveTunnels(t *testing.T) {
|
||||||
@@ -59,262 +55,3 @@ func TestDropInactiveTunnels(t *testing.T) {
|
|||||||
myControl.Stop()
|
myControl.Stop()
|
||||||
theirControl.Stop()
|
theirControl.Stop()
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCertUpgrade(t *testing.T) {
|
|
||||||
// The goal of this test is to ensure the shortest inactivity timeout will close the tunnel on both sides
|
|
||||||
// under ideal conditions
|
|
||||||
ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})
|
|
||||||
caB, err := ca.MarshalPEM()
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
ca2, _, caKey2, _ := cert_test.NewTestCaCert(cert.Version2, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})
|
|
||||||
|
|
||||||
ca2B, err := ca2.MarshalPEM()
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
caStr := fmt.Sprintf("%s\n%s", caB, ca2B)
|
|
||||||
|
|
||||||
myCert, _, myPrivKey, _ := cert_test.NewTestCert(cert.Version1, cert.Curve_CURVE25519, ca, caKey, "me", time.Now(), time.Now().Add(5*time.Minute), []netip.Prefix{netip.MustParsePrefix("10.128.0.1/24")}, nil, []string{})
|
|
||||||
_, myCert2Pem := cert_test.NewTestCertDifferentVersion(myCert, cert.Version2, ca2, caKey2)
|
|
||||||
|
|
||||||
theirCert, _, theirPrivKey, _ := cert_test.NewTestCert(cert.Version1, cert.Curve_CURVE25519, ca, caKey, "them", time.Now(), time.Now().Add(5*time.Minute), []netip.Prefix{netip.MustParsePrefix("10.128.0.2/24")}, nil, []string{})
|
|
||||||
theirCert2, _ := cert_test.NewTestCertDifferentVersion(theirCert, cert.Version2, ca2, caKey2)
|
|
||||||
|
|
||||||
myControl, myVpnIpNet, myUdpAddr, myC := newServer([]cert.Certificate{ca, ca2}, []cert.Certificate{myCert}, myPrivKey, m{})
|
|
||||||
theirControl, theirVpnIpNet, theirUdpAddr, _ := newServer([]cert.Certificate{ca, ca2}, []cert.Certificate{theirCert, theirCert2}, theirPrivKey, m{})
|
|
||||||
|
|
||||||
// Share our underlay information
|
|
||||||
myControl.InjectLightHouseAddr(theirVpnIpNet[0].Addr(), theirUdpAddr)
|
|
||||||
theirControl.InjectLightHouseAddr(myVpnIpNet[0].Addr(), myUdpAddr)
|
|
||||||
|
|
||||||
// Start the servers
|
|
||||||
myControl.Start()
|
|
||||||
theirControl.Start()
|
|
||||||
|
|
||||||
r := router.NewR(t, myControl, theirControl)
|
|
||||||
defer r.RenderFlow()
|
|
||||||
|
|
||||||
r.Log("Assert the tunnel between me and them works")
|
|
||||||
assertTunnel(t, myVpnIpNet[0].Addr(), theirVpnIpNet[0].Addr(), myControl, theirControl, r)
|
|
||||||
r.Log("yay")
|
|
||||||
//todo ???
|
|
||||||
time.Sleep(1 * time.Second)
|
|
||||||
r.FlushAll()
|
|
||||||
|
|
||||||
mc := m{
|
|
||||||
"pki": m{
|
|
||||||
"ca": caStr,
|
|
||||||
"cert": string(myCert2Pem),
|
|
||||||
"key": string(myPrivKey),
|
|
||||||
},
|
|
||||||
//"tun": m{"disabled": true},
|
|
||||||
"firewall": myC.Settings["firewall"],
|
|
||||||
//"handshakes": m{
|
|
||||||
// "try_interval": "1s",
|
|
||||||
//},
|
|
||||||
"listen": myC.Settings["listen"],
|
|
||||||
"logging": myC.Settings["logging"],
|
|
||||||
"timers": myC.Settings["timers"],
|
|
||||||
}
|
|
||||||
|
|
||||||
cb, err := yaml.Marshal(mc)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
r.Logf("reload new v2-only config")
|
|
||||||
err = myC.ReloadConfigString(string(cb))
|
|
||||||
assert.NoError(t, err)
|
|
||||||
r.Log("yay, spin until their sees it")
|
|
||||||
waitStart := time.Now()
|
|
||||||
for {
|
|
||||||
assertTunnel(t, myVpnIpNet[0].Addr(), theirVpnIpNet[0].Addr(), myControl, theirControl, r)
|
|
||||||
c := theirControl.GetHostInfoByVpnAddr(myVpnIpNet[0].Addr(), false)
|
|
||||||
if c == nil {
|
|
||||||
r.Log("nil")
|
|
||||||
} else {
|
|
||||||
version := c.Cert.Version()
|
|
||||||
r.Logf("version %d", version)
|
|
||||||
if version == cert.Version2 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
since := time.Since(waitStart)
|
|
||||||
if since > time.Second*10 {
|
|
||||||
t.Fatal("Cert should be new by now")
|
|
||||||
}
|
|
||||||
time.Sleep(time.Second)
|
|
||||||
}
|
|
||||||
|
|
||||||
r.RenderHostmaps("Final hostmaps", myControl, theirControl)
|
|
||||||
|
|
||||||
myControl.Stop()
|
|
||||||
theirControl.Stop()
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCertDowngrade(t *testing.T) {
|
|
||||||
// The goal of this test is to ensure the shortest inactivity timeout will close the tunnel on both sides
|
|
||||||
// under ideal conditions
|
|
||||||
ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})
|
|
||||||
caB, err := ca.MarshalPEM()
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
ca2, _, caKey2, _ := cert_test.NewTestCaCert(cert.Version2, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})
|
|
||||||
|
|
||||||
ca2B, err := ca2.MarshalPEM()
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
caStr := fmt.Sprintf("%s\n%s", caB, ca2B)
|
|
||||||
|
|
||||||
myCert, _, myPrivKey, myCertPem := cert_test.NewTestCert(cert.Version1, cert.Curve_CURVE25519, ca, caKey, "me", time.Now(), time.Now().Add(5*time.Minute), []netip.Prefix{netip.MustParsePrefix("10.128.0.1/24")}, nil, []string{})
|
|
||||||
myCert2, _ := cert_test.NewTestCertDifferentVersion(myCert, cert.Version2, ca2, caKey2)
|
|
||||||
|
|
||||||
theirCert, _, theirPrivKey, _ := cert_test.NewTestCert(cert.Version1, cert.Curve_CURVE25519, ca, caKey, "them", time.Now(), time.Now().Add(5*time.Minute), []netip.Prefix{netip.MustParsePrefix("10.128.0.2/24")}, nil, []string{})
|
|
||||||
theirCert2, _ := cert_test.NewTestCertDifferentVersion(theirCert, cert.Version2, ca2, caKey2)
|
|
||||||
|
|
||||||
myControl, myVpnIpNet, myUdpAddr, myC := newServer([]cert.Certificate{ca, ca2}, []cert.Certificate{myCert2}, myPrivKey, m{})
|
|
||||||
theirControl, theirVpnIpNet, theirUdpAddr, _ := newServer([]cert.Certificate{ca, ca2}, []cert.Certificate{theirCert, theirCert2}, theirPrivKey, m{})
|
|
||||||
|
|
||||||
// Share our underlay information
|
|
||||||
myControl.InjectLightHouseAddr(theirVpnIpNet[0].Addr(), theirUdpAddr)
|
|
||||||
theirControl.InjectLightHouseAddr(myVpnIpNet[0].Addr(), myUdpAddr)
|
|
||||||
|
|
||||||
// Start the servers
|
|
||||||
myControl.Start()
|
|
||||||
theirControl.Start()
|
|
||||||
|
|
||||||
r := router.NewR(t, myControl, theirControl)
|
|
||||||
defer r.RenderFlow()
|
|
||||||
|
|
||||||
r.Log("Assert the tunnel between me and them works")
|
|
||||||
//assertTunnel(t, theirVpnIpNet[0].Addr(), myVpnIpNet[0].Addr(), theirControl, myControl, r)
|
|
||||||
//r.Log("yay")
|
|
||||||
assertTunnel(t, myVpnIpNet[0].Addr(), theirVpnIpNet[0].Addr(), myControl, theirControl, r)
|
|
||||||
r.Log("yay")
|
|
||||||
//todo ???
|
|
||||||
time.Sleep(1 * time.Second)
|
|
||||||
r.FlushAll()
|
|
||||||
|
|
||||||
mc := m{
|
|
||||||
"pki": m{
|
|
||||||
"ca": caStr,
|
|
||||||
"cert": string(myCertPem),
|
|
||||||
"key": string(myPrivKey),
|
|
||||||
},
|
|
||||||
"firewall": myC.Settings["firewall"],
|
|
||||||
"listen": myC.Settings["listen"],
|
|
||||||
"logging": myC.Settings["logging"],
|
|
||||||
"timers": myC.Settings["timers"],
|
|
||||||
}
|
|
||||||
|
|
||||||
cb, err := yaml.Marshal(mc)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
r.Logf("reload new v1-only config")
|
|
||||||
err = myC.ReloadConfigString(string(cb))
|
|
||||||
assert.NoError(t, err)
|
|
||||||
r.Log("yay, spin until their sees it")
|
|
||||||
waitStart := time.Now()
|
|
||||||
for {
|
|
||||||
assertTunnel(t, myVpnIpNet[0].Addr(), theirVpnIpNet[0].Addr(), myControl, theirControl, r)
|
|
||||||
c := theirControl.GetHostInfoByVpnAddr(myVpnIpNet[0].Addr(), false)
|
|
||||||
c2 := myControl.GetHostInfoByVpnAddr(theirVpnIpNet[0].Addr(), false)
|
|
||||||
if c == nil || c2 == nil {
|
|
||||||
r.Log("nil")
|
|
||||||
} else {
|
|
||||||
version := c.Cert.Version()
|
|
||||||
theirVersion := c2.Cert.Version()
|
|
||||||
r.Logf("version %d,%d", version, theirVersion)
|
|
||||||
if version == cert.Version1 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
since := time.Since(waitStart)
|
|
||||||
if since > time.Second*5 {
|
|
||||||
r.Log("it is unusual that the cert is not new yet, but not a failure yet")
|
|
||||||
}
|
|
||||||
if since > time.Second*10 {
|
|
||||||
r.Log("wtf")
|
|
||||||
t.Fatal("Cert should be new by now")
|
|
||||||
}
|
|
||||||
time.Sleep(time.Second)
|
|
||||||
}
|
|
||||||
|
|
||||||
r.RenderHostmaps("Final hostmaps", myControl, theirControl)
|
|
||||||
|
|
||||||
myControl.Stop()
|
|
||||||
theirControl.Stop()
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestCertMismatchCorrection(t *testing.T) {
|
|
||||||
// The goal of this test is to ensure the shortest inactivity timeout will close the tunnel on both sides
|
|
||||||
// under ideal conditions
|
|
||||||
ca, _, caKey, _ := cert_test.NewTestCaCert(cert.Version1, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})
|
|
||||||
ca2, _, caKey2, _ := cert_test.NewTestCaCert(cert.Version2, cert.Curve_CURVE25519, time.Now(), time.Now().Add(10*time.Minute), nil, nil, []string{})
|
|
||||||
|
|
||||||
myCert, _, myPrivKey, _ := cert_test.NewTestCert(cert.Version1, cert.Curve_CURVE25519, ca, caKey, "me", time.Now(), time.Now().Add(5*time.Minute), []netip.Prefix{netip.MustParsePrefix("10.128.0.1/24")}, nil, []string{})
|
|
||||||
myCert2, _ := cert_test.NewTestCertDifferentVersion(myCert, cert.Version2, ca2, caKey2)
|
|
||||||
|
|
||||||
theirCert, _, theirPrivKey, _ := cert_test.NewTestCert(cert.Version1, cert.Curve_CURVE25519, ca, caKey, "them", time.Now(), time.Now().Add(5*time.Minute), []netip.Prefix{netip.MustParsePrefix("10.128.0.2/24")}, nil, []string{})
|
|
||||||
theirCert2, _ := cert_test.NewTestCertDifferentVersion(theirCert, cert.Version2, ca2, caKey2)
|
|
||||||
|
|
||||||
myControl, myVpnIpNet, myUdpAddr, _ := newServer([]cert.Certificate{ca, ca2}, []cert.Certificate{myCert2}, myPrivKey, m{})
|
|
||||||
theirControl, theirVpnIpNet, theirUdpAddr, _ := newServer([]cert.Certificate{ca, ca2}, []cert.Certificate{theirCert, theirCert2}, theirPrivKey, m{})
|
|
||||||
|
|
||||||
// Share our underlay information
|
|
||||||
myControl.InjectLightHouseAddr(theirVpnIpNet[0].Addr(), theirUdpAddr)
|
|
||||||
theirControl.InjectLightHouseAddr(myVpnIpNet[0].Addr(), myUdpAddr)
|
|
||||||
|
|
||||||
// Start the servers
|
|
||||||
myControl.Start()
|
|
||||||
theirControl.Start()
|
|
||||||
|
|
||||||
r := router.NewR(t, myControl, theirControl)
|
|
||||||
defer r.RenderFlow()
|
|
||||||
|
|
||||||
r.Log("Assert the tunnel between me and them works")
|
|
||||||
//assertTunnel(t, theirVpnIpNet[0].Addr(), myVpnIpNet[0].Addr(), theirControl, myControl, r)
|
|
||||||
//r.Log("yay")
|
|
||||||
assertTunnel(t, myVpnIpNet[0].Addr(), theirVpnIpNet[0].Addr(), myControl, theirControl, r)
|
|
||||||
r.Log("yay")
|
|
||||||
//todo ???
|
|
||||||
time.Sleep(1 * time.Second)
|
|
||||||
r.FlushAll()
|
|
||||||
|
|
||||||
waitStart := time.Now()
|
|
||||||
for {
|
|
||||||
assertTunnel(t, myVpnIpNet[0].Addr(), theirVpnIpNet[0].Addr(), myControl, theirControl, r)
|
|
||||||
c := theirControl.GetHostInfoByVpnAddr(myVpnIpNet[0].Addr(), false)
|
|
||||||
c2 := myControl.GetHostInfoByVpnAddr(theirVpnIpNet[0].Addr(), false)
|
|
||||||
if c == nil || c2 == nil {
|
|
||||||
r.Log("nil")
|
|
||||||
} else {
|
|
||||||
version := c.Cert.Version()
|
|
||||||
theirVersion := c2.Cert.Version()
|
|
||||||
r.Logf("version %d,%d", version, theirVersion)
|
|
||||||
if version == theirVersion {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
since := time.Since(waitStart)
|
|
||||||
if since > time.Second*5 {
|
|
||||||
r.Log("wtf")
|
|
||||||
}
|
|
||||||
if since > time.Second*10 {
|
|
||||||
r.Log("wtf")
|
|
||||||
t.Fatal("Cert should be new by now")
|
|
||||||
}
|
|
||||||
time.Sleep(time.Second)
|
|
||||||
}
|
|
||||||
|
|
||||||
r.RenderHostmaps("Final hostmaps", myControl, theirControl)
|
|
||||||
|
|
||||||
myControl.Stop()
|
|
||||||
theirControl.Stop()
|
|
||||||
}
|
|
||||||
|
|||||||
10
firewall.go
10
firewall.go
@@ -423,7 +423,7 @@ var ErrNoMatchingRule = errors.New("no matching rule in firewall table")
|
|||||||
|
|
||||||
// Drop returns an error if the packet should be dropped, explaining why. It
|
// Drop returns an error if the packet should be dropped, explaining why. It
|
||||||
// returns nil if the packet should not be dropped.
|
// returns nil if the packet should not be dropped.
|
||||||
func (f *Firewall) Drop(fp firewall.Packet, incoming bool, h *HostInfo, caPool *cert.CAPool, localCache *firewall.ConntrackCache) error {
|
func (f *Firewall) Drop(fp firewall.Packet, incoming bool, h *HostInfo, caPool *cert.CAPool, localCache firewall.ConntrackCache) error {
|
||||||
// Check if we spoke to this tuple, if we did then allow this packet
|
// Check if we spoke to this tuple, if we did then allow this packet
|
||||||
if f.inConns(fp, h, caPool, localCache) {
|
if f.inConns(fp, h, caPool, localCache) {
|
||||||
return nil
|
return nil
|
||||||
@@ -490,10 +490,12 @@ func (f *Firewall) EmitStats() {
|
|||||||
metrics.GetOrRegisterGauge("firewall.rules.hash", nil).Update(int64(f.GetRuleHashFNV()))
|
metrics.GetOrRegisterGauge("firewall.rules.hash", nil).Update(int64(f.GetRuleHashFNV()))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *Firewall) inConns(fp firewall.Packet, h *HostInfo, caPool *cert.CAPool, localCache *firewall.ConntrackCache) bool {
|
func (f *Firewall) inConns(fp firewall.Packet, h *HostInfo, caPool *cert.CAPool, localCache firewall.ConntrackCache) bool {
|
||||||
if localCache != nil && localCache.Has(fp) {
|
if localCache != nil {
|
||||||
|
if _, ok := localCache[fp]; ok {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
}
|
||||||
conntrack := f.Conntrack
|
conntrack := f.Conntrack
|
||||||
conntrack.Lock()
|
conntrack.Lock()
|
||||||
|
|
||||||
@@ -557,7 +559,7 @@ func (f *Firewall) inConns(fp firewall.Packet, h *HostInfo, caPool *cert.CAPool,
|
|||||||
conntrack.Unlock()
|
conntrack.Unlock()
|
||||||
|
|
||||||
if localCache != nil {
|
if localCache != nil {
|
||||||
localCache.Add(fp)
|
localCache[fp] = struct{}{}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true
|
return true
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
package firewall
|
package firewall
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"sync"
|
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -10,58 +9,13 @@ import (
|
|||||||
|
|
||||||
// ConntrackCache is used as a local routine cache to know if a given flow
|
// ConntrackCache is used as a local routine cache to know if a given flow
|
||||||
// has been seen in the conntrack table.
|
// has been seen in the conntrack table.
|
||||||
type ConntrackCache struct {
|
type ConntrackCache map[Packet]struct{}
|
||||||
mu sync.Mutex
|
|
||||||
entries map[Packet]struct{}
|
|
||||||
}
|
|
||||||
|
|
||||||
func newConntrackCache() *ConntrackCache {
|
|
||||||
return &ConntrackCache{entries: make(map[Packet]struct{})}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *ConntrackCache) Has(p Packet) bool {
|
|
||||||
if c == nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
c.mu.Lock()
|
|
||||||
_, ok := c.entries[p]
|
|
||||||
c.mu.Unlock()
|
|
||||||
return ok
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *ConntrackCache) Add(p Packet) {
|
|
||||||
if c == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
c.mu.Lock()
|
|
||||||
c.entries[p] = struct{}{}
|
|
||||||
c.mu.Unlock()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *ConntrackCache) Len() int {
|
|
||||||
if c == nil {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
c.mu.Lock()
|
|
||||||
l := len(c.entries)
|
|
||||||
c.mu.Unlock()
|
|
||||||
return l
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *ConntrackCache) Reset(capHint int) {
|
|
||||||
if c == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
c.mu.Lock()
|
|
||||||
c.entries = make(map[Packet]struct{}, capHint)
|
|
||||||
c.mu.Unlock()
|
|
||||||
}
|
|
||||||
|
|
||||||
type ConntrackCacheTicker struct {
|
type ConntrackCacheTicker struct {
|
||||||
cacheV uint64
|
cacheV uint64
|
||||||
cacheTick atomic.Uint64
|
cacheTick atomic.Uint64
|
||||||
|
|
||||||
cache *ConntrackCache
|
cache ConntrackCache
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewConntrackCacheTicker(d time.Duration) *ConntrackCacheTicker {
|
func NewConntrackCacheTicker(d time.Duration) *ConntrackCacheTicker {
|
||||||
@@ -69,7 +23,9 @@ func NewConntrackCacheTicker(d time.Duration) *ConntrackCacheTicker {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
c := &ConntrackCacheTicker{cache: newConntrackCache()}
|
c := &ConntrackCacheTicker{
|
||||||
|
cache: ConntrackCache{},
|
||||||
|
}
|
||||||
|
|
||||||
go c.tick(d)
|
go c.tick(d)
|
||||||
|
|
||||||
@@ -85,17 +41,17 @@ func (c *ConntrackCacheTicker) tick(d time.Duration) {
|
|||||||
|
|
||||||
// Get checks if the cache ticker has moved to the next version before returning
|
// Get checks if the cache ticker has moved to the next version before returning
|
||||||
// the map. If it has moved, we reset the map.
|
// the map. If it has moved, we reset the map.
|
||||||
func (c *ConntrackCacheTicker) Get(l *logrus.Logger) *ConntrackCache {
|
func (c *ConntrackCacheTicker) Get(l *logrus.Logger) ConntrackCache {
|
||||||
if c == nil {
|
if c == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if tick := c.cacheTick.Load(); tick != c.cacheV {
|
if tick := c.cacheTick.Load(); tick != c.cacheV {
|
||||||
c.cacheV = tick
|
c.cacheV = tick
|
||||||
if ll := c.cache.Len(); ll > 0 {
|
if ll := len(c.cache); ll > 0 {
|
||||||
if l.Level == logrus.DebugLevel {
|
if l.Level == logrus.DebugLevel {
|
||||||
l.WithField("len", ll).Debug("resetting conntrack cache")
|
l.WithField("len", ll).Debug("resetting conntrack cache")
|
||||||
}
|
}
|
||||||
c.cache.Reset(ll)
|
c.cache = make(ConntrackCache, ll)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
6
go.mod
6
go.mod
@@ -29,11 +29,11 @@ require (
|
|||||||
golang.org/x/sys v0.37.0
|
golang.org/x/sys v0.37.0
|
||||||
golang.org/x/term v0.36.0
|
golang.org/x/term v0.36.0
|
||||||
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2
|
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2
|
||||||
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b
|
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb
|
||||||
golang.zx2c4.com/wireguard/windows v0.5.3
|
golang.zx2c4.com/wireguard/windows v0.5.3
|
||||||
google.golang.org/protobuf v1.36.8
|
google.golang.org/protobuf v1.36.8
|
||||||
gopkg.in/yaml.v3 v3.0.1
|
gopkg.in/yaml.v3 v3.0.1
|
||||||
gvisor.dev/gvisor v0.0.0-20240423190808-9d7a357edefe
|
gvisor.dev/gvisor v0.0.0-20250503011706-39ed1f5ac29c
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
@@ -49,6 +49,6 @@ require (
|
|||||||
github.com/vishvananda/netns v0.0.5 // indirect
|
github.com/vishvananda/netns v0.0.5 // indirect
|
||||||
go.yaml.in/yaml/v2 v2.4.2 // indirect
|
go.yaml.in/yaml/v2 v2.4.2 // indirect
|
||||||
golang.org/x/mod v0.24.0 // indirect
|
golang.org/x/mod v0.24.0 // indirect
|
||||||
golang.org/x/time v0.5.0 // indirect
|
golang.org/x/time v0.7.0 // indirect
|
||||||
golang.org/x/tools v0.33.0 // indirect
|
golang.org/x/tools v0.33.0 // indirect
|
||||||
)
|
)
|
||||||
|
|||||||
12
go.sum
12
go.sum
@@ -215,8 +215,8 @@ golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss=
|
|||||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
|
golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ=
|
||||||
golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
|
golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
|
||||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||||
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||||
@@ -230,8 +230,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
|
|||||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 h1:B82qJJgjvYKsXS9jeunTOisW56dUokqW/FOteYJJ/yg=
|
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 h1:B82qJJgjvYKsXS9jeunTOisW56dUokqW/FOteYJJ/yg=
|
||||||
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2/go.mod h1:deeaetjYA+DHMHg+sMSMI58GrEteJUUzzw7en6TJQcI=
|
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2/go.mod h1:deeaetjYA+DHMHg+sMSMI58GrEteJUUzzw7en6TJQcI=
|
||||||
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b h1:J1CaxgLerRR5lgx3wnr6L04cJFbWoceSK9JWBdglINo=
|
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb h1:whnFRlWMcXI9d+ZbWg+4sHnLp52d5yiIPUxMBSt4X9A=
|
||||||
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b/go.mod h1:tqur9LnfstdR9ep2LaJT4lFUl0EjlHtge+gAjmsHUG4=
|
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb/go.mod h1:rpwXGsirqLqN2L0JDJQlwOboGHmptD5ZD6T2VmcqhTw=
|
||||||
golang.zx2c4.com/wireguard/windows v0.5.3 h1:On6j2Rpn3OEMXqBq00QEDC7bWSZrPIHKIus8eIuExIE=
|
golang.zx2c4.com/wireguard/windows v0.5.3 h1:On6j2Rpn3OEMXqBq00QEDC7bWSZrPIHKIus8eIuExIE=
|
||||||
golang.zx2c4.com/wireguard/windows v0.5.3/go.mod h1:9TEe8TJmtwyQebdFwAkEWOPr3prrtqm+REGFifP60hI=
|
golang.zx2c4.com/wireguard/windows v0.5.3/go.mod h1:9TEe8TJmtwyQebdFwAkEWOPr3prrtqm+REGFifP60hI=
|
||||||
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
|
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
|
||||||
@@ -257,5 +257,5 @@ gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
|||||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
gvisor.dev/gvisor v0.0.0-20240423190808-9d7a357edefe h1:fre4i6mv4iBuz5lCMOzHD1rH1ljqHWSICFmZRbbgp3g=
|
gvisor.dev/gvisor v0.0.0-20250503011706-39ed1f5ac29c h1:m/r7OM+Y2Ty1sgBQ7Qb27VgIMBW8ZZhT4gLnUyDIhzI=
|
||||||
gvisor.dev/gvisor v0.0.0-20240423190808-9d7a357edefe/go.mod h1:sxc3Uvk/vHcd3tj7/DHVBoR5wvWT/MmRq2pj7HRJnwU=
|
gvisor.dev/gvisor v0.0.0-20250503011706-39ed1f5ac29c/go.mod h1:3r5CMtNQMKIvBlrmM9xWUNamjKBYPOWyXOjmg5Kts3g=
|
||||||
|
|||||||
@@ -23,19 +23,15 @@ func ixHandshakeStage0(f *Interface, hh *HandshakeHostInfo) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we're connecting to a v6 address we must use a v2 cert
|
||||||
cs := f.pki.getCertState()
|
cs := f.pki.getCertState()
|
||||||
v := cs.initiatingVersion
|
v := cs.initiatingVersion
|
||||||
if hh.initiatingVersionOverride != cert.VersionPre1 {
|
|
||||||
v = hh.initiatingVersionOverride
|
|
||||||
} else if v < cert.Version2 {
|
|
||||||
// If we're connecting to a v6 address we should encourage use of a V2 cert
|
|
||||||
for _, a := range hh.hostinfo.vpnAddrs {
|
for _, a := range hh.hostinfo.vpnAddrs {
|
||||||
if a.Is6() {
|
if a.Is6() {
|
||||||
v = cert.Version2
|
v = cert.Version2
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
crt := cs.getCertificate(v)
|
crt := cs.getCertificate(v)
|
||||||
if crt == nil {
|
if crt == nil {
|
||||||
@@ -52,7 +48,6 @@ func ixHandshakeStage0(f *Interface, hh *HandshakeHostInfo) bool {
|
|||||||
WithField("handshake", m{"stage": 0, "style": "ix_psk0"}).
|
WithField("handshake", m{"stage": 0, "style": "ix_psk0"}).
|
||||||
WithField("certVersion", v).
|
WithField("certVersion", v).
|
||||||
Error("Unable to handshake with host because no certificate handshake bytes is available")
|
Error("Unable to handshake with host because no certificate handshake bytes is available")
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ci, err := NewConnectionState(f.l, cs, crt, true, noise.HandshakeIX)
|
ci, err := NewConnectionState(f.l, cs, crt, true, noise.HandshakeIX)
|
||||||
@@ -108,7 +103,6 @@ func ixHandshakeStage1(f *Interface, addr netip.AddrPort, via *ViaSender, packet
|
|||||||
WithField("handshake", m{"stage": 0, "style": "ix_psk0"}).
|
WithField("handshake", m{"stage": 0, "style": "ix_psk0"}).
|
||||||
WithField("certVersion", cs.initiatingVersion).
|
WithField("certVersion", cs.initiatingVersion).
|
||||||
Error("Unable to handshake with host because no certificate is available")
|
Error("Unable to handshake with host because no certificate is available")
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ci, err := NewConnectionState(f.l, cs, crt, false, noise.HandshakeIX)
|
ci, err := NewConnectionState(f.l, cs, crt, false, noise.HandshakeIX)
|
||||||
@@ -149,8 +143,8 @@ func ixHandshakeStage1(f *Interface, addr netip.AddrPort, via *ViaSender, packet
|
|||||||
|
|
||||||
remoteCert, err := f.pki.GetCAPool().VerifyCertificate(time.Now(), rc)
|
remoteCert, err := f.pki.GetCAPool().VerifyCertificate(time.Now(), rc)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fp, fperr := rc.Fingerprint()
|
fp, err := rc.Fingerprint()
|
||||||
if fperr != nil {
|
if err != nil {
|
||||||
fp = "<error generating certificate fingerprint>"
|
fp = "<error generating certificate fingerprint>"
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -169,19 +163,16 @@ func ixHandshakeStage1(f *Interface, addr netip.AddrPort, via *ViaSender, packet
|
|||||||
|
|
||||||
if remoteCert.Certificate.Version() != ci.myCert.Version() {
|
if remoteCert.Certificate.Version() != ci.myCert.Version() {
|
||||||
// We started off using the wrong certificate version, lets see if we can match the version that was sent to us
|
// We started off using the wrong certificate version, lets see if we can match the version that was sent to us
|
||||||
myCertOtherVersion := cs.getCertificate(remoteCert.Certificate.Version())
|
rc := cs.getCertificate(remoteCert.Certificate.Version())
|
||||||
if myCertOtherVersion == nil {
|
if rc == nil {
|
||||||
if f.l.Level >= logrus.DebugLevel {
|
f.l.WithError(err).WithField("udpAddr", addr).
|
||||||
f.l.WithError(err).WithFields(m{
|
WithField("handshake", m{"stage": 1, "style": "ix_psk0"}).WithField("cert", remoteCert).
|
||||||
"udpAddr": addr,
|
Info("Unable to handshake with host due to missing certificate version")
|
||||||
"handshake": m{"stage": 1, "style": "ix_psk0"},
|
return
|
||||||
"cert": remoteCert,
|
|
||||||
}).Debug("Might be unable to handshake with host due to missing certificate version")
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
// Record the certificate we are actually using
|
// Record the certificate we are actually using
|
||||||
ci.myCert = myCertOtherVersion
|
ci.myCert = rc
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(remoteCert.Certificate.Networks()) == 0 {
|
if len(remoteCert.Certificate.Networks()) == 0 {
|
||||||
|
|||||||
@@ -70,7 +70,6 @@ type HandshakeHostInfo struct {
|
|||||||
|
|
||||||
startTime time.Time // Time that we first started trying with this handshake
|
startTime time.Time // Time that we first started trying with this handshake
|
||||||
ready bool // Is the handshake ready
|
ready bool // Is the handshake ready
|
||||||
initiatingVersionOverride cert.Version // Should we use a non-default cert version for this handshake?
|
|
||||||
counter int64 // How many attempts have we made so far
|
counter int64 // How many attempts have we made so far
|
||||||
lastRemotes []netip.AddrPort // Remotes that we sent to during the previous attempt
|
lastRemotes []netip.AddrPort // Remotes that we sent to during the previous attempt
|
||||||
packetStore []*cachedPacket // A set of packets to be transmitted once the handshake completes
|
packetStore []*cachedPacket // A set of packets to be transmitted once the handshake completes
|
||||||
|
|||||||
92
inside.go
92
inside.go
@@ -2,18 +2,16 @@ package nebula
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"net/netip"
|
"net/netip"
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/slackhq/nebula/firewall"
|
"github.com/slackhq/nebula/firewall"
|
||||||
"github.com/slackhq/nebula/header"
|
"github.com/slackhq/nebula/header"
|
||||||
"github.com/slackhq/nebula/iputil"
|
"github.com/slackhq/nebula/iputil"
|
||||||
"github.com/slackhq/nebula/noiseutil"
|
"github.com/slackhq/nebula/noiseutil"
|
||||||
"github.com/slackhq/nebula/overlay"
|
|
||||||
"github.com/slackhq/nebula/routing"
|
"github.com/slackhq/nebula/routing"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet, nb, out []byte, q int, localCache *firewall.ConntrackCache) {
|
func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet, nb, out []byte, q int, localCache firewall.ConntrackCache) {
|
||||||
err := newPacket(packet, false, fwPacket)
|
err := newPacket(packet, false, fwPacket)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if f.l.Level >= logrus.DebugLevel {
|
if f.l.Level >= logrus.DebugLevel {
|
||||||
@@ -35,8 +33,7 @@ func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet
|
|||||||
// routes packets from the Nebula addr to the Nebula addr through the Nebula
|
// routes packets from the Nebula addr to the Nebula addr through the Nebula
|
||||||
// TUN device.
|
// TUN device.
|
||||||
if immediatelyForwardToSelf {
|
if immediatelyForwardToSelf {
|
||||||
_, err := f.readers[q].Write(packet)
|
if err := f.writeTun(q, packet); err != nil {
|
||||||
if err != nil {
|
|
||||||
f.l.WithError(err).Error("Failed to forward to tun")
|
f.l.WithError(err).Error("Failed to forward to tun")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -93,8 +90,7 @@ func (f *Interface) rejectInside(packet []byte, out []byte, q int) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err := f.readers[q].Write(out)
|
if err := f.writeTun(q, out); err != nil {
|
||||||
if err != nil {
|
|
||||||
f.l.WithError(err).Error("Failed to write to tun")
|
f.l.WithError(err).Error("Failed to write to tun")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -337,21 +333,9 @@ func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType
|
|||||||
if ci.eKey == nil {
|
if ci.eKey == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
target := remote
|
useRelay := !remote.IsValid() && !hostinfo.remote.IsValid()
|
||||||
if !target.IsValid() {
|
|
||||||
target = hostinfo.remote
|
|
||||||
}
|
|
||||||
useRelay := !target.IsValid()
|
|
||||||
fullOut := out
|
fullOut := out
|
||||||
|
|
||||||
var pkt *overlay.Packet
|
|
||||||
if !useRelay && f.batches.Enabled() {
|
|
||||||
pkt = f.batches.newPacket()
|
|
||||||
if pkt != nil {
|
|
||||||
out = pkt.Payload()[:0]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if useRelay {
|
if useRelay {
|
||||||
if len(out) < header.Len {
|
if len(out) < header.Len {
|
||||||
// out always has a capacity of mtu, but not always a length greater than the header.Len.
|
// out always has a capacity of mtu, but not always a length greater than the header.Len.
|
||||||
@@ -385,62 +369,31 @@ func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType
|
|||||||
}
|
}
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
if len(p) > 0 && slicesOverlap(out, p) {
|
|
||||||
tmp := make([]byte, len(p))
|
|
||||||
copy(tmp, p)
|
|
||||||
p = tmp
|
|
||||||
}
|
|
||||||
out, err = ci.eKey.EncryptDanger(out, out, p, c, nb)
|
out, err = ci.eKey.EncryptDanger(out, out, p, c, nb)
|
||||||
if noiseutil.EncryptLockNeeded {
|
if noiseutil.EncryptLockNeeded {
|
||||||
ci.writeLock.Unlock()
|
ci.writeLock.Unlock()
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if pkt != nil {
|
|
||||||
pkt.Release()
|
|
||||||
}
|
|
||||||
hostinfo.logger(f.l).WithError(err).
|
hostinfo.logger(f.l).WithError(err).
|
||||||
WithField("udpAddr", target).WithField("counter", c).
|
WithField("udpAddr", remote).WithField("counter", c).
|
||||||
WithField("attemptedCounter", c).
|
WithField("attemptedCounter", c).
|
||||||
Error("Failed to encrypt outgoing packet")
|
Error("Failed to encrypt outgoing packet")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if target.IsValid() {
|
if remote.IsValid() {
|
||||||
if pkt != nil {
|
err = f.writers[q].WriteTo(out, remote)
|
||||||
pkt.Len = len(out)
|
if err != nil {
|
||||||
if f.l.Level >= logrus.DebugLevel {
|
hostinfo.logger(f.l).WithError(err).
|
||||||
f.l.WithFields(logrus.Fields{
|
WithField("udpAddr", remote).Error("Failed to write outgoing packet")
|
||||||
"queue": q,
|
|
||||||
"dest": target,
|
|
||||||
"payload_len": pkt.Len,
|
|
||||||
"use_batches": true,
|
|
||||||
"remote_index": hostinfo.remoteIndexId,
|
|
||||||
}).Debug("enqueueing packet to UDP batch queue")
|
|
||||||
}
|
}
|
||||||
if f.tryQueuePacket(q, pkt, target) {
|
} else if hostinfo.remote.IsValid() {
|
||||||
return
|
err = f.writers[q].WriteTo(out, hostinfo.remote)
|
||||||
|
if err != nil {
|
||||||
|
hostinfo.logger(f.l).WithError(err).
|
||||||
|
WithField("udpAddr", remote).Error("Failed to write outgoing packet")
|
||||||
}
|
}
|
||||||
if f.l.Level >= logrus.DebugLevel {
|
} else {
|
||||||
f.l.WithFields(logrus.Fields{
|
|
||||||
"queue": q,
|
|
||||||
"dest": target,
|
|
||||||
}).Debug("failed to enqueue packet; falling back to immediate send")
|
|
||||||
}
|
|
||||||
f.writeImmediatePacket(q, pkt, target, hostinfo)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if f.tryQueueDatagram(q, out, target) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
f.writeImmediate(q, out, target, hostinfo)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// fall back to relay path
|
|
||||||
if pkt != nil {
|
|
||||||
pkt.Release()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to send via a relay
|
// Try to send via a relay
|
||||||
for _, relayIP := range hostinfo.relayState.CopyRelayIps() {
|
for _, relayIP := range hostinfo.relayState.CopyRelayIps() {
|
||||||
relayHostInfo, relay, err := f.hostMap.QueryVpnAddrsRelayFor(hostinfo.vpnAddrs, relayIP)
|
relayHostInfo, relay, err := f.hostMap.QueryVpnAddrsRelayFor(hostinfo.vpnAddrs, relayIP)
|
||||||
@@ -453,17 +406,4 @@ func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// slicesOverlap reports whether the two byte slices share any portion of memory.
|
|
||||||
// cipher.AEAD.Seal requires plaintext and dst to live in disjoint regions.
|
|
||||||
func slicesOverlap(a, b []byte) bool {
|
|
||||||
if len(a) == 0 || len(b) == 0 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
aStart := uintptr(unsafe.Pointer(&a[0]))
|
|
||||||
aEnd := aStart + uintptr(len(a))
|
|
||||||
bStart := uintptr(unsafe.Pointer(&b[0]))
|
|
||||||
bEnd := bStart + uintptr(len(b))
|
|
||||||
return aStart < bEnd && bStart < aEnd
|
|
||||||
}
|
}
|
||||||
|
|||||||
745
interface.go
745
interface.go
@@ -8,7 +8,6 @@ import (
|
|||||||
"net/netip"
|
"net/netip"
|
||||||
"os"
|
"os"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strings"
|
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -22,13 +21,7 @@ import (
|
|||||||
"github.com/slackhq/nebula/udp"
|
"github.com/slackhq/nebula/udp"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const mtu = 9001
|
||||||
mtu = 9001
|
|
||||||
defaultGSOFlushInterval = 150 * time.Microsecond
|
|
||||||
defaultBatchQueueDepthFactor = 4
|
|
||||||
defaultGSOMaxSegments = 8
|
|
||||||
maxKernelGSOSegments = 64
|
|
||||||
)
|
|
||||||
|
|
||||||
type InterfaceConfig struct {
|
type InterfaceConfig struct {
|
||||||
HostMap *HostMap
|
HostMap *HostMap
|
||||||
@@ -43,9 +36,6 @@ type InterfaceConfig struct {
|
|||||||
connectionManager *connectionManager
|
connectionManager *connectionManager
|
||||||
DropLocalBroadcast bool
|
DropLocalBroadcast bool
|
||||||
DropMulticast bool
|
DropMulticast bool
|
||||||
EnableGSO bool
|
|
||||||
EnableGRO bool
|
|
||||||
GSOMaxSegments int
|
|
||||||
routines int
|
routines int
|
||||||
MessageMetrics *MessageMetrics
|
MessageMetrics *MessageMetrics
|
||||||
version string
|
version string
|
||||||
@@ -57,8 +47,7 @@ type InterfaceConfig struct {
|
|||||||
reQueryWait time.Duration
|
reQueryWait time.Duration
|
||||||
|
|
||||||
ConntrackCacheTimeout time.Duration
|
ConntrackCacheTimeout time.Duration
|
||||||
BatchFlushInterval time.Duration
|
batchSize int
|
||||||
BatchQueueDepth int
|
|
||||||
l *logrus.Logger
|
l *logrus.Logger
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -96,20 +85,10 @@ type Interface struct {
|
|||||||
version string
|
version string
|
||||||
|
|
||||||
conntrackCacheTimeout time.Duration
|
conntrackCacheTimeout time.Duration
|
||||||
batchQueueDepth int
|
batchSize int
|
||||||
enableGSO bool
|
|
||||||
enableGRO bool
|
|
||||||
gsoMaxSegments int
|
|
||||||
batchUDPQueueGauge metrics.Gauge
|
|
||||||
batchUDPFlushCounter metrics.Counter
|
|
||||||
batchTunQueueGauge metrics.Gauge
|
|
||||||
batchTunFlushCounter metrics.Counter
|
|
||||||
batchFlushInterval atomic.Int64
|
|
||||||
sendSem chan struct{}
|
|
||||||
|
|
||||||
writers []udp.Conn
|
writers []udp.Conn
|
||||||
readers []io.ReadWriteCloser
|
readers []io.ReadWriteCloser
|
||||||
batches batchPipelines
|
|
||||||
|
|
||||||
metricHandshakes metrics.Histogram
|
metricHandshakes metrics.Histogram
|
||||||
messageMetrics *MessageMetrics
|
messageMetrics *MessageMetrics
|
||||||
@@ -133,6 +112,16 @@ type EncWriter interface {
|
|||||||
GetCertState() *CertState
|
GetCertState() *CertState
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// BatchReader is an interface for readers that support vectorized packet reading
|
||||||
|
type BatchReader interface {
|
||||||
|
BatchRead(buffers [][]byte, sizes []int) (int, error)
|
||||||
|
}
|
||||||
|
|
||||||
|
// BatchWriter is an interface for writers that support vectorized packet writing
|
||||||
|
type BatchWriter interface {
|
||||||
|
BatchWrite([][]byte) (int, error)
|
||||||
|
}
|
||||||
|
|
||||||
type sendRecvErrorConfig uint8
|
type sendRecvErrorConfig uint8
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -184,22 +173,6 @@ func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
|
|||||||
return nil, errors.New("no connection manager")
|
return nil, errors.New("no connection manager")
|
||||||
}
|
}
|
||||||
|
|
||||||
if c.GSOMaxSegments <= 0 {
|
|
||||||
c.GSOMaxSegments = defaultGSOMaxSegments
|
|
||||||
}
|
|
||||||
if c.GSOMaxSegments > maxKernelGSOSegments {
|
|
||||||
c.GSOMaxSegments = maxKernelGSOSegments
|
|
||||||
}
|
|
||||||
if c.BatchQueueDepth <= 0 {
|
|
||||||
c.BatchQueueDepth = c.routines * defaultBatchQueueDepthFactor
|
|
||||||
}
|
|
||||||
if c.BatchFlushInterval < 0 {
|
|
||||||
c.BatchFlushInterval = 0
|
|
||||||
}
|
|
||||||
if c.BatchFlushInterval == 0 && c.EnableGSO {
|
|
||||||
c.BatchFlushInterval = defaultGSOFlushInterval
|
|
||||||
}
|
|
||||||
|
|
||||||
cs := c.pki.getCertState()
|
cs := c.pki.getCertState()
|
||||||
ifce := &Interface{
|
ifce := &Interface{
|
||||||
pki: c.pki,
|
pki: c.pki,
|
||||||
@@ -225,10 +198,7 @@ func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
|
|||||||
relayManager: c.relayManager,
|
relayManager: c.relayManager,
|
||||||
connectionManager: c.connectionManager,
|
connectionManager: c.connectionManager,
|
||||||
conntrackCacheTimeout: c.ConntrackCacheTimeout,
|
conntrackCacheTimeout: c.ConntrackCacheTimeout,
|
||||||
batchQueueDepth: c.BatchQueueDepth,
|
batchSize: c.batchSize,
|
||||||
enableGSO: c.EnableGSO,
|
|
||||||
enableGRO: c.EnableGRO,
|
|
||||||
gsoMaxSegments: c.GSOMaxSegments,
|
|
||||||
|
|
||||||
metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)),
|
metricHandshakes: metrics.GetOrRegisterHistogram("handshakes", nil, metrics.NewExpDecaySample(1028, 0.015)),
|
||||||
messageMetrics: c.MessageMetrics,
|
messageMetrics: c.MessageMetrics,
|
||||||
@@ -241,25 +211,8 @@ func NewInterface(ctx context.Context, c *InterfaceConfig) (*Interface, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
ifce.tryPromoteEvery.Store(c.tryPromoteEvery)
|
ifce.tryPromoteEvery.Store(c.tryPromoteEvery)
|
||||||
ifce.batchUDPQueueGauge = metrics.GetOrRegisterGauge("batch.udp.queue_depth", nil)
|
|
||||||
ifce.batchUDPFlushCounter = metrics.GetOrRegisterCounter("batch.udp.flushes", nil)
|
|
||||||
ifce.batchTunQueueGauge = metrics.GetOrRegisterGauge("batch.tun.queue_depth", nil)
|
|
||||||
ifce.batchTunFlushCounter = metrics.GetOrRegisterCounter("batch.tun.flushes", nil)
|
|
||||||
ifce.batchFlushInterval.Store(int64(c.BatchFlushInterval))
|
|
||||||
ifce.sendSem = make(chan struct{}, c.routines)
|
|
||||||
ifce.batches.init(c.Inside, c.routines, c.BatchQueueDepth, c.GSOMaxSegments)
|
|
||||||
ifce.reQueryEvery.Store(c.reQueryEvery)
|
ifce.reQueryEvery.Store(c.reQueryEvery)
|
||||||
ifce.reQueryWait.Store(int64(c.reQueryWait))
|
ifce.reQueryWait.Store(int64(c.reQueryWait))
|
||||||
if c.l.Level >= logrus.DebugLevel {
|
|
||||||
c.l.WithFields(logrus.Fields{
|
|
||||||
"enableGSO": c.EnableGSO,
|
|
||||||
"enableGRO": c.EnableGRO,
|
|
||||||
"gsoMaxSegments": c.GSOMaxSegments,
|
|
||||||
"batchQueueDepth": c.BatchQueueDepth,
|
|
||||||
"batchFlush": c.BatchFlushInterval,
|
|
||||||
"batching": ifce.batches.Enabled(),
|
|
||||||
}).Debug("initialized batch pipelines")
|
|
||||||
}
|
|
||||||
|
|
||||||
ifce.connectionManager.intf = ifce
|
ifce.connectionManager.intf = ifce
|
||||||
|
|
||||||
@@ -308,18 +261,6 @@ func (f *Interface) run() {
|
|||||||
go f.listenOut(i)
|
go f.listenOut(i)
|
||||||
}
|
}
|
||||||
|
|
||||||
if f.l.Level >= logrus.DebugLevel {
|
|
||||||
f.l.WithField("batching", f.batches.Enabled()).Debug("starting interface run loops")
|
|
||||||
}
|
|
||||||
|
|
||||||
if f.batches.Enabled() {
|
|
||||||
for i := 0; i < f.routines; i++ {
|
|
||||||
go f.runInsideBatchWorker(i)
|
|
||||||
go f.runTunWriteQueue(i)
|
|
||||||
go f.runSendQueue(i)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Launch n queues to read packets from tun dev
|
// Launch n queues to read packets from tun dev
|
||||||
for i := 0; i < f.routines; i++ {
|
for i := 0; i < f.routines; i++ {
|
||||||
go f.listenIn(f.readers[i], i)
|
go f.listenIn(f.readers[i], i)
|
||||||
@@ -341,7 +282,7 @@ func (f *Interface) listenOut(i int) {
|
|||||||
plaintext := make([]byte, udp.MTU)
|
plaintext := make([]byte, udp.MTU)
|
||||||
h := &header.H{}
|
h := &header.H{}
|
||||||
fwPacket := &firewall.Packet{}
|
fwPacket := &firewall.Packet{}
|
||||||
nb := make([]byte, 12, 12)
|
nb := make([]byte, 12)
|
||||||
|
|
||||||
li.ListenOut(func(fromUdpAddr netip.AddrPort, payload []byte) {
|
li.ListenOut(func(fromUdpAddr netip.AddrPort, payload []byte) {
|
||||||
f.readOutsidePackets(fromUdpAddr, nil, plaintext[:0], payload, h, fwPacket, lhh, nb, i, ctCache.Get(f.l))
|
f.readOutsidePackets(fromUdpAddr, nil, plaintext[:0], payload, h, fwPacket, lhh, nb, i, ctCache.Get(f.l))
|
||||||
@@ -351,17 +292,16 @@ func (f *Interface) listenOut(i int) {
|
|||||||
func (f *Interface) listenIn(reader io.ReadWriteCloser, i int) {
|
func (f *Interface) listenIn(reader io.ReadWriteCloser, i int) {
|
||||||
runtime.LockOSThread()
|
runtime.LockOSThread()
|
||||||
|
|
||||||
if f.batches.Enabled() {
|
// Check if reader supports batch operations
|
||||||
if br, ok := reader.(overlay.BatchReader); ok {
|
if batchReader, ok := reader.(BatchReader); ok {
|
||||||
f.listenInBatchLocked(reader, br, i)
|
err := f.listenInBatch(batchReader, i)
|
||||||
|
if err != nil {
|
||||||
|
f.l.WithError(err).Error("Fatal error in batch packet reader, exiting goroutine")
|
||||||
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
f.listenInLegacyLocked(reader, i)
|
// Fall back to single-packet mode
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) listenInLegacyLocked(reader io.ReadWriteCloser, i int) {
|
|
||||||
packet := make([]byte, mtu)
|
packet := make([]byte, mtu)
|
||||||
out := make([]byte, mtu)
|
out := make([]byte, mtu)
|
||||||
fwPacket := &firewall.Packet{}
|
fwPacket := &firewall.Packet{}
|
||||||
@@ -376,588 +316,83 @@ func (f *Interface) listenInLegacyLocked(reader io.ReadWriteCloser, i int) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
f.l.WithError(err).Error("Error while reading outbound packet")
|
f.l.WithError(err).Error("Fatal error while reading outbound packet, exiting goroutine")
|
||||||
// This only seems to happen when something fatal happens to the fd, so exit.
|
return
|
||||||
os.Exit(2)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
f.consumeInsidePacket(packet[:n], fwPacket, nb, out, i, conntrackCache.Get(f.l))
|
f.consumeInsidePacket(packet[:n], fwPacket, nb, out, i, conntrackCache.Get(f.l))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *Interface) listenInBatchLocked(raw io.ReadWriteCloser, reader overlay.BatchReader, i int) {
|
// listenInBatch handles vectorized packet reading for improved performance
|
||||||
pool := f.batches.Pool()
|
func (f *Interface) listenInBatch(reader BatchReader, i int) error {
|
||||||
if pool == nil {
|
// Allocate per-packet state and buffers for batch reading
|
||||||
f.l.Warn("batch pipeline enabled without an allocated pool; falling back to single-packet reads")
|
batchSize := f.batchSize
|
||||||
f.listenInLegacyLocked(raw, i)
|
if batchSize <= 0 {
|
||||||
return
|
batchSize = 64 // Fallback to default if not configured
|
||||||
|
}
|
||||||
|
fwPackets := make([]*firewall.Packet, batchSize)
|
||||||
|
outBuffers := make([][]byte, batchSize)
|
||||||
|
nbBuffers := make([][]byte, batchSize)
|
||||||
|
packets := make([][]byte, batchSize)
|
||||||
|
sizes := make([]int, batchSize)
|
||||||
|
|
||||||
|
for j := 0; j < batchSize; j++ {
|
||||||
|
fwPackets[j] = &firewall.Packet{}
|
||||||
|
outBuffers[j] = make([]byte, mtu)
|
||||||
|
nbBuffers[j] = make([]byte, 12)
|
||||||
|
packets[j] = make([]byte, mtu)
|
||||||
}
|
}
|
||||||
|
|
||||||
for {
|
|
||||||
packets, err := reader.ReadIntoBatch(pool)
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(err, os.ErrClosed) && f.closed.Load() {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if isVirtioHeadroomError(err) {
|
|
||||||
f.l.WithError(err).Warn("Batch reader fell back due to tun headroom issue")
|
|
||||||
f.listenInLegacyLocked(raw, i)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
f.l.WithError(err).Error("Error while reading outbound packet batch")
|
|
||||||
os.Exit(2)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(packets) == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, pkt := range packets {
|
|
||||||
if pkt == nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if !f.batches.enqueueRx(i, pkt) {
|
|
||||||
pkt.Release()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) runInsideBatchWorker(i int) {
|
|
||||||
queue := f.batches.rxQueue(i)
|
|
||||||
if queue == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
out := make([]byte, mtu)
|
|
||||||
fwPacket := &firewall.Packet{}
|
|
||||||
nb := make([]byte, 12, 12)
|
|
||||||
conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
|
conntrackCache := firewall.NewConntrackCacheTicker(f.conntrackCacheTimeout)
|
||||||
|
|
||||||
for pkt := range queue {
|
|
||||||
if pkt == nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
f.consumeInsidePacket(pkt.Payload(), fwPacket, nb, out, i, conntrackCache.Get(f.l))
|
|
||||||
pkt.Release()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) runSendQueue(i int) {
|
|
||||||
queue := f.batches.txQueue(i)
|
|
||||||
if queue == nil {
|
|
||||||
if f.l.Level >= logrus.DebugLevel {
|
|
||||||
f.l.WithField("queue", i).Debug("tx queue not initialized; batching disabled for writer")
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
writer := f.writerForIndex(i)
|
|
||||||
if writer == nil {
|
|
||||||
if f.l.Level >= logrus.DebugLevel {
|
|
||||||
f.l.WithField("queue", i).Debug("no UDP writer for batch queue")
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if f.l.Level >= logrus.DebugLevel {
|
|
||||||
f.l.WithField("queue", i).Debug("send queue worker started")
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
if f.l.Level >= logrus.WarnLevel {
|
|
||||||
f.l.WithField("queue", i).Warn("send queue worker exited")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
batchCap := f.batches.batchSizeHint()
|
|
||||||
if batchCap <= 0 {
|
|
||||||
batchCap = 1
|
|
||||||
}
|
|
||||||
gsoLimit := f.effectiveGSOMaxSegments()
|
|
||||||
if gsoLimit > batchCap {
|
|
||||||
batchCap = gsoLimit
|
|
||||||
}
|
|
||||||
pending := make([]queuedDatagram, 0, batchCap)
|
|
||||||
var (
|
|
||||||
flushTimer *time.Timer
|
|
||||||
flushC <-chan time.Time
|
|
||||||
)
|
|
||||||
dispatch := func(reason string, timerFired bool) {
|
|
||||||
if len(pending) == 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
batch := pending
|
|
||||||
f.flushAndReleaseBatch(i, writer, batch, reason)
|
|
||||||
for idx := range batch {
|
|
||||||
batch[idx] = queuedDatagram{}
|
|
||||||
}
|
|
||||||
pending = pending[:0]
|
|
||||||
if flushTimer != nil {
|
|
||||||
if !timerFired {
|
|
||||||
if !flushTimer.Stop() {
|
|
||||||
select {
|
|
||||||
case <-flushTimer.C:
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
flushTimer = nil
|
|
||||||
flushC = nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
armTimer := func() {
|
|
||||||
delay := f.currentBatchFlushInterval()
|
|
||||||
if delay <= 0 {
|
|
||||||
dispatch("nogso", false)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if flushTimer == nil {
|
|
||||||
flushTimer = time.NewTimer(delay)
|
|
||||||
flushC = flushTimer.C
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
n, err := reader.BatchRead(packets, sizes)
|
||||||
case d := <-queue:
|
if err != nil {
|
||||||
if d.packet == nil {
|
if errors.Is(err, os.ErrClosed) && f.closed.Load() {
|
||||||
continue
|
|
||||||
}
|
|
||||||
if f.l.Level >= logrus.DebugLevel {
|
|
||||||
f.l.WithFields(logrus.Fields{
|
|
||||||
"queue": i,
|
|
||||||
"payload_len": d.packet.Len,
|
|
||||||
"dest": d.addr,
|
|
||||||
}).Debug("send queue received packet")
|
|
||||||
}
|
|
||||||
pending = append(pending, d)
|
|
||||||
if gsoLimit > 0 && len(pending) >= gsoLimit {
|
|
||||||
dispatch("gso", false)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if len(pending) >= cap(pending) {
|
|
||||||
dispatch("cap", false)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
armTimer()
|
|
||||||
f.observeUDPQueueLen(i)
|
|
||||||
case <-flushC:
|
|
||||||
dispatch("timer", true)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) runTunWriteQueue(i int) {
|
|
||||||
queue := f.batches.tunQueue(i)
|
|
||||||
if queue == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
writer := f.batches.inside
|
|
||||||
if writer == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
requiredHeadroom := writer.BatchHeadroom()
|
|
||||||
|
|
||||||
batchCap := f.batches.batchSizeHint()
|
|
||||||
if batchCap <= 0 {
|
|
||||||
batchCap = 1
|
|
||||||
}
|
|
||||||
pending := make([]*overlay.Packet, 0, batchCap)
|
|
||||||
var (
|
|
||||||
flushTimer *time.Timer
|
|
||||||
flushC <-chan time.Time
|
|
||||||
)
|
|
||||||
flush := func(reason string, timerFired bool) {
|
|
||||||
if len(pending) == 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
valid := pending[:0]
|
|
||||||
for idx := range pending {
|
|
||||||
if !f.ensurePacketHeadroom(&pending[idx], requiredHeadroom, i, reason) {
|
|
||||||
pending[idx] = nil
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if pending[idx] != nil {
|
|
||||||
valid = append(valid, pending[idx])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if len(valid) > 0 {
|
|
||||||
if _, err := writer.WriteBatch(valid); err != nil {
|
|
||||||
f.l.WithError(err).
|
|
||||||
WithField("queue", i).
|
|
||||||
WithField("reason", reason).
|
|
||||||
Warn("Failed to write tun batch")
|
|
||||||
for _, pkt := range valid {
|
|
||||||
if pkt != nil {
|
|
||||||
f.writePacketToTun(i, pkt)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pending = pending[:0]
|
|
||||||
if flushTimer != nil {
|
|
||||||
if !timerFired {
|
|
||||||
if !flushTimer.Stop() {
|
|
||||||
select {
|
|
||||||
case <-flushTimer.C:
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
flushTimer = nil
|
|
||||||
flushC = nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
armTimer := func() {
|
|
||||||
delay := f.currentBatchFlushInterval()
|
|
||||||
if delay <= 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if flushTimer == nil {
|
|
||||||
flushTimer = time.NewTimer(delay)
|
|
||||||
flushC = flushTimer.C
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case pkt := <-queue:
|
|
||||||
if pkt == nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if f.ensurePacketHeadroom(&pkt, requiredHeadroom, i, "queue") {
|
|
||||||
pending = append(pending, pkt)
|
|
||||||
}
|
|
||||||
if len(pending) >= cap(pending) {
|
|
||||||
flush("cap", false)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
armTimer()
|
|
||||||
f.observeTunQueueLen(i)
|
|
||||||
case <-flushC:
|
|
||||||
flush("timer", true)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) flushAndReleaseBatch(index int, writer udp.Conn, batch []queuedDatagram, reason string) {
|
|
||||||
if len(batch) == 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
f.flushDatagrams(index, writer, batch, reason)
|
|
||||||
for idx := range batch {
|
|
||||||
if batch[idx].packet != nil {
|
|
||||||
batch[idx].packet.Release()
|
|
||||||
batch[idx].packet = nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if f.batchUDPFlushCounter != nil {
|
|
||||||
f.batchUDPFlushCounter.Inc(int64(len(batch)))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) flushDatagrams(index int, writer udp.Conn, batch []queuedDatagram, reason string) {
|
|
||||||
if len(batch) == 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if f.l.Level >= logrus.DebugLevel {
|
|
||||||
f.l.WithFields(logrus.Fields{
|
|
||||||
"writer": index,
|
|
||||||
"reason": reason,
|
|
||||||
"pending": len(batch),
|
|
||||||
}).Debug("udp batch flush summary")
|
|
||||||
}
|
|
||||||
maxSeg := f.effectiveGSOMaxSegments()
|
|
||||||
if bw, ok := writer.(udp.BatchConn); ok {
|
|
||||||
chunkCap := maxSeg
|
|
||||||
if chunkCap <= 0 {
|
|
||||||
chunkCap = len(batch)
|
|
||||||
}
|
|
||||||
chunk := make([]udp.Datagram, 0, chunkCap)
|
|
||||||
var (
|
|
||||||
currentAddr netip.AddrPort
|
|
||||||
segments int
|
|
||||||
)
|
|
||||||
flushChunk := func() {
|
|
||||||
if len(chunk) == 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if f.l.Level >= logrus.DebugLevel {
|
|
||||||
f.l.WithFields(logrus.Fields{
|
|
||||||
"writer": index,
|
|
||||||
"segments": len(chunk),
|
|
||||||
"dest": chunk[0].Addr,
|
|
||||||
"reason": reason,
|
|
||||||
"pending_total": len(batch),
|
|
||||||
}).Debug("flushing UDP batch")
|
|
||||||
}
|
|
||||||
if err := bw.WriteBatch(chunk); err != nil {
|
|
||||||
f.l.WithError(err).
|
|
||||||
WithField("writer", index).
|
|
||||||
WithField("reason", reason).
|
|
||||||
Warn("Failed to write UDP batch")
|
|
||||||
}
|
|
||||||
chunk = chunk[:0]
|
|
||||||
segments = 0
|
|
||||||
}
|
|
||||||
for _, item := range batch {
|
|
||||||
if item.packet == nil || !item.addr.IsValid() {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
payload := item.packet.Payload()[:item.packet.Len]
|
|
||||||
if segments == 0 {
|
|
||||||
currentAddr = item.addr
|
|
||||||
}
|
|
||||||
if item.addr != currentAddr || (maxSeg > 0 && segments >= maxSeg) {
|
|
||||||
flushChunk()
|
|
||||||
currentAddr = item.addr
|
|
||||||
}
|
|
||||||
chunk = append(chunk, udp.Datagram{Payload: payload, Addr: item.addr})
|
|
||||||
segments++
|
|
||||||
}
|
|
||||||
flushChunk()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
for _, item := range batch {
|
|
||||||
if item.packet == nil || !item.addr.IsValid() {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if f.l.Level >= logrus.DebugLevel {
|
|
||||||
f.l.WithFields(logrus.Fields{
|
|
||||||
"writer": index,
|
|
||||||
"reason": reason,
|
|
||||||
"dest": item.addr,
|
|
||||||
"segments": 1,
|
|
||||||
}).Debug("flushing UDP batch")
|
|
||||||
}
|
|
||||||
if err := writer.WriteTo(item.packet.Payload()[:item.packet.Len], item.addr); err != nil {
|
|
||||||
f.l.WithError(err).
|
|
||||||
WithField("writer", index).
|
|
||||||
WithField("udpAddr", item.addr).
|
|
||||||
WithField("reason", reason).
|
|
||||||
Warn("Failed to write UDP packet")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) tryQueueDatagram(q int, buf []byte, addr netip.AddrPort) bool {
|
|
||||||
if !addr.IsValid() || !f.batches.Enabled() {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
pkt := f.batches.newPacket()
|
|
||||||
if pkt == nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
payload := pkt.Payload()
|
|
||||||
if len(payload) < len(buf) {
|
|
||||||
pkt.Release()
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
copy(payload, buf)
|
|
||||||
pkt.Len = len(buf)
|
|
||||||
if f.batches.enqueueTx(q, pkt, addr) {
|
|
||||||
f.observeUDPQueueLen(q)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
pkt.Release()
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) writerForIndex(i int) udp.Conn {
|
|
||||||
if i < 0 || i >= len(f.writers) {
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return f.writers[i]
|
|
||||||
|
return fmt.Errorf("error while batch reading outbound packets: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *Interface) writeImmediate(q int, buf []byte, addr netip.AddrPort, hostinfo *HostInfo) {
|
// Process each packet in the batch
|
||||||
writer := f.writerForIndex(q)
|
cache := conntrackCache.Get(f.l)
|
||||||
if writer == nil {
|
for idx := 0; idx < n; idx++ {
|
||||||
f.l.WithField("udpAddr", addr).
|
if sizes[idx] > 0 {
|
||||||
WithField("writer", q).
|
// Use modulo to reuse fw packet state if batch is larger than our pre-allocated state
|
||||||
Error("Failed to write outgoing packet: no writer available")
|
stateIdx := idx % len(fwPackets)
|
||||||
return
|
f.consumeInsidePacket(packets[idx][:sizes[idx]], fwPackets[stateIdx], nbBuffers[stateIdx], outBuffers[stateIdx], i, cache)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if err := writer.WriteTo(buf, addr); err != nil {
|
|
||||||
hostinfo.logger(f.l).
|
|
||||||
WithError(err).
|
|
||||||
WithField("udpAddr", addr).
|
|
||||||
Error("Failed to write outgoing packet")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *Interface) tryQueuePacket(q int, pkt *overlay.Packet, addr netip.AddrPort) bool {
|
// writeTunBatch attempts to write multiple packets to the TUN device using batch operations if supported
|
||||||
if pkt == nil || !addr.IsValid() || !f.batches.Enabled() {
|
func (f *Interface) writeTunBatch(q int, packets [][]byte) error {
|
||||||
return false
|
if len(packets) == 0 {
|
||||||
}
|
|
||||||
if f.batches.enqueueTx(q, pkt, addr) {
|
|
||||||
f.observeUDPQueueLen(q)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) writeImmediatePacket(q int, pkt *overlay.Packet, addr netip.AddrPort, hostinfo *HostInfo) {
|
|
||||||
if pkt == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
writer := f.writerForIndex(q)
|
|
||||||
if writer == nil {
|
|
||||||
f.l.WithField("udpAddr", addr).
|
|
||||||
WithField("writer", q).
|
|
||||||
Error("Failed to write outgoing packet: no writer available")
|
|
||||||
pkt.Release()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if err := writer.WriteTo(pkt.Payload()[:pkt.Len], addr); err != nil {
|
|
||||||
hostinfo.logger(f.l).
|
|
||||||
WithError(err).
|
|
||||||
WithField("udpAddr", addr).
|
|
||||||
Error("Failed to write outgoing packet")
|
|
||||||
}
|
|
||||||
pkt.Release()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) writePacketToTun(q int, pkt *overlay.Packet) {
|
|
||||||
if pkt == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
writer := f.readers[q]
|
|
||||||
if writer == nil {
|
|
||||||
pkt.Release()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if bw, ok := writer.(interface {
|
|
||||||
WriteBatch([]*overlay.Packet) (int, error)
|
|
||||||
}); ok {
|
|
||||||
if _, err := bw.WriteBatch([]*overlay.Packet{pkt}); err != nil {
|
|
||||||
f.l.WithError(err).WithField("queue", q).Warn("Failed to write tun packet via batch writer")
|
|
||||||
pkt.Release()
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if _, err := writer.Write(pkt.Payload()[:pkt.Len]); err != nil {
|
|
||||||
f.l.WithError(err).Error("Failed to write to tun")
|
|
||||||
}
|
|
||||||
pkt.Release()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) clonePacketWithHeadroom(pkt *overlay.Packet, required int) *overlay.Packet {
|
|
||||||
if pkt == nil {
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
payload := pkt.Payload()[:pkt.Len]
|
|
||||||
if len(payload) == 0 && required <= 0 {
|
// Check if the reader/writer supports batch operations
|
||||||
return pkt
|
if batchWriter, ok := f.readers[q].(BatchWriter); ok {
|
||||||
|
_, err := batchWriter.BatchWrite(packets)
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
pool := f.batches.Pool()
|
// Fall back to writing packets individually
|
||||||
if pool != nil {
|
for _, packet := range packets {
|
||||||
if clone := pool.Get(); clone != nil {
|
if _, err := f.readers[q].Write(packet); err != nil {
|
||||||
if len(clone.Payload()) >= len(payload) {
|
return err
|
||||||
clone.Len = copy(clone.Payload(), payload)
|
|
||||||
pkt.Release()
|
|
||||||
return clone
|
|
||||||
}
|
}
|
||||||
clone.Release()
|
|
||||||
}
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if required < 0 {
|
// writeTun writes a single packet to the TUN device
|
||||||
required = 0
|
func (f *Interface) writeTun(q int, packet []byte) error {
|
||||||
}
|
_, err := f.readers[q].Write(packet)
|
||||||
buf := make([]byte, required+len(payload))
|
return err
|
||||||
n := copy(buf[required:], payload)
|
|
||||||
pkt.Release()
|
|
||||||
return &overlay.Packet{
|
|
||||||
Buf: buf,
|
|
||||||
Offset: required,
|
|
||||||
Len: n,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) observeUDPQueueLen(i int) {
|
|
||||||
if f.batchUDPQueueGauge == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
f.batchUDPQueueGauge.Update(int64(f.batches.txQueueLen(i)))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) observeTunQueueLen(i int) {
|
|
||||||
if f.batchTunQueueGauge == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
f.batchTunQueueGauge.Update(int64(f.batches.tunQueueLen(i)))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) currentBatchFlushInterval() time.Duration {
|
|
||||||
if v := f.batchFlushInterval.Load(); v > 0 {
|
|
||||||
return time.Duration(v)
|
|
||||||
}
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) ensurePacketHeadroom(pkt **overlay.Packet, required int, queue int, reason string) bool {
|
|
||||||
p := *pkt
|
|
||||||
if p == nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if required <= 0 || p.Offset >= required {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
clone := f.clonePacketWithHeadroom(p, required)
|
|
||||||
if clone == nil {
|
|
||||||
f.l.WithFields(logrus.Fields{
|
|
||||||
"queue": queue,
|
|
||||||
"reason": reason,
|
|
||||||
}).Warn("dropping packet lacking tun headroom")
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
*pkt = clone
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
func isVirtioHeadroomError(err error) bool {
|
|
||||||
if err == nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
msg := err.Error()
|
|
||||||
return strings.Contains(msg, "headroom") || strings.Contains(msg, "virtio")
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) effectiveGSOMaxSegments() int {
|
|
||||||
max := f.gsoMaxSegments
|
|
||||||
if max <= 0 {
|
|
||||||
max = defaultGSOMaxSegments
|
|
||||||
}
|
|
||||||
if max > maxKernelGSOSegments {
|
|
||||||
max = maxKernelGSOSegments
|
|
||||||
}
|
|
||||||
if !f.enableGSO {
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
return max
|
|
||||||
}
|
|
||||||
|
|
||||||
type udpOffloadConfigurator interface {
|
|
||||||
ConfigureOffload(enableGSO, enableGRO bool, maxSegments int)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Interface) applyOffloadConfig(enableGSO, enableGRO bool, maxSegments int) {
|
|
||||||
if maxSegments <= 0 {
|
|
||||||
maxSegments = defaultGSOMaxSegments
|
|
||||||
}
|
|
||||||
if maxSegments > maxKernelGSOSegments {
|
|
||||||
maxSegments = maxKernelGSOSegments
|
|
||||||
}
|
|
||||||
f.enableGSO = enableGSO
|
|
||||||
f.enableGRO = enableGRO
|
|
||||||
f.gsoMaxSegments = maxSegments
|
|
||||||
for _, writer := range f.writers {
|
|
||||||
if cfg, ok := writer.(udpOffloadConfigurator); ok {
|
|
||||||
cfg.ConfigureOffload(enableGSO, enableGRO, maxSegments)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *Interface) RegisterConfigChangeCallbacks(c *config.C) {
|
func (f *Interface) RegisterConfigChangeCallbacks(c *config.C) {
|
||||||
@@ -1062,42 +497,6 @@ func (f *Interface) reloadMisc(c *config.C) {
|
|||||||
f.reQueryWait.Store(int64(n))
|
f.reQueryWait.Store(int64(n))
|
||||||
f.l.Info("timers.requery_wait_duration has changed")
|
f.l.Info("timers.requery_wait_duration has changed")
|
||||||
}
|
}
|
||||||
|
|
||||||
if c.HasChanged("listen.gso_flush_timeout") {
|
|
||||||
d := c.GetDuration("listen.gso_flush_timeout", defaultGSOFlushInterval)
|
|
||||||
if d < 0 {
|
|
||||||
d = 0
|
|
||||||
}
|
|
||||||
f.batchFlushInterval.Store(int64(d))
|
|
||||||
f.l.WithField("duration", d).Info("listen.gso_flush_timeout has changed")
|
|
||||||
} else if c.HasChanged("batch.flush_interval") {
|
|
||||||
d := c.GetDuration("batch.flush_interval", defaultGSOFlushInterval)
|
|
||||||
if d < 0 {
|
|
||||||
d = 0
|
|
||||||
}
|
|
||||||
f.batchFlushInterval.Store(int64(d))
|
|
||||||
f.l.WithField("duration", d).Warn("batch.flush_interval is deprecated; use listen.gso_flush_timeout")
|
|
||||||
}
|
|
||||||
|
|
||||||
if c.HasChanged("batch.queue_depth") {
|
|
||||||
n := c.GetInt("batch.queue_depth", f.batchQueueDepth)
|
|
||||||
if n != f.batchQueueDepth {
|
|
||||||
f.batchQueueDepth = n
|
|
||||||
f.l.Warn("batch.queue_depth changes require a restart to take effect")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if c.HasChanged("listen.enable_gso") || c.HasChanged("listen.enable_gro") || c.HasChanged("listen.gso_max_segments") {
|
|
||||||
enableGSO := c.GetBool("listen.enable_gso", f.enableGSO)
|
|
||||||
enableGRO := c.GetBool("listen.enable_gro", f.enableGRO)
|
|
||||||
maxSeg := c.GetInt("listen.gso_max_segments", f.gsoMaxSegments)
|
|
||||||
f.applyOffloadConfig(enableGSO, enableGRO, maxSeg)
|
|
||||||
f.l.WithFields(logrus.Fields{
|
|
||||||
"enableGSO": enableGSO,
|
|
||||||
"enableGRO": enableGRO,
|
|
||||||
"gsoMaxSegments": maxSeg,
|
|
||||||
}).Info("listen GSO/GRO configuration updated")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
|
func (f *Interface) emitStats(ctx context.Context, i time.Duration) {
|
||||||
|
|||||||
38
main.go
38
main.go
@@ -5,7 +5,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
"runtime"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
@@ -144,20 +143,6 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
|
|||||||
// set up our UDP listener
|
// set up our UDP listener
|
||||||
udpConns := make([]udp.Conn, routines)
|
udpConns := make([]udp.Conn, routines)
|
||||||
port := c.GetInt("listen.port", 0)
|
port := c.GetInt("listen.port", 0)
|
||||||
enableGSO := c.GetBool("listen.enable_gso", true)
|
|
||||||
enableGRO := c.GetBool("listen.enable_gro", true)
|
|
||||||
gsoMaxSegments := c.GetInt("listen.gso_max_segments", defaultGSOMaxSegments)
|
|
||||||
if gsoMaxSegments <= 0 {
|
|
||||||
gsoMaxSegments = defaultGSOMaxSegments
|
|
||||||
}
|
|
||||||
if gsoMaxSegments > maxKernelGSOSegments {
|
|
||||||
gsoMaxSegments = maxKernelGSOSegments
|
|
||||||
}
|
|
||||||
gsoFlushTimeout := c.GetDuration("listen.gso_flush_timeout", defaultGSOFlushInterval)
|
|
||||||
if gsoFlushTimeout < 0 {
|
|
||||||
gsoFlushTimeout = 0
|
|
||||||
}
|
|
||||||
batchQueueDepth := c.GetInt("batch.queue_depth", 0)
|
|
||||||
|
|
||||||
if !configTest {
|
if !configTest {
|
||||||
rawListenHost := c.GetString("listen.host", "0.0.0.0")
|
rawListenHost := c.GetString("listen.host", "0.0.0.0")
|
||||||
@@ -177,27 +162,13 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
|
|||||||
listenHost = ips[0].Unmap()
|
listenHost = ips[0].Unmap()
|
||||||
}
|
}
|
||||||
|
|
||||||
useWGDefault := runtime.GOOS == "linux"
|
|
||||||
useWG := c.GetBool("listen.use_wireguard_stack", useWGDefault)
|
|
||||||
var mkListener func(*logrus.Logger, netip.Addr, int, bool, int) (udp.Conn, error)
|
|
||||||
if useWG {
|
|
||||||
mkListener = udp.NewWireguardListener
|
|
||||||
} else {
|
|
||||||
mkListener = udp.NewListener
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := 0; i < routines; i++ {
|
for i := 0; i < routines; i++ {
|
||||||
l.Infof("listening on %v", netip.AddrPortFrom(listenHost, uint16(port)))
|
l.Infof("listening on %v", netip.AddrPortFrom(listenHost, uint16(port)))
|
||||||
udpServer, err := mkListener(l, listenHost, port, routines > 1, c.GetInt("listen.batch", 64))
|
udpServer, err := udp.NewListener(l, listenHost, port, routines > 1, c.GetInt("listen.batch", 64))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, util.NewContextualError("Failed to open udp listener", m{"queue": i}, err)
|
return nil, util.NewContextualError("Failed to open udp listener", m{"queue": i}, err)
|
||||||
}
|
}
|
||||||
udpServer.ReloadConfig(c)
|
udpServer.ReloadConfig(c)
|
||||||
if cfg, ok := udpServer.(interface {
|
|
||||||
ConfigureOffload(bool, bool, int)
|
|
||||||
}); ok {
|
|
||||||
cfg.ConfigureOffload(enableGSO, enableGRO, gsoMaxSegments)
|
|
||||||
}
|
|
||||||
udpConns[i] = udpServer
|
udpConns[i] = udpServer
|
||||||
|
|
||||||
// If port is dynamic, discover it before the next pass through the for loop
|
// If port is dynamic, discover it before the next pass through the for loop
|
||||||
@@ -265,17 +236,13 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
|
|||||||
reQueryWait: c.GetDuration("timers.requery_wait_duration", defaultReQueryWait),
|
reQueryWait: c.GetDuration("timers.requery_wait_duration", defaultReQueryWait),
|
||||||
DropLocalBroadcast: c.GetBool("tun.drop_local_broadcast", false),
|
DropLocalBroadcast: c.GetBool("tun.drop_local_broadcast", false),
|
||||||
DropMulticast: c.GetBool("tun.drop_multicast", false),
|
DropMulticast: c.GetBool("tun.drop_multicast", false),
|
||||||
EnableGSO: enableGSO,
|
|
||||||
EnableGRO: enableGRO,
|
|
||||||
GSOMaxSegments: gsoMaxSegments,
|
|
||||||
routines: routines,
|
routines: routines,
|
||||||
MessageMetrics: messageMetrics,
|
MessageMetrics: messageMetrics,
|
||||||
version: buildVersion,
|
version: buildVersion,
|
||||||
relayManager: NewRelayManager(ctx, l, hostMap, c),
|
relayManager: NewRelayManager(ctx, l, hostMap, c),
|
||||||
punchy: punchy,
|
punchy: punchy,
|
||||||
ConntrackCacheTimeout: conntrackCacheTimeout,
|
ConntrackCacheTimeout: conntrackCacheTimeout,
|
||||||
BatchFlushInterval: gsoFlushTimeout,
|
batchSize: c.GetInt("tun.batch_size", 64),
|
||||||
BatchQueueDepth: batchQueueDepth,
|
|
||||||
l: l,
|
l: l,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -287,7 +254,6 @@ func Main(c *config.C, configTest bool, buildVersion string, logger *logrus.Logg
|
|||||||
}
|
}
|
||||||
|
|
||||||
ifce.writers = udpConns
|
ifce.writers = udpConns
|
||||||
ifce.applyOffloadConfig(enableGSO, enableGRO, gsoMaxSegments)
|
|
||||||
lightHouse.ifce = ifce
|
lightHouse.ifce = ifce
|
||||||
|
|
||||||
ifce.RegisterConfigChangeCallbacks(c)
|
ifce.RegisterConfigChangeCallbacks(c)
|
||||||
|
|||||||
56
outside.go
56
outside.go
@@ -12,7 +12,6 @@ import (
|
|||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/slackhq/nebula/firewall"
|
"github.com/slackhq/nebula/firewall"
|
||||||
"github.com/slackhq/nebula/header"
|
"github.com/slackhq/nebula/header"
|
||||||
"github.com/slackhq/nebula/overlay"
|
|
||||||
"golang.org/x/net/ipv4"
|
"golang.org/x/net/ipv4"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -20,7 +19,7 @@ const (
|
|||||||
minFwPacketLen = 4
|
minFwPacketLen = 4
|
||||||
)
|
)
|
||||||
|
|
||||||
func (f *Interface) readOutsidePackets(ip netip.AddrPort, via *ViaSender, out []byte, packet []byte, h *header.H, fwPacket *firewall.Packet, lhf *LightHouseHandler, nb []byte, q int, localCache *firewall.ConntrackCache) {
|
func (f *Interface) readOutsidePackets(ip netip.AddrPort, via *ViaSender, out []byte, packet []byte, h *header.H, fwPacket *firewall.Packet, lhf *LightHouseHandler, nb []byte, q int, localCache firewall.ConntrackCache) {
|
||||||
err := h.Parse(packet)
|
err := h.Parse(packet)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Hole punch packets are 0 or 1 byte big, so lets ignore printing those errors
|
// Hole punch packets are 0 or 1 byte big, so lets ignore printing those errors
|
||||||
@@ -62,7 +61,7 @@ func (f *Interface) readOutsidePackets(ip netip.AddrPort, via *ViaSender, out []
|
|||||||
|
|
||||||
switch h.Subtype {
|
switch h.Subtype {
|
||||||
case header.MessageNone:
|
case header.MessageNone:
|
||||||
if !f.decryptToTun(hostinfo, h.MessageCounter, out, packet, fwPacket, nb, q, localCache, ip, h.RemoteIndex) {
|
if !f.decryptToTun(hostinfo, h.MessageCounter, out, packet, fwPacket, nb, q, localCache) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
case header.MessageRelay:
|
case header.MessageRelay:
|
||||||
@@ -334,12 +333,13 @@ func parseV6(data []byte, incoming bool, fp *firewall.Packet) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fp.Protocol = uint8(proto)
|
fp.Protocol = uint8(proto)
|
||||||
|
ports := data[offset : offset+4]
|
||||||
if incoming {
|
if incoming {
|
||||||
fp.RemotePort = binary.BigEndian.Uint16(data[offset : offset+2])
|
fp.RemotePort = binary.BigEndian.Uint16(ports[0:2])
|
||||||
fp.LocalPort = binary.BigEndian.Uint16(data[offset+2 : offset+4])
|
fp.LocalPort = binary.BigEndian.Uint16(ports[2:4])
|
||||||
} else {
|
} else {
|
||||||
fp.LocalPort = binary.BigEndian.Uint16(data[offset : offset+2])
|
fp.LocalPort = binary.BigEndian.Uint16(ports[0:2])
|
||||||
fp.RemotePort = binary.BigEndian.Uint16(data[offset+2 : offset+4])
|
fp.RemotePort = binary.BigEndian.Uint16(ports[2:4])
|
||||||
}
|
}
|
||||||
|
|
||||||
fp.Fragment = false
|
fp.Fragment = false
|
||||||
@@ -466,45 +466,23 @@ func (f *Interface) decrypt(hostinfo *HostInfo, mc uint64, out []byte, packet []
|
|||||||
return out, nil
|
return out, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out []byte, packet []byte, fwPacket *firewall.Packet, nb []byte, q int, localCache *firewall.ConntrackCache, addr netip.AddrPort, recvIndex uint32) bool {
|
func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out []byte, packet []byte, fwPacket *firewall.Packet, nb []byte, q int, localCache firewall.ConntrackCache) bool {
|
||||||
var (
|
var err error
|
||||||
err error
|
|
||||||
pkt *overlay.Packet
|
|
||||||
)
|
|
||||||
|
|
||||||
if f.batches.tunQueue(q) != nil {
|
|
||||||
pkt = f.batches.newPacket()
|
|
||||||
if pkt != nil {
|
|
||||||
out = pkt.Payload()[:0]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
out, err = hostinfo.ConnectionState.dKey.DecryptDanger(out, packet[:header.Len], packet[header.Len:], messageCounter, nb)
|
out, err = hostinfo.ConnectionState.dKey.DecryptDanger(out, packet[:header.Len], packet[header.Len:], messageCounter, nb)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if pkt != nil {
|
|
||||||
pkt.Release()
|
|
||||||
}
|
|
||||||
hostinfo.logger(f.l).WithError(err).Error("Failed to decrypt packet")
|
hostinfo.logger(f.l).WithError(err).Error("Failed to decrypt packet")
|
||||||
if addr.IsValid() {
|
|
||||||
f.maybeSendRecvError(addr, recvIndex)
|
|
||||||
}
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
err = newPacket(out, true, fwPacket)
|
err = newPacket(out, true, fwPacket)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if pkt != nil {
|
|
||||||
pkt.Release()
|
|
||||||
}
|
|
||||||
hostinfo.logger(f.l).WithError(err).WithField("packet", out).
|
hostinfo.logger(f.l).WithError(err).WithField("packet", out).
|
||||||
Warnf("Error while validating inbound packet")
|
Warnf("Error while validating inbound packet")
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
if !hostinfo.ConnectionState.window.Update(f.l, messageCounter) {
|
if !hostinfo.ConnectionState.window.Update(f.l, messageCounter) {
|
||||||
if pkt != nil {
|
|
||||||
pkt.Release()
|
|
||||||
}
|
|
||||||
hostinfo.logger(f.l).WithField("fwPacket", fwPacket).
|
hostinfo.logger(f.l).WithField("fwPacket", fwPacket).
|
||||||
Debugln("dropping out of window packet")
|
Debugln("dropping out of window packet")
|
||||||
return false
|
return false
|
||||||
@@ -512,9 +490,6 @@ func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out
|
|||||||
|
|
||||||
dropReason := f.firewall.Drop(*fwPacket, true, hostinfo, f.pki.GetCAPool(), localCache)
|
dropReason := f.firewall.Drop(*fwPacket, true, hostinfo, f.pki.GetCAPool(), localCache)
|
||||||
if dropReason != nil {
|
if dropReason != nil {
|
||||||
if pkt != nil {
|
|
||||||
pkt.Release()
|
|
||||||
}
|
|
||||||
// NOTE: We give `packet` as the `out` here since we already decrypted from it and we don't need it anymore
|
// NOTE: We give `packet` as the `out` here since we already decrypted from it and we don't need it anymore
|
||||||
// This gives us a buffer to build the reject packet in
|
// This gives us a buffer to build the reject packet in
|
||||||
f.rejectOutside(out, hostinfo.ConnectionState, hostinfo, nb, packet, q)
|
f.rejectOutside(out, hostinfo.ConnectionState, hostinfo, nb, packet, q)
|
||||||
@@ -527,17 +502,8 @@ func (f *Interface) decryptToTun(hostinfo *HostInfo, messageCounter uint64, out
|
|||||||
}
|
}
|
||||||
|
|
||||||
f.connectionManager.In(hostinfo)
|
f.connectionManager.In(hostinfo)
|
||||||
if pkt != nil {
|
_, err = f.readers[q].Write(out)
|
||||||
pkt.Len = len(out)
|
if err != nil {
|
||||||
if f.batches.enqueueTun(q, pkt) {
|
|
||||||
f.observeTunQueueLen(q)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
f.writePacketToTun(q, pkt)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
if _, err = f.readers[q].Write(out); err != nil {
|
|
||||||
f.l.WithError(err).Error("Failed to write to tun")
|
f.l.WithError(err).Error("Failed to write to tun")
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package overlay
|
|||||||
import (
|
import (
|
||||||
"io"
|
"io"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
"sync"
|
|
||||||
|
|
||||||
"github.com/slackhq/nebula/routing"
|
"github.com/slackhq/nebula/routing"
|
||||||
)
|
)
|
||||||
@@ -16,84 +15,3 @@ type Device interface {
|
|||||||
RoutesFor(netip.Addr) routing.Gateways
|
RoutesFor(netip.Addr) routing.Gateways
|
||||||
NewMultiQueueReader() (io.ReadWriteCloser, error)
|
NewMultiQueueReader() (io.ReadWriteCloser, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Packet represents a single packet buffer with optional headroom to carry
|
|
||||||
// metadata (for example virtio-net headers).
|
|
||||||
type Packet struct {
|
|
||||||
Buf []byte
|
|
||||||
Offset int
|
|
||||||
Len int
|
|
||||||
release func()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Packet) Payload() []byte {
|
|
||||||
return p.Buf[p.Offset : p.Offset+p.Len]
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Packet) Reset() {
|
|
||||||
p.Len = 0
|
|
||||||
p.Offset = 0
|
|
||||||
p.release = nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Packet) Release() {
|
|
||||||
if p.release != nil {
|
|
||||||
p.release()
|
|
||||||
p.release = nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Packet) Capacity() int {
|
|
||||||
return len(p.Buf) - p.Offset
|
|
||||||
}
|
|
||||||
|
|
||||||
// PacketPool manages reusable buffers with headroom.
|
|
||||||
type PacketPool struct {
|
|
||||||
headroom int
|
|
||||||
blksz int
|
|
||||||
pool sync.Pool
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewPacketPool(headroom, payload int) *PacketPool {
|
|
||||||
p := &PacketPool{headroom: headroom, blksz: headroom + payload}
|
|
||||||
p.pool.New = func() any {
|
|
||||||
buf := make([]byte, p.blksz)
|
|
||||||
return &Packet{Buf: buf, Offset: headroom}
|
|
||||||
}
|
|
||||||
return p
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *PacketPool) Get() *Packet {
|
|
||||||
pkt := p.pool.Get().(*Packet)
|
|
||||||
pkt.Offset = p.headroom
|
|
||||||
pkt.Len = 0
|
|
||||||
pkt.release = func() { p.put(pkt) }
|
|
||||||
return pkt
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *PacketPool) put(pkt *Packet) {
|
|
||||||
pkt.Reset()
|
|
||||||
p.pool.Put(pkt)
|
|
||||||
}
|
|
||||||
|
|
||||||
// BatchReader allows reading multiple packets into a shared pool with
|
|
||||||
// preallocated headroom (e.g. virtio-net headers).
|
|
||||||
type BatchReader interface {
|
|
||||||
ReadIntoBatch(pool *PacketPool) ([]*Packet, error)
|
|
||||||
}
|
|
||||||
|
|
||||||
// BatchWriter writes a slice of packets that carry their own metadata.
|
|
||||||
type BatchWriter interface {
|
|
||||||
WriteBatch(packets []*Packet) (int, error)
|
|
||||||
}
|
|
||||||
|
|
||||||
// BatchCapableDevice describes a device that can efficiently read and write
|
|
||||||
// batches of packets with virtio headroom.
|
|
||||||
type BatchCapableDevice interface {
|
|
||||||
Device
|
|
||||||
BatchReader
|
|
||||||
BatchWriter
|
|
||||||
BatchHeadroom() int
|
|
||||||
BatchPayloadCap() int
|
|
||||||
BatchSize() int
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ package overlay
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
"net"
|
|
||||||
"net/netip"
|
"net/netip"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strconv"
|
"strconv"
|
||||||
@@ -305,29 +304,3 @@ func parseUnsafeRoutes(c *config.C, networks []netip.Prefix) ([]Route, error) {
|
|||||||
|
|
||||||
return routes, nil
|
return routes, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func ipWithin(o *net.IPNet, i *net.IPNet) bool {
|
|
||||||
// Make sure o contains the lowest form of i
|
|
||||||
if !o.Contains(i.IP.Mask(i.Mask)) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find the max ip in i
|
|
||||||
ip4 := i.IP.To4()
|
|
||||||
if ip4 == nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
last := make(net.IP, len(ip4))
|
|
||||||
copy(last, ip4)
|
|
||||||
for x := range ip4 {
|
|
||||||
last[x] |= ^i.Mask[x]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure o contains the max
|
|
||||||
if !o.Contains(last) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -225,6 +225,7 @@ func Test_parseUnsafeRoutes(t *testing.T) {
|
|||||||
// no mtu
|
// no mtu
|
||||||
c.Settings["tun"] = map[string]any{"unsafe_routes": []any{map[string]any{"via": "127.0.0.1", "route": "1.0.0.0/8"}}}
|
c.Settings["tun"] = map[string]any{"unsafe_routes": []any{map[string]any{"via": "127.0.0.1", "route": "1.0.0.0/8"}}}
|
||||||
routes, err = parseUnsafeRoutes(c, []netip.Prefix{n})
|
routes, err = parseUnsafeRoutes(c, []netip.Prefix{n})
|
||||||
|
require.NoError(t, err)
|
||||||
assert.Len(t, routes, 1)
|
assert.Len(t, routes, 1)
|
||||||
assert.Equal(t, 0, routes[0].MTU)
|
assert.Equal(t, 0, routes[0].MTU)
|
||||||
|
|
||||||
@@ -318,7 +319,7 @@ func Test_makeRouteTree(t *testing.T) {
|
|||||||
|
|
||||||
ip, err = netip.ParseAddr("1.1.0.1")
|
ip, err = netip.ParseAddr("1.1.0.1")
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
r, ok = routeTree.Lookup(ip)
|
_, ok = routeTree.Lookup(ip)
|
||||||
assert.False(t, ok)
|
assert.False(t, ok)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,6 @@
|
|||||||
package overlay
|
package overlay
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"net"
|
|
||||||
"net/netip"
|
"net/netip"
|
||||||
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
@@ -72,51 +70,3 @@ func findRemovedRoutes(newRoutes, oldRoutes []Route) []Route {
|
|||||||
|
|
||||||
return removed
|
return removed
|
||||||
}
|
}
|
||||||
|
|
||||||
func prefixToMask(prefix netip.Prefix) netip.Addr {
|
|
||||||
pLen := 128
|
|
||||||
if prefix.Addr().Is4() {
|
|
||||||
pLen = 32
|
|
||||||
}
|
|
||||||
|
|
||||||
addr, _ := netip.AddrFromSlice(net.CIDRMask(prefix.Bits(), pLen))
|
|
||||||
return addr
|
|
||||||
}
|
|
||||||
|
|
||||||
func flipBytes(b []byte) []byte {
|
|
||||||
for i := 0; i < len(b); i++ {
|
|
||||||
b[i] ^= 0xFF
|
|
||||||
}
|
|
||||||
return b
|
|
||||||
}
|
|
||||||
func orBytes(a []byte, b []byte) []byte {
|
|
||||||
ret := make([]byte, len(a))
|
|
||||||
for i := 0; i < len(a); i++ {
|
|
||||||
ret[i] = a[i] | b[i]
|
|
||||||
}
|
|
||||||
return ret
|
|
||||||
}
|
|
||||||
|
|
||||||
func getBroadcast(cidr netip.Prefix) netip.Addr {
|
|
||||||
broadcast, _ := netip.AddrFromSlice(
|
|
||||||
orBytes(
|
|
||||||
cidr.Addr().AsSlice(),
|
|
||||||
flipBytes(prefixToMask(cidr).AsSlice()),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
return broadcast
|
|
||||||
}
|
|
||||||
|
|
||||||
func selectGateway(dest netip.Prefix, gateways []netip.Prefix) (netip.Prefix, error) {
|
|
||||||
for _, gateway := range gateways {
|
|
||||||
if dest.Addr().Is4() && gateway.Addr().Is4() {
|
|
||||||
return gateway, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if dest.Addr().Is6() && gateway.Addr().Is6() {
|
|
||||||
return gateway, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return netip.Prefix{}, fmt.Errorf("no gateway found for %v in the list of vpn networks", dest)
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//go:build !ios && !e2e_testing
|
//go:build darwin && !ios && !e2e_testing
|
||||||
// +build !ios,!e2e_testing
|
// +build darwin,!ios,!e2e_testing
|
||||||
|
|
||||||
package overlay
|
package overlay
|
||||||
|
|
||||||
@@ -8,48 +8,27 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
"os"
|
|
||||||
"sync/atomic"
|
|
||||||
"syscall"
|
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"github.com/gaissmai/bart"
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/slackhq/nebula/config"
|
"github.com/slackhq/nebula/config"
|
||||||
"github.com/slackhq/nebula/routing"
|
|
||||||
"github.com/slackhq/nebula/util"
|
"github.com/slackhq/nebula/util"
|
||||||
netroute "golang.org/x/net/route"
|
netroute "golang.org/x/net/route"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
|
wgtun "golang.zx2c4.com/wireguard/tun"
|
||||||
)
|
)
|
||||||
|
|
||||||
type tun struct {
|
type tun struct {
|
||||||
io.ReadWriteCloser
|
|
||||||
Device string
|
|
||||||
vpnNetworks []netip.Prefix
|
|
||||||
DefaultMTU int
|
|
||||||
Routes atomic.Pointer[[]Route]
|
|
||||||
routeTree atomic.Pointer[bart.Table[routing.Gateways]]
|
|
||||||
linkAddr *netroute.LinkAddr
|
linkAddr *netroute.LinkAddr
|
||||||
l *logrus.Logger
|
|
||||||
|
|
||||||
// cache out buffer since we need to prepend 4 bytes for tun metadata
|
|
||||||
out []byte
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ioctl structures for Darwin network configuration
|
||||||
type ifReq struct {
|
type ifReq struct {
|
||||||
Name [unix.IFNAMSIZ]byte
|
Name [unix.IFNAMSIZ]byte
|
||||||
Flags uint16
|
Flags uint16
|
||||||
pad [8]byte
|
pad [8]byte
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
|
||||||
_SIOCAIFADDR_IN6 = 2155899162
|
|
||||||
_UTUN_OPT_IFNAME = 2
|
|
||||||
_IN6_IFF_NODAD = 0x0020
|
|
||||||
_IN6_IFF_SECURED = 0x0400
|
|
||||||
utunControlName = "com.apple.net.utun_control"
|
|
||||||
)
|
|
||||||
|
|
||||||
type ifreqMTU struct {
|
type ifreqMTU struct {
|
||||||
Name [16]byte
|
Name [16]byte
|
||||||
MTU int32
|
MTU int32
|
||||||
@@ -79,60 +58,61 @@ type ifreqAlias6 struct {
|
|||||||
Lifetime addrLifetime
|
Lifetime addrLifetime
|
||||||
}
|
}
|
||||||
|
|
||||||
func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, _ bool) (*tun, error) {
|
const (
|
||||||
|
_SIOCAIFADDR_IN6 = 2155899162
|
||||||
|
_IN6_IFF_NODAD = 0x0020
|
||||||
|
)
|
||||||
|
|
||||||
|
func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ []netip.Prefix) (*wgTun, error) {
|
||||||
|
return nil, fmt.Errorf("newTunFromFd not supported on Darwin")
|
||||||
|
}
|
||||||
|
|
||||||
|
func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, _ bool) (*wgTun, error) {
|
||||||
name := c.GetString("tun.dev", "")
|
name := c.GetString("tun.dev", "")
|
||||||
ifIndex := -1
|
deviceName := "utun"
|
||||||
|
|
||||||
|
// Parse device name to handle utun[0-9]+ format
|
||||||
if name != "" && name != "utun" {
|
if name != "" && name != "utun" {
|
||||||
|
ifIndex := -1
|
||||||
_, err := fmt.Sscanf(name, "utun%d", &ifIndex)
|
_, err := fmt.Sscanf(name, "utun%d", &ifIndex)
|
||||||
if err != nil || ifIndex < 0 {
|
if err != nil || ifIndex < 0 {
|
||||||
// NOTE: we don't make this error so we don't break existing
|
// NOTE: we don't make this error so we don't break existing
|
||||||
// configs that set a name before it was used.
|
// configs that set a name before it was used.
|
||||||
l.Warn("interface name must be utun[0-9]+ on Darwin, ignoring")
|
l.Warn("interface name must be utun[0-9]+ on Darwin, ignoring")
|
||||||
ifIndex = -1
|
} else {
|
||||||
|
deviceName = name
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fd, err := unix.Socket(unix.AF_SYSTEM, unix.SOCK_DGRAM, unix.AF_SYS_CONTROL)
|
mtu := c.GetInt("tun.mtu", DefaultMTU)
|
||||||
|
|
||||||
|
// Create WireGuard TUN device
|
||||||
|
tunDevice, err := wgtun.CreateTUN(deviceName, mtu)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("system socket: %v", err)
|
return nil, fmt.Errorf("failed to create TUN device: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
var ctlInfo = &unix.CtlInfo{}
|
// Get the actual device name
|
||||||
copy(ctlInfo.Name[:], utunControlName)
|
actualName, err := tunDevice.Name()
|
||||||
|
|
||||||
err = unix.IoctlCtlInfo(fd, ctlInfo)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("CTLIOCGINFO: %v", err)
|
tunDevice.Close()
|
||||||
|
return nil, fmt.Errorf("failed to get TUN device name: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = unix.Connect(fd, &unix.SockaddrCtl{
|
t := &wgTun{
|
||||||
ID: ctlInfo.Id,
|
tunDevice: tunDevice,
|
||||||
Unit: uint32(ifIndex) + 1,
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("SYS_CONNECT: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
name, err = unix.GetsockoptString(fd, unix.AF_SYS_CONTROL, _UTUN_OPT_IFNAME)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to retrieve tun name: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
err = unix.SetNonblock(fd, true)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("SetNonblock: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
t := &tun{
|
|
||||||
ReadWriteCloser: os.NewFile(uintptr(fd), ""),
|
|
||||||
Device: name,
|
|
||||||
vpnNetworks: vpnNetworks,
|
vpnNetworks: vpnNetworks,
|
||||||
DefaultMTU: c.GetInt("tun.mtu", DefaultMTU),
|
MaxMTU: mtu,
|
||||||
|
DefaultMTU: mtu,
|
||||||
l: l,
|
l: l,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create Darwin-specific route manager
|
||||||
|
t.routeManager = &tun{}
|
||||||
|
|
||||||
err = t.reload(c, true)
|
err = t.reload(c, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
tunDevice.Close()
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -143,215 +123,251 @@ func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, _ bool) (
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
l.WithField("name", actualName).Info("Created WireGuard TUN device")
|
||||||
|
|
||||||
return t, nil
|
return t, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) deviceBytes() (o [16]byte) {
|
func (rm *tun) Activate(t *wgTun) error {
|
||||||
for i, c := range t.Device {
|
name, err := t.tunDevice.Name()
|
||||||
o[i] = byte(c)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ []netip.Prefix) (*tun, error) {
|
|
||||||
return nil, fmt.Errorf("newTunFromFd not supported in Darwin")
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) Close() error {
|
|
||||||
if t.ReadWriteCloser != nil {
|
|
||||||
return t.ReadWriteCloser.Close()
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) Activate() error {
|
|
||||||
devName := t.deviceBytes()
|
|
||||||
|
|
||||||
s, err := unix.Socket(
|
|
||||||
unix.AF_INET,
|
|
||||||
unix.SOCK_DGRAM,
|
|
||||||
unix.IPPROTO_IP,
|
|
||||||
)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get device name: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set the MTU
|
||||||
|
rm.SetMTU(t, t.MaxMTU)
|
||||||
|
|
||||||
|
// Add IP addresses
|
||||||
|
for _, network := range t.vpnNetworks {
|
||||||
|
if err := rm.addIP(t, name, network); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer unix.Close(s)
|
|
||||||
|
|
||||||
fd := uintptr(s)
|
|
||||||
|
|
||||||
// Set the MTU on the device
|
|
||||||
ifm := ifreqMTU{Name: devName, MTU: int32(t.DefaultMTU)}
|
|
||||||
if err = ioctl(fd, unix.SIOCSIFMTU, uintptr(unsafe.Pointer(&ifm))); err != nil {
|
|
||||||
return fmt.Errorf("failed to set tun mtu: %v", err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the device flags
|
// Bring up the interface using ioctl
|
||||||
ifrf := ifReq{Name: devName}
|
if err := rm.bringUpInterface(name); err != nil {
|
||||||
if err = ioctl(fd, unix.SIOCGIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
|
return fmt.Errorf("failed to bring up interface: %w", err)
|
||||||
return fmt.Errorf("failed to get tun flags: %s", err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
linkAddr, err := getLinkAddr(t.Device)
|
// Get the link address for routing
|
||||||
|
linkAddr, err := getLinkAddr(name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return fmt.Errorf("failed to get link address: %w", err)
|
||||||
}
|
}
|
||||||
if linkAddr == nil {
|
if linkAddr == nil {
|
||||||
return fmt.Errorf("unable to discover link_addr for tun interface")
|
return fmt.Errorf("unable to discover link_addr for tun interface")
|
||||||
}
|
}
|
||||||
t.linkAddr = linkAddr
|
rm.linkAddr = linkAddr
|
||||||
|
|
||||||
for _, network := range t.vpnNetworks {
|
// Set the routes
|
||||||
if network.Addr().Is4() {
|
if err := rm.AddRoutes(t, false); err != nil {
|
||||||
err = t.activate4(network)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rm *tun) bringUpInterface(name string) error {
|
||||||
|
// Open a socket for ioctl
|
||||||
|
fd, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create socket: %w", err)
|
||||||
|
}
|
||||||
|
defer unix.Close(fd)
|
||||||
|
|
||||||
|
// Get current flags
|
||||||
|
var ifrf ifReq
|
||||||
|
copy(ifrf.Name[:], name)
|
||||||
|
|
||||||
|
if err := ioctl(uintptr(fd), unix.SIOCGIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
|
||||||
|
return fmt.Errorf("failed to get interface flags: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set IFF_UP and IFF_RUNNING flags
|
||||||
|
ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING
|
||||||
|
|
||||||
|
if err := ioctl(uintptr(fd), unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
|
||||||
|
return fmt.Errorf("failed to set interface flags: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rm *tun) SetMTU(t *wgTun, mtu int) {
|
||||||
|
name, err := t.tunDevice.Name()
|
||||||
|
if err != nil {
|
||||||
|
t.l.WithError(err).Error("Failed to get device name for MTU set")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open a socket for ioctl
|
||||||
|
fd, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0)
|
||||||
|
if err != nil {
|
||||||
|
t.l.WithError(err).Error("Failed to create socket for MTU set")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer unix.Close(fd)
|
||||||
|
|
||||||
|
// Prepare the ioctl request
|
||||||
|
var ifr ifreqMTU
|
||||||
|
copy(ifr.Name[:], name)
|
||||||
|
ifr.MTU = int32(mtu)
|
||||||
|
|
||||||
|
// Set the MTU using ioctl
|
||||||
|
if err := ioctl(uintptr(fd), unix.SIOCSIFMTU, uintptr(unsafe.Pointer(&ifr))); err != nil {
|
||||||
|
t.l.WithError(err).Error("Failed to set tun mtu via ioctl")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rm *tun) SetDefaultRoute(t *wgTun, cidr netip.Prefix) error {
|
||||||
|
// On Darwin, routes are set via ifconfig and route commands
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rm *tun) AddRoutes(t *wgTun, logErrors bool) error {
|
||||||
|
routes := *t.Routes.Load()
|
||||||
|
for _, r := range routes {
|
||||||
|
if !r.Install {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
err := rm.addRoute(r.Cidr)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, unix.EEXIST) {
|
||||||
|
t.l.WithField("route", r.Cidr).
|
||||||
|
Warnf("unable to add unsafe_route, identical route already exists")
|
||||||
|
} else {
|
||||||
|
retErr := util.NewContextualError("Failed to add route", map[string]any{"route": r}, err)
|
||||||
|
if logErrors {
|
||||||
|
retErr.Log(t.l)
|
||||||
|
} else {
|
||||||
|
return retErr
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
err = t.activate6(network)
|
t.l.WithField("route", r).Info("Added route")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rm *tun) RemoveRoutes(t *wgTun, routes []Route) {
|
||||||
|
for _, r := range routes {
|
||||||
|
if !r.Install {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
err := rm.delRoute(r.Cidr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
|
||||||
|
} else {
|
||||||
|
t.l.WithField("route", r).Info("Removed route")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run the interface
|
func (rm *tun) NewMultiQueueReader(t *wgTun) (io.ReadWriteCloser, error) {
|
||||||
ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING
|
// Darwin doesn't support multi-queue TUN devices in the same way as Linux
|
||||||
if err = ioctl(fd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
|
// Return a reader that wraps the same device
|
||||||
return fmt.Errorf("failed to run tun device: %s", err)
|
return &wgTunReader{
|
||||||
|
parent: t,
|
||||||
|
tunDevice: t.tunDevice,
|
||||||
|
offset: 0,
|
||||||
|
l: t.l,
|
||||||
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unsafe path routes
|
func (rm *tun) addIP(t *wgTun, name string, network netip.Prefix) error {
|
||||||
return t.addRoutes(false)
|
addr := network.Addr()
|
||||||
|
|
||||||
|
if addr.Is4() {
|
||||||
|
return rm.addIPv4(name, network)
|
||||||
|
} else {
|
||||||
|
return rm.addIPv6(name, network)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) activate4(network netip.Prefix) error {
|
func (rm *tun) addIPv4(name string, network netip.Prefix) error {
|
||||||
s, err := unix.Socket(
|
// Open an IPv4 socket for ioctl
|
||||||
unix.AF_INET,
|
s, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, unix.IPPROTO_IP)
|
||||||
unix.SOCK_DGRAM,
|
|
||||||
unix.IPPROTO_IP,
|
|
||||||
)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return fmt.Errorf("failed to create IPv4 socket: %w", err)
|
||||||
}
|
}
|
||||||
defer unix.Close(s)
|
defer unix.Close(s)
|
||||||
|
|
||||||
ifr := ifreqAlias4{
|
var ifr ifreqAlias4
|
||||||
Name: t.deviceBytes(),
|
copy(ifr.Name[:], name)
|
||||||
Addr: unix.RawSockaddrInet4{
|
|
||||||
|
// Set the address
|
||||||
|
ifr.Addr = unix.RawSockaddrInet4{
|
||||||
Len: unix.SizeofSockaddrInet4,
|
Len: unix.SizeofSockaddrInet4,
|
||||||
Family: unix.AF_INET,
|
Family: unix.AF_INET,
|
||||||
Addr: network.Addr().As4(),
|
Addr: network.Addr().As4(),
|
||||||
},
|
}
|
||||||
DstAddr: unix.RawSockaddrInet4{
|
|
||||||
|
// Set the destination address (same as address for point-to-point)
|
||||||
|
ifr.DstAddr = unix.RawSockaddrInet4{
|
||||||
Len: unix.SizeofSockaddrInet4,
|
Len: unix.SizeofSockaddrInet4,
|
||||||
Family: unix.AF_INET,
|
Family: unix.AF_INET,
|
||||||
Addr: network.Addr().As4(),
|
Addr: network.Addr().As4(),
|
||||||
},
|
}
|
||||||
MaskAddr: unix.RawSockaddrInet4{
|
|
||||||
|
// Set the netmask
|
||||||
|
ifr.MaskAddr = unix.RawSockaddrInet4{
|
||||||
Len: unix.SizeofSockaddrInet4,
|
Len: unix.SizeofSockaddrInet4,
|
||||||
Family: unix.AF_INET,
|
Family: unix.AF_INET,
|
||||||
Addr: prefixToMask(network).As4(),
|
Addr: prefixToMask(network).As4(),
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := ioctl(uintptr(s), unix.SIOCAIFADDR, uintptr(unsafe.Pointer(&ifr))); err != nil {
|
if err := ioctl(uintptr(s), unix.SIOCAIFADDR, uintptr(unsafe.Pointer(&ifr))); err != nil {
|
||||||
return fmt.Errorf("failed to set tun v4 address: %s", err)
|
return fmt.Errorf("failed to set IPv4 address via ioctl: %w", err)
|
||||||
}
|
|
||||||
|
|
||||||
err = addRoute(network, t.linkAddr)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) activate6(network netip.Prefix) error {
|
func (rm *tun) addIPv6(name string, network netip.Prefix) error {
|
||||||
s, err := unix.Socket(
|
// Open an IPv6 socket for ioctl
|
||||||
unix.AF_INET6,
|
s, err := unix.Socket(unix.AF_INET6, unix.SOCK_DGRAM, unix.IPPROTO_IP)
|
||||||
unix.SOCK_DGRAM,
|
|
||||||
unix.IPPROTO_IP,
|
|
||||||
)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return fmt.Errorf("failed to create IPv6 socket: %w", err)
|
||||||
}
|
}
|
||||||
defer unix.Close(s)
|
defer unix.Close(s)
|
||||||
|
|
||||||
ifr := ifreqAlias6{
|
var ifr ifreqAlias6
|
||||||
Name: t.deviceBytes(),
|
copy(ifr.Name[:], name)
|
||||||
Addr: unix.RawSockaddrInet6{
|
|
||||||
|
// Set the address
|
||||||
|
ifr.Addr = unix.RawSockaddrInet6{
|
||||||
Len: unix.SizeofSockaddrInet6,
|
Len: unix.SizeofSockaddrInet6,
|
||||||
Family: unix.AF_INET6,
|
Family: unix.AF_INET6,
|
||||||
Addr: network.Addr().As16(),
|
Addr: network.Addr().As16(),
|
||||||
},
|
}
|
||||||
PrefixMask: unix.RawSockaddrInet6{
|
|
||||||
|
// Set the prefix mask
|
||||||
|
ifr.PrefixMask = unix.RawSockaddrInet6{
|
||||||
Len: unix.SizeofSockaddrInet6,
|
Len: unix.SizeofSockaddrInet6,
|
||||||
Family: unix.AF_INET6,
|
Family: unix.AF_INET6,
|
||||||
Addr: prefixToMask(network).As16(),
|
Addr: prefixToMask(network).As16(),
|
||||||
},
|
}
|
||||||
Lifetime: addrLifetime{
|
|
||||||
// never expires
|
// Set lifetime (never expires)
|
||||||
|
ifr.Lifetime = addrLifetime{
|
||||||
Vltime: 0xffffffff,
|
Vltime: 0xffffffff,
|
||||||
Pltime: 0xffffffff,
|
Pltime: 0xffffffff,
|
||||||
},
|
|
||||||
Flags: _IN6_IFF_NODAD,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set flags (no DAD - Duplicate Address Detection)
|
||||||
|
ifr.Flags = _IN6_IFF_NODAD
|
||||||
|
|
||||||
if err := ioctl(uintptr(s), _SIOCAIFADDR_IN6, uintptr(unsafe.Pointer(&ifr))); err != nil {
|
if err := ioctl(uintptr(s), _SIOCAIFADDR_IN6, uintptr(unsafe.Pointer(&ifr))); err != nil {
|
||||||
return fmt.Errorf("failed to set tun address: %s", err)
|
return fmt.Errorf("failed to set IPv6 address via ioctl: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) reload(c *config.C, initial bool) error {
|
|
||||||
change, routes, err := getAllRoutesFromConfig(c, t.vpnNetworks, initial)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if !initial && !change {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
routeTree, err := makeRouteTree(t.l, routes, false)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Teach nebula how to handle the routes before establishing them in the system table
|
|
||||||
oldRoutes := t.Routes.Swap(&routes)
|
|
||||||
t.routeTree.Store(routeTree)
|
|
||||||
|
|
||||||
if !initial {
|
|
||||||
// Remove first, if the system removes a wanted route hopefully it will be re-added next
|
|
||||||
err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
|
|
||||||
if err != nil {
|
|
||||||
util.LogWithContextIfNeeded("Failed to remove routes", err, t.l)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure any routes we actually want are installed
|
|
||||||
err = t.addRoutes(true)
|
|
||||||
if err != nil {
|
|
||||||
// Catch any stray logs
|
|
||||||
util.LogWithContextIfNeeded("Failed to add routes", err, t.l)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) RoutesFor(ip netip.Addr) routing.Gateways {
|
|
||||||
r, ok := t.routeTree.Load().Lookup(ip)
|
|
||||||
if ok {
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
return routing.Gateways{}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the LinkAddr for the interface of the given name
|
|
||||||
// Is there an easier way to fetch this when we create the interface?
|
|
||||||
// Maybe SIOCGIFINDEX? but this doesn't appear to exist in the darwin headers.
|
|
||||||
func getLinkAddr(name string) (*netroute.LinkAddr, error) {
|
func getLinkAddr(name string) (*netroute.LinkAddr, error) {
|
||||||
rib, err := netroute.FetchRIB(unix.AF_UNSPEC, unix.NET_RT_IFLIST, 0)
|
rib, err := netroute.FetchRIB(unix.AF_UNSPEC, unix.NET_RT_IFLIST, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -377,53 +393,7 @@ func getLinkAddr(name string) (*netroute.LinkAddr, error) {
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) addRoutes(logErrors bool) error {
|
func (rm *tun) addRoute(prefix netip.Prefix) error {
|
||||||
routes := *t.Routes.Load()
|
|
||||||
|
|
||||||
for _, r := range routes {
|
|
||||||
if len(r.Via) == 0 || !r.Install {
|
|
||||||
// We don't allow route MTUs so only install routes with a via
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
err := addRoute(r.Cidr, t.linkAddr)
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(err, unix.EEXIST) {
|
|
||||||
t.l.WithField("route", r.Cidr).
|
|
||||||
Warnf("unable to add unsafe_route, identical route already exists")
|
|
||||||
} else {
|
|
||||||
retErr := util.NewContextualError("Failed to add route", map[string]any{"route": r}, err)
|
|
||||||
if logErrors {
|
|
||||||
retErr.Log(t.l)
|
|
||||||
} else {
|
|
||||||
return retErr
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
t.l.WithField("route", r).Info("Added route")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) removeRoutes(routes []Route) error {
|
|
||||||
for _, r := range routes {
|
|
||||||
if !r.Install {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
err := delRoute(r.Cidr, t.linkAddr)
|
|
||||||
if err != nil {
|
|
||||||
t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
|
|
||||||
} else {
|
|
||||||
t.l.WithField("route", r).Info("Removed route")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func addRoute(prefix netip.Prefix, gateway netroute.Addr) error {
|
|
||||||
sock, err := unix.Socket(unix.AF_ROUTE, unix.SOCK_RAW, unix.AF_UNSPEC)
|
sock, err := unix.Socket(unix.AF_ROUTE, unix.SOCK_RAW, unix.AF_UNSPEC)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to create AF_ROUTE socket: %v", err)
|
return fmt.Errorf("unable to create AF_ROUTE socket: %v", err)
|
||||||
@@ -441,13 +411,13 @@ func addRoute(prefix netip.Prefix, gateway netroute.Addr) error {
|
|||||||
route.Addrs = []netroute.Addr{
|
route.Addrs = []netroute.Addr{
|
||||||
unix.RTAX_DST: &netroute.Inet4Addr{IP: prefix.Masked().Addr().As4()},
|
unix.RTAX_DST: &netroute.Inet4Addr{IP: prefix.Masked().Addr().As4()},
|
||||||
unix.RTAX_NETMASK: &netroute.Inet4Addr{IP: prefixToMask(prefix).As4()},
|
unix.RTAX_NETMASK: &netroute.Inet4Addr{IP: prefixToMask(prefix).As4()},
|
||||||
unix.RTAX_GATEWAY: gateway,
|
unix.RTAX_GATEWAY: rm.linkAddr,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
route.Addrs = []netroute.Addr{
|
route.Addrs = []netroute.Addr{
|
||||||
unix.RTAX_DST: &netroute.Inet6Addr{IP: prefix.Masked().Addr().As16()},
|
unix.RTAX_DST: &netroute.Inet6Addr{IP: prefix.Masked().Addr().As16()},
|
||||||
unix.RTAX_NETMASK: &netroute.Inet6Addr{IP: prefixToMask(prefix).As16()},
|
unix.RTAX_NETMASK: &netroute.Inet6Addr{IP: prefixToMask(prefix).As16()},
|
||||||
unix.RTAX_GATEWAY: gateway,
|
unix.RTAX_GATEWAY: rm.linkAddr,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -464,7 +434,7 @@ func addRoute(prefix netip.Prefix, gateway netroute.Addr) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func delRoute(prefix netip.Prefix, gateway netroute.Addr) error {
|
func (rm *tun) delRoute(prefix netip.Prefix) error {
|
||||||
sock, err := unix.Socket(unix.AF_ROUTE, unix.SOCK_RAW, unix.AF_UNSPEC)
|
sock, err := unix.Socket(unix.AF_ROUTE, unix.SOCK_RAW, unix.AF_UNSPEC)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to create AF_ROUTE socket: %v", err)
|
return fmt.Errorf("unable to create AF_ROUTE socket: %v", err)
|
||||||
@@ -481,13 +451,13 @@ func delRoute(prefix netip.Prefix, gateway netroute.Addr) error {
|
|||||||
route.Addrs = []netroute.Addr{
|
route.Addrs = []netroute.Addr{
|
||||||
unix.RTAX_DST: &netroute.Inet4Addr{IP: prefix.Masked().Addr().As4()},
|
unix.RTAX_DST: &netroute.Inet4Addr{IP: prefix.Masked().Addr().As4()},
|
||||||
unix.RTAX_NETMASK: &netroute.Inet4Addr{IP: prefixToMask(prefix).As4()},
|
unix.RTAX_NETMASK: &netroute.Inet4Addr{IP: prefixToMask(prefix).As4()},
|
||||||
unix.RTAX_GATEWAY: gateway,
|
unix.RTAX_GATEWAY: rm.linkAddr,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
route.Addrs = []netroute.Addr{
|
route.Addrs = []netroute.Addr{
|
||||||
unix.RTAX_DST: &netroute.Inet6Addr{IP: prefix.Masked().Addr().As16()},
|
unix.RTAX_DST: &netroute.Inet6Addr{IP: prefix.Masked().Addr().As16()},
|
||||||
unix.RTAX_NETMASK: &netroute.Inet6Addr{IP: prefixToMask(prefix).As16()},
|
unix.RTAX_NETMASK: &netroute.Inet6Addr{IP: prefixToMask(prefix).As16()},
|
||||||
unix.RTAX_GATEWAY: gateway,
|
unix.RTAX_GATEWAY: rm.linkAddr,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -495,6 +465,7 @@ func delRoute(prefix netip.Prefix, gateway netroute.Addr) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to create route.RouteMessage: %w", err)
|
return fmt.Errorf("failed to create route.RouteMessage: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err = unix.Write(sock, data[:])
|
_, err = unix.Write(sock, data[:])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to write route.RouteMessage to socket: %w", err)
|
return fmt.Errorf("failed to write route.RouteMessage to socket: %w", err)
|
||||||
@@ -503,52 +474,34 @@ func delRoute(prefix netip.Prefix, gateway netroute.Addr) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) Read(to []byte) (int, error) {
|
func ioctl(a1, a2, a3 uintptr) error {
|
||||||
buf := make([]byte, len(to)+4)
|
_, _, errno := unix.Syscall(unix.SYS_IOCTL, a1, a2, a3)
|
||||||
|
if errno != 0 {
|
||||||
n, err := t.ReadWriteCloser.Read(buf)
|
return errno
|
||||||
|
}
|
||||||
copy(to, buf[4:])
|
return nil
|
||||||
return n - 4, err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write is only valid for single threaded use
|
func prefixToMask(prefix netip.Prefix) netip.Addr {
|
||||||
func (t *tun) Write(from []byte) (int, error) {
|
bits := prefix.Bits()
|
||||||
buf := t.out
|
if prefix.Addr().Is4() {
|
||||||
if cap(buf) < len(from)+4 {
|
// Create IPv4 netmask from prefix length
|
||||||
buf = make([]byte, len(from)+4)
|
mask := ^uint32(0) << (32 - bits)
|
||||||
t.out = buf
|
return netip.AddrFrom4([4]byte{
|
||||||
}
|
byte(mask >> 24),
|
||||||
buf = buf[:len(from)+4]
|
byte(mask >> 16),
|
||||||
|
byte(mask >> 8),
|
||||||
if len(from) == 0 {
|
byte(mask),
|
||||||
return 0, syscall.EIO
|
})
|
||||||
}
|
|
||||||
|
|
||||||
// Determine the IP Family for the NULL L2 Header
|
|
||||||
ipVer := from[0] >> 4
|
|
||||||
if ipVer == 4 {
|
|
||||||
buf[3] = syscall.AF_INET
|
|
||||||
} else if ipVer == 6 {
|
|
||||||
buf[3] = syscall.AF_INET6
|
|
||||||
} else {
|
} else {
|
||||||
return 0, fmt.Errorf("unable to determine IP version from packet")
|
// Create IPv6 netmask from prefix length
|
||||||
|
var mask [16]byte
|
||||||
|
for i := 0; i < bits/8; i++ {
|
||||||
|
mask[i] = 0xff
|
||||||
}
|
}
|
||||||
|
if bits%8 != 0 {
|
||||||
copy(buf[4:], from)
|
mask[bits/8] = ^byte(0) << (8 - bits%8)
|
||||||
|
|
||||||
n, err := t.ReadWriteCloser.Write(buf)
|
|
||||||
return n - 4, err
|
|
||||||
}
|
}
|
||||||
|
return netip.AddrFrom16(mask)
|
||||||
func (t *tun) Networks() []netip.Prefix {
|
|
||||||
return t.vpnNetworks
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) Name() string {
|
|
||||||
return t.Device
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
|
|
||||||
return nil, fmt.Errorf("TODO: multiqueue not implemented for darwin")
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,284 +1,77 @@
|
|||||||
//go:build !e2e_testing
|
//go:build freebsd && !e2e_testing
|
||||||
// +build !e2e_testing
|
// +build freebsd,!e2e_testing
|
||||||
|
|
||||||
package overlay
|
package overlay
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/fs"
|
|
||||||
"net/netip"
|
"net/netip"
|
||||||
"sync/atomic"
|
"os/exec"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"github.com/gaissmai/bart"
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/slackhq/nebula/config"
|
"github.com/slackhq/nebula/config"
|
||||||
"github.com/slackhq/nebula/routing"
|
|
||||||
"github.com/slackhq/nebula/util"
|
"github.com/slackhq/nebula/util"
|
||||||
netroute "golang.org/x/net/route"
|
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
|
wgtun "golang.zx2c4.com/wireguard/tun"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
type tun struct{}
|
||||||
// FIODGNAME is defined in sys/sys/filio.h on FreeBSD
|
|
||||||
// For 32-bit systems, use FIODGNAME_32 (not defined in this file: 0x80086678)
|
|
||||||
FIODGNAME = 0x80106678
|
|
||||||
TUNSIFMODE = 0x8004745e
|
|
||||||
TUNSIFHEAD = 0x80047460
|
|
||||||
OSIOCAIFADDR_IN6 = 0x8088691b
|
|
||||||
IN6_IFF_NODAD = 0x0020
|
|
||||||
)
|
|
||||||
|
|
||||||
type fiodgnameArg struct {
|
|
||||||
length int32
|
|
||||||
pad [4]byte
|
|
||||||
buf unsafe.Pointer
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// ifreqRename is used for renaming network interfaces on FreeBSD
|
||||||
type ifreqRename struct {
|
type ifreqRename struct {
|
||||||
Name [unix.IFNAMSIZ]byte
|
Name [unix.IFNAMSIZ]byte
|
||||||
Data uintptr
|
Data uintptr
|
||||||
}
|
}
|
||||||
|
|
||||||
type ifreqDestroy struct {
|
func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ []netip.Prefix) (*wgTun, error) {
|
||||||
Name [unix.IFNAMSIZ]byte
|
return nil, fmt.Errorf("newTunFromFd not supported on FreeBSD")
|
||||||
pad [16]byte
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type ifReq struct {
|
func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, _ bool) (*wgTun, error) {
|
||||||
Name [unix.IFNAMSIZ]byte
|
deviceName := c.GetString("tun.dev", "tun")
|
||||||
Flags uint16
|
mtu := c.GetInt("tun.mtu", DefaultMTU)
|
||||||
}
|
|
||||||
|
|
||||||
type ifreqMTU struct {
|
// Create WireGuard TUN device
|
||||||
Name [unix.IFNAMSIZ]byte
|
tunDevice, err := wgtun.CreateTUN(deviceName, mtu)
|
||||||
MTU int32
|
|
||||||
}
|
|
||||||
|
|
||||||
type addrLifetime struct {
|
|
||||||
Expire uint64
|
|
||||||
Preferred uint64
|
|
||||||
Vltime uint32
|
|
||||||
Pltime uint32
|
|
||||||
}
|
|
||||||
|
|
||||||
type ifreqAlias4 struct {
|
|
||||||
Name [unix.IFNAMSIZ]byte
|
|
||||||
Addr unix.RawSockaddrInet4
|
|
||||||
DstAddr unix.RawSockaddrInet4
|
|
||||||
MaskAddr unix.RawSockaddrInet4
|
|
||||||
VHid uint32
|
|
||||||
}
|
|
||||||
|
|
||||||
type ifreqAlias6 struct {
|
|
||||||
Name [unix.IFNAMSIZ]byte
|
|
||||||
Addr unix.RawSockaddrInet6
|
|
||||||
DstAddr unix.RawSockaddrInet6
|
|
||||||
PrefixMask unix.RawSockaddrInet6
|
|
||||||
Flags uint32
|
|
||||||
Lifetime addrLifetime
|
|
||||||
VHid uint32
|
|
||||||
}
|
|
||||||
|
|
||||||
type tun struct {
|
|
||||||
Device string
|
|
||||||
vpnNetworks []netip.Prefix
|
|
||||||
MTU int
|
|
||||||
Routes atomic.Pointer[[]Route]
|
|
||||||
routeTree atomic.Pointer[bart.Table[routing.Gateways]]
|
|
||||||
linkAddr *netroute.LinkAddr
|
|
||||||
l *logrus.Logger
|
|
||||||
devFd int
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) Read(to []byte) (int, error) {
|
|
||||||
// use readv() to read from the tunnel device, to eliminate the need for copying the buffer
|
|
||||||
if t.devFd < 0 {
|
|
||||||
return -1, syscall.EINVAL
|
|
||||||
}
|
|
||||||
|
|
||||||
// first 4 bytes is protocol family, in network byte order
|
|
||||||
head := make([]byte, 4)
|
|
||||||
|
|
||||||
iovecs := []syscall.Iovec{
|
|
||||||
{&head[0], 4},
|
|
||||||
{&to[0], uint64(len(to))},
|
|
||||||
}
|
|
||||||
|
|
||||||
n, _, errno := syscall.Syscall(syscall.SYS_READV, uintptr(t.devFd), uintptr(unsafe.Pointer(&iovecs[0])), uintptr(2))
|
|
||||||
|
|
||||||
var err error
|
|
||||||
if errno != 0 {
|
|
||||||
err = syscall.Errno(errno)
|
|
||||||
} else {
|
|
||||||
err = nil
|
|
||||||
}
|
|
||||||
// fix bytes read number to exclude header
|
|
||||||
bytesRead := int(n)
|
|
||||||
if bytesRead < 0 {
|
|
||||||
return bytesRead, err
|
|
||||||
} else if bytesRead < 4 {
|
|
||||||
return 0, err
|
|
||||||
} else {
|
|
||||||
return bytesRead - 4, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write is only valid for single threaded use
|
|
||||||
func (t *tun) Write(from []byte) (int, error) {
|
|
||||||
// use writev() to write to the tunnel device, to eliminate the need for copying the buffer
|
|
||||||
if t.devFd < 0 {
|
|
||||||
return -1, syscall.EINVAL
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(from) <= 1 {
|
|
||||||
return 0, syscall.EIO
|
|
||||||
}
|
|
||||||
ipVer := from[0] >> 4
|
|
||||||
var head []byte
|
|
||||||
// first 4 bytes is protocol family, in network byte order
|
|
||||||
if ipVer == 4 {
|
|
||||||
head = []byte{0, 0, 0, syscall.AF_INET}
|
|
||||||
} else if ipVer == 6 {
|
|
||||||
head = []byte{0, 0, 0, syscall.AF_INET6}
|
|
||||||
} else {
|
|
||||||
return 0, fmt.Errorf("unable to determine IP version from packet")
|
|
||||||
}
|
|
||||||
iovecs := []syscall.Iovec{
|
|
||||||
{&head[0], 4},
|
|
||||||
{&from[0], uint64(len(from))},
|
|
||||||
}
|
|
||||||
|
|
||||||
n, _, errno := syscall.Syscall(syscall.SYS_WRITEV, uintptr(t.devFd), uintptr(unsafe.Pointer(&iovecs[0])), uintptr(2))
|
|
||||||
|
|
||||||
var err error
|
|
||||||
if errno != 0 {
|
|
||||||
err = syscall.Errno(errno)
|
|
||||||
} else {
|
|
||||||
err = nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return int(n) - 4, err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) Close() error {
|
|
||||||
if t.devFd >= 0 {
|
|
||||||
err := syscall.Close(t.devFd)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.l.WithError(err).Error("Error closing device")
|
return nil, fmt.Errorf("failed to create TUN device: %w", err)
|
||||||
}
|
}
|
||||||
t.devFd = -1
|
|
||||||
|
|
||||||
c := make(chan struct{})
|
// Get the actual device name
|
||||||
go func() {
|
actualName, err := tunDevice.Name()
|
||||||
// destroying the interface can block if a read() is still pending. Do this asynchronously.
|
|
||||||
defer close(c)
|
|
||||||
s, err := syscall.Socket(syscall.AF_INET, syscall.SOCK_DGRAM, syscall.IPPROTO_IP)
|
|
||||||
if err == nil {
|
|
||||||
defer syscall.Close(s)
|
|
||||||
ifreq := ifreqDestroy{Name: t.deviceBytes()}
|
|
||||||
err = ioctl(uintptr(s), syscall.SIOCIFDESTROY, uintptr(unsafe.Pointer(&ifreq)))
|
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.l.WithError(err).Error("Error destroying tunnel")
|
tunDevice.Close()
|
||||||
}
|
return nil, fmt.Errorf("failed to get TUN device name: %w", err)
|
||||||
}()
|
|
||||||
|
|
||||||
// wait up to 1 second so we start blocking at the ioctl
|
|
||||||
select {
|
|
||||||
case <-c:
|
|
||||||
case <-time.After(1 * time.Second):
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ []netip.Prefix) (*tun, error) {
|
|
||||||
return nil, fmt.Errorf("newTunFromFd not supported in FreeBSD")
|
|
||||||
}
|
|
||||||
|
|
||||||
func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, _ bool) (*tun, error) {
|
|
||||||
// Try to open existing tun device
|
|
||||||
var fd int
|
|
||||||
var err error
|
|
||||||
deviceName := c.GetString("tun.dev", "")
|
|
||||||
if deviceName != "" {
|
|
||||||
fd, err = syscall.Open("/dev/"+deviceName, syscall.O_RDWR, 0)
|
|
||||||
}
|
|
||||||
if errors.Is(err, fs.ErrNotExist) || deviceName == "" {
|
|
||||||
// If the device doesn't already exist, request a new one and rename it
|
|
||||||
fd, err = syscall.Open("/dev/tun", syscall.O_RDWR, 0)
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read the name of the interface
|
|
||||||
var name [16]byte
|
|
||||||
arg := fiodgnameArg{length: 16, buf: unsafe.Pointer(&name)}
|
|
||||||
ctrlErr := ioctl(uintptr(fd), FIODGNAME, uintptr(unsafe.Pointer(&arg)))
|
|
||||||
|
|
||||||
if ctrlErr == nil {
|
|
||||||
// set broadcast mode and multicast
|
|
||||||
ifmode := uint32(unix.IFF_BROADCAST | unix.IFF_MULTICAST)
|
|
||||||
ctrlErr = ioctl(uintptr(fd), TUNSIFMODE, uintptr(unsafe.Pointer(&ifmode)))
|
|
||||||
}
|
|
||||||
|
|
||||||
if ctrlErr == nil {
|
|
||||||
// turn on link-layer mode, to support ipv6
|
|
||||||
ifhead := uint32(1)
|
|
||||||
ctrlErr = ioctl(uintptr(fd), TUNSIFHEAD, uintptr(unsafe.Pointer(&ifhead)))
|
|
||||||
}
|
|
||||||
|
|
||||||
if ctrlErr != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
ifName := string(bytes.TrimRight(name[:], "\x00"))
|
|
||||||
if deviceName == "" {
|
|
||||||
deviceName = ifName
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the name doesn't match the desired interface name, rename it now
|
// If the name doesn't match the desired interface name, rename it now
|
||||||
if ifName != deviceName {
|
if actualName != deviceName && deviceName != "" && deviceName != "tun" {
|
||||||
s, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, unix.IPPROTO_IP)
|
if err := renameInterface(actualName, deviceName); err != nil {
|
||||||
if err != nil {
|
tunDevice.Close()
|
||||||
return nil, err
|
return nil, fmt.Errorf("failed to rename interface from %s to %s: %w", actualName, deviceName, err)
|
||||||
}
|
}
|
||||||
defer syscall.Close(s)
|
actualName = deviceName
|
||||||
|
|
||||||
fd := uintptr(s)
|
|
||||||
|
|
||||||
var fromName [16]byte
|
|
||||||
var toName [16]byte
|
|
||||||
copy(fromName[:], ifName)
|
|
||||||
copy(toName[:], deviceName)
|
|
||||||
|
|
||||||
ifrr := ifreqRename{
|
|
||||||
Name: fromName,
|
|
||||||
Data: uintptr(unsafe.Pointer(&toName)),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set the device name
|
t := &wgTun{
|
||||||
ioctl(fd, syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&ifrr)))
|
tunDevice: tunDevice,
|
||||||
}
|
|
||||||
|
|
||||||
t := &tun{
|
|
||||||
Device: deviceName,
|
|
||||||
vpnNetworks: vpnNetworks,
|
vpnNetworks: vpnNetworks,
|
||||||
MTU: c.GetInt("tun.mtu", DefaultMTU),
|
MaxMTU: mtu,
|
||||||
|
DefaultMTU: mtu,
|
||||||
l: l,
|
l: l,
|
||||||
devFd: fd,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create FreeBSD-specific route manager
|
||||||
|
t.routeManager = &tun{}
|
||||||
|
|
||||||
err = t.reload(c, true)
|
err = t.reload(c, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
tunDevice.Close()
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -289,180 +82,86 @@ func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, _ bool) (
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
l.WithField("name", actualName).Info("Created WireGuard TUN device")
|
||||||
|
|
||||||
return t, nil
|
return t, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) addIp(cidr netip.Prefix) error {
|
func (rm *tun) Activate(t *wgTun) error {
|
||||||
if cidr.Addr().Is4() {
|
name, err := t.tunDevice.Name()
|
||||||
ifr := ifreqAlias4{
|
|
||||||
Name: t.deviceBytes(),
|
|
||||||
Addr: unix.RawSockaddrInet4{
|
|
||||||
Len: unix.SizeofSockaddrInet4,
|
|
||||||
Family: unix.AF_INET,
|
|
||||||
Addr: cidr.Addr().As4(),
|
|
||||||
},
|
|
||||||
DstAddr: unix.RawSockaddrInet4{
|
|
||||||
Len: unix.SizeofSockaddrInet4,
|
|
||||||
Family: unix.AF_INET,
|
|
||||||
Addr: getBroadcast(cidr).As4(),
|
|
||||||
},
|
|
||||||
MaskAddr: unix.RawSockaddrInet4{
|
|
||||||
Len: unix.SizeofSockaddrInet4,
|
|
||||||
Family: unix.AF_INET,
|
|
||||||
Addr: prefixToMask(cidr).As4(),
|
|
||||||
},
|
|
||||||
VHid: 0,
|
|
||||||
}
|
|
||||||
s, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, unix.IPPROTO_IP)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return fmt.Errorf("failed to get device name: %w", err)
|
||||||
}
|
|
||||||
defer syscall.Close(s)
|
|
||||||
// Note: unix.SIOCAIFADDR corresponds to FreeBSD's OSIOCAIFADDR
|
|
||||||
if err := ioctl(uintptr(s), unix.SIOCAIFADDR, uintptr(unsafe.Pointer(&ifr))); err != nil {
|
|
||||||
return fmt.Errorf("failed to set tun address %s: %s", cidr.Addr().String(), err)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if cidr.Addr().Is6() {
|
// Set the MTU
|
||||||
ifr := ifreqAlias6{
|
rm.SetMTU(t, t.MaxMTU)
|
||||||
Name: t.deviceBytes(),
|
|
||||||
Addr: unix.RawSockaddrInet6{
|
|
||||||
Len: unix.SizeofSockaddrInet6,
|
|
||||||
Family: unix.AF_INET6,
|
|
||||||
Addr: cidr.Addr().As16(),
|
|
||||||
},
|
|
||||||
PrefixMask: unix.RawSockaddrInet6{
|
|
||||||
Len: unix.SizeofSockaddrInet6,
|
|
||||||
Family: unix.AF_INET6,
|
|
||||||
Addr: prefixToMask(cidr).As16(),
|
|
||||||
},
|
|
||||||
Lifetime: addrLifetime{
|
|
||||||
Expire: 0,
|
|
||||||
Preferred: 0,
|
|
||||||
Vltime: 0xffffffff,
|
|
||||||
Pltime: 0xffffffff,
|
|
||||||
},
|
|
||||||
Flags: IN6_IFF_NODAD,
|
|
||||||
}
|
|
||||||
s, err := syscall.Socket(syscall.AF_INET6, syscall.SOCK_DGRAM, syscall.IPPROTO_IP)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer syscall.Close(s)
|
|
||||||
|
|
||||||
if err := ioctl(uintptr(s), OSIOCAIFADDR_IN6, uintptr(unsafe.Pointer(&ifr))); err != nil {
|
// Add IP addresses
|
||||||
return fmt.Errorf("failed to set tun address %s: %s", cidr.Addr().String(), err)
|
for _, network := range t.vpnNetworks {
|
||||||
}
|
if err := rm.addIP(t, name, network); err != nil {
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return fmt.Errorf("unknown address type %v", cidr)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) Activate() error {
|
|
||||||
// Setup our default MTU
|
|
||||||
err := t.setMTU()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
linkAddr, err := getLinkAddr(t.Device)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if linkAddr == nil {
|
|
||||||
return fmt.Errorf("unable to discover link_addr for tun interface")
|
|
||||||
}
|
|
||||||
t.linkAddr = linkAddr
|
|
||||||
|
|
||||||
for i := range t.vpnNetworks {
|
|
||||||
err := t.addIp(t.vpnNetworks[i])
|
|
||||||
if err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return t.addRoutes(false)
|
// Bring up the interface
|
||||||
|
if err := runCommandBSD("ifconfig", name, "up"); err != nil {
|
||||||
|
return fmt.Errorf("failed to bring up interface: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) setMTU() error {
|
// Set the routes
|
||||||
// Set the MTU on the device
|
if err := rm.AddRoutes(t, false); err != nil {
|
||||||
s, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, unix.IPPROTO_IP)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer syscall.Close(s)
|
|
||||||
|
|
||||||
ifm := ifreqMTU{Name: t.deviceBytes(), MTU: int32(t.MTU)}
|
|
||||||
err = ioctl(uintptr(s), unix.SIOCSIFMTU, uintptr(unsafe.Pointer(&ifm)))
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) reload(c *config.C, initial bool) error {
|
|
||||||
change, routes, err := getAllRoutesFromConfig(c, t.vpnNetworks, initial)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if !initial && !change {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
routeTree, err := makeRouteTree(t.l, routes, false)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Teach nebula how to handle the routes before establishing them in the system table
|
|
||||||
oldRoutes := t.Routes.Swap(&routes)
|
|
||||||
t.routeTree.Store(routeTree)
|
|
||||||
|
|
||||||
if !initial {
|
|
||||||
// Remove first, if the system removes a wanted route hopefully it will be re-added next
|
|
||||||
err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
|
|
||||||
if err != nil {
|
|
||||||
util.LogWithContextIfNeeded("Failed to remove routes", err, t.l)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure any routes we actually want are installed
|
|
||||||
err = t.addRoutes(true)
|
|
||||||
if err != nil {
|
|
||||||
// Catch any stray logs
|
|
||||||
util.LogWithContextIfNeeded("Failed to add routes", err, t.l)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) RoutesFor(ip netip.Addr) routing.Gateways {
|
func (rm *tun) SetMTU(t *wgTun, mtu int) {
|
||||||
r, _ := t.routeTree.Load().Lookup(ip)
|
name, err := t.tunDevice.Name()
|
||||||
return r
|
if err != nil {
|
||||||
|
t.l.WithError(err).Error("Failed to get device name for MTU set")
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) Networks() []netip.Prefix {
|
if err := runCommandBSD("ifconfig", name, "mtu", strconv.Itoa(mtu)); err != nil {
|
||||||
return t.vpnNetworks
|
t.l.WithError(err).Error("Failed to set tun mtu")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) Name() string {
|
func (rm *tun) SetDefaultRoute(t *wgTun, cidr netip.Prefix) error {
|
||||||
return t.Device
|
// On FreeBSD, routes are set via ifconfig and route commands
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
|
func (rm *tun) AddRoutes(t *wgTun, logErrors bool) error {
|
||||||
return nil, fmt.Errorf("TODO: multiqueue not implemented for freebsd")
|
name, err := t.tunDevice.Name()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get device name: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) addRoutes(logErrors bool) error {
|
|
||||||
routes := *t.Routes.Load()
|
routes := *t.Routes.Load()
|
||||||
for _, r := range routes {
|
for _, r := range routes {
|
||||||
if len(r.Via) == 0 || !r.Install {
|
if !r.Install {
|
||||||
// We don't allow route MTUs so only install routes with a via
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
err := addRoute(r.Cidr, t.linkAddr)
|
// Add route using route command
|
||||||
|
args := []string{"add"}
|
||||||
|
|
||||||
|
if r.Cidr.Addr().Is6() {
|
||||||
|
args = append(args, "-inet6")
|
||||||
|
} else {
|
||||||
|
args = append(args, "-inet")
|
||||||
|
}
|
||||||
|
|
||||||
|
args = append(args, r.Cidr.String(), "-interface", name)
|
||||||
|
|
||||||
|
if r.Metric > 0 {
|
||||||
|
// FreeBSD doesn't support route metrics directly like Linux
|
||||||
|
t.l.WithField("route", r).Warn("Route metrics are not fully supported on FreeBSD")
|
||||||
|
}
|
||||||
|
|
||||||
|
err := runCommandBSD("route", args...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
retErr := util.NewContextualError("Failed to add route", map[string]any{"route": r}, err)
|
retErr := util.NewContextualError("Failed to add route", map[string]any{"route": r}, err)
|
||||||
if logErrors {
|
if logErrors {
|
||||||
@@ -478,142 +177,99 @@ func (t *tun) addRoutes(logErrors bool) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) removeRoutes(routes []Route) error {
|
func (rm *tun) RemoveRoutes(t *wgTun, routes []Route) {
|
||||||
|
name, err := t.tunDevice.Name()
|
||||||
|
if err != nil {
|
||||||
|
t.l.WithError(err).Error("Failed to get device name for route removal")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
for _, r := range routes {
|
for _, r := range routes {
|
||||||
if !r.Install {
|
if !r.Install {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
err := delRoute(r.Cidr, t.linkAddr)
|
args := []string{"delete"}
|
||||||
|
|
||||||
|
if r.Cidr.Addr().Is6() {
|
||||||
|
args = append(args, "-inet6")
|
||||||
|
} else {
|
||||||
|
args = append(args, "-inet")
|
||||||
|
}
|
||||||
|
|
||||||
|
args = append(args, r.Cidr.String(), "-interface", name)
|
||||||
|
|
||||||
|
err := runCommandBSD("route", args...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
|
t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
|
||||||
} else {
|
} else {
|
||||||
t.l.WithField("route", r).Info("Removed route")
|
t.l.WithField("route", r).Info("Removed route")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) deviceBytes() (o [16]byte) {
|
func (rm *tun) NewMultiQueueReader(t *wgTun) (io.ReadWriteCloser, error) {
|
||||||
for i, c := range t.Device {
|
// FreeBSD doesn't support multi-queue TUN devices in the same way as Linux
|
||||||
o[i] = byte(c)
|
// Return a reader that wraps the same device
|
||||||
}
|
return &wgTunReader{
|
||||||
return
|
parent: t,
|
||||||
|
tunDevice: t.tunDevice,
|
||||||
|
offset: 0,
|
||||||
|
l: t.l,
|
||||||
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func addRoute(prefix netip.Prefix, gateway netroute.Addr) error {
|
func (rm *tun) addIP(t *wgTun, name string, network netip.Prefix) error {
|
||||||
sock, err := unix.Socket(unix.AF_ROUTE, unix.SOCK_RAW, unix.AF_UNSPEC)
|
addr := network.Addr()
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("unable to create AF_ROUTE socket: %v", err)
|
|
||||||
}
|
|
||||||
defer unix.Close(sock)
|
|
||||||
|
|
||||||
route := &netroute.RouteMessage{
|
if addr.Is4() {
|
||||||
Version: unix.RTM_VERSION,
|
// For IPv4: ifconfig tun0 10.0.0.1/24
|
||||||
Type: unix.RTM_ADD,
|
if err := runCommandBSD("ifconfig", name, network.String()); err != nil {
|
||||||
Flags: unix.RTF_UP,
|
return fmt.Errorf("failed to add IPv4 address: %w", err)
|
||||||
Seq: 1,
|
|
||||||
}
|
|
||||||
|
|
||||||
if prefix.Addr().Is4() {
|
|
||||||
route.Addrs = []netroute.Addr{
|
|
||||||
unix.RTAX_DST: &netroute.Inet4Addr{IP: prefix.Masked().Addr().As4()},
|
|
||||||
unix.RTAX_NETMASK: &netroute.Inet4Addr{IP: prefixToMask(prefix).As4()},
|
|
||||||
unix.RTAX_GATEWAY: gateway,
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
route.Addrs = []netroute.Addr{
|
// For IPv6: ifconfig tun0 inet6 add 2001:db8::1/64
|
||||||
unix.RTAX_DST: &netroute.Inet6Addr{IP: prefix.Masked().Addr().As16()},
|
if err := runCommandBSD("ifconfig", name, "inet6", "add", network.String()); err != nil {
|
||||||
unix.RTAX_NETMASK: &netroute.Inet6Addr{IP: prefixToMask(prefix).As16()},
|
return fmt.Errorf("failed to add IPv6 address: %w", err)
|
||||||
unix.RTAX_GATEWAY: gateway,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
data, err := route.Marshal()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to create route.RouteMessage: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err = unix.Write(sock, data[:])
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(err, unix.EEXIST) {
|
|
||||||
// Try to do a change
|
|
||||||
route.Type = unix.RTM_CHANGE
|
|
||||||
data, err = route.Marshal()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to create route.RouteMessage for change: %w", err)
|
|
||||||
}
|
|
||||||
_, err = unix.Write(sock, data[:])
|
|
||||||
fmt.Println("DOING CHANGE")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return fmt.Errorf("failed to write route.RouteMessage to socket: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func delRoute(prefix netip.Prefix, gateway netroute.Addr) error {
|
func runCommandBSD(name string, args ...string) error {
|
||||||
sock, err := unix.Socket(unix.AF_ROUTE, unix.SOCK_RAW, unix.AF_UNSPEC)
|
cmd := exec.Command(name, args...)
|
||||||
|
output, err := cmd.CombinedOutput()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to create AF_ROUTE socket: %v", err)
|
return fmt.Errorf("%s %s failed: %w\nOutput: %s", name, strings.Join(args, " "), err, string(output))
|
||||||
}
|
}
|
||||||
defer unix.Close(sock)
|
return nil
|
||||||
|
|
||||||
route := netroute.RouteMessage{
|
|
||||||
Version: unix.RTM_VERSION,
|
|
||||||
Type: unix.RTM_DELETE,
|
|
||||||
Seq: 1,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if prefix.Addr().Is4() {
|
func renameInterface(fromName, toName string) error {
|
||||||
route.Addrs = []netroute.Addr{
|
s, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, unix.IPPROTO_IP)
|
||||||
unix.RTAX_DST: &netroute.Inet4Addr{IP: prefix.Masked().Addr().As4()},
|
if err != nil {
|
||||||
unix.RTAX_NETMASK: &netroute.Inet4Addr{IP: prefixToMask(prefix).As4()},
|
return fmt.Errorf("failed to create socket: %w", err)
|
||||||
unix.RTAX_GATEWAY: gateway,
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
route.Addrs = []netroute.Addr{
|
|
||||||
unix.RTAX_DST: &netroute.Inet6Addr{IP: prefix.Masked().Addr().As16()},
|
|
||||||
unix.RTAX_NETMASK: &netroute.Inet6Addr{IP: prefixToMask(prefix).As16()},
|
|
||||||
unix.RTAX_GATEWAY: gateway,
|
|
||||||
}
|
}
|
||||||
|
defer syscall.Close(s)
|
||||||
|
|
||||||
|
fd := uintptr(s)
|
||||||
|
|
||||||
|
var fromNameBytes [unix.IFNAMSIZ]byte
|
||||||
|
var toNameBytes [unix.IFNAMSIZ]byte
|
||||||
|
copy(fromNameBytes[:], fromName)
|
||||||
|
copy(toNameBytes[:], toName)
|
||||||
|
|
||||||
|
ifrr := ifreqRename{
|
||||||
|
Name: fromNameBytes,
|
||||||
|
Data: uintptr(unsafe.Pointer(&toNameBytes)),
|
||||||
}
|
}
|
||||||
|
|
||||||
data, err := route.Marshal()
|
// Set the device name using SIOCSIFNAME ioctl
|
||||||
if err != nil {
|
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, fd, syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&ifrr)))
|
||||||
return fmt.Errorf("failed to create route.RouteMessage: %w", err)
|
if errno != 0 {
|
||||||
}
|
return fmt.Errorf("SIOCSIFNAME ioctl failed: %w", errno)
|
||||||
_, err = unix.Write(sock, data[:])
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to write route.RouteMessage to socket: %w", err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// getLinkAddr Gets the link address for the interface of the given name
|
|
||||||
func getLinkAddr(name string) (*netroute.LinkAddr, error) {
|
|
||||||
rib, err := netroute.FetchRIB(unix.AF_UNSPEC, unix.NET_RT_IFLIST, 0)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
msgs, err := netroute.ParseRIB(unix.NET_RT_IFLIST, rib)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, m := range msgs {
|
|
||||||
switch m := m.(type) {
|
|
||||||
case *netroute.InterfaceMessage:
|
|
||||||
if m.Name == name {
|
|
||||||
sa, ok := m.Addrs[unix.RTAX_IFP].(*netroute.LinkAddr)
|
|
||||||
if ok {
|
|
||||||
return sa, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//go:build !android && !e2e_testing
|
//go:build linux && !android && !e2e_testing
|
||||||
// +build !android,!e2e_testing
|
// +build linux,!android,!e2e_testing
|
||||||
|
|
||||||
package overlay
|
package overlay
|
||||||
|
|
||||||
@@ -9,169 +9,105 @@ import (
|
|||||||
"net"
|
"net"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
"os"
|
"os"
|
||||||
"runtime"
|
|
||||||
"strings"
|
|
||||||
"sync/atomic"
|
|
||||||
"time"
|
"time"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"github.com/gaissmai/bart"
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/slackhq/nebula/config"
|
"github.com/slackhq/nebula/config"
|
||||||
"github.com/slackhq/nebula/routing"
|
"github.com/slackhq/nebula/routing"
|
||||||
"github.com/slackhq/nebula/util"
|
"github.com/slackhq/nebula/util"
|
||||||
wgtun "github.com/slackhq/nebula/wgstack/tun"
|
|
||||||
"github.com/vishvananda/netlink"
|
"github.com/vishvananda/netlink"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
|
wgtun "golang.zx2c4.com/wireguard/tun"
|
||||||
)
|
)
|
||||||
|
|
||||||
type tun struct {
|
type tun struct {
|
||||||
io.ReadWriteCloser
|
|
||||||
fd int
|
|
||||||
Device string
|
|
||||||
vpnNetworks []netip.Prefix
|
|
||||||
MaxMTU int
|
|
||||||
DefaultMTU int
|
|
||||||
TXQueueLen int
|
|
||||||
deviceIndex int
|
deviceIndex int
|
||||||
ioctlFd uintptr
|
ioctlFd uintptr
|
||||||
wgDevice wgtun.Device
|
txQueueLen int
|
||||||
|
|
||||||
Routes atomic.Pointer[[]Route]
|
|
||||||
routeTree atomic.Pointer[bart.Table[routing.Gateways]]
|
|
||||||
routeChan chan struct{}
|
|
||||||
useSystemRoutes bool
|
useSystemRoutes bool
|
||||||
useSystemRoutesBufferSize int
|
useSystemRoutesBufferSize int
|
||||||
|
|
||||||
l *logrus.Logger
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) Networks() []netip.Prefix {
|
func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, multiqueue bool) (*wgTun, error) {
|
||||||
return t.vpnNetworks
|
deviceName := c.GetString("tun.dev", "")
|
||||||
}
|
mtu := c.GetInt("tun.mtu", DefaultMTU)
|
||||||
|
|
||||||
type ifReq struct {
|
// Create WireGuard TUN device
|
||||||
Name [16]byte
|
tunDevice, err := wgtun.CreateTUN(deviceName, mtu)
|
||||||
Flags uint16
|
|
||||||
pad [8]byte
|
|
||||||
}
|
|
||||||
|
|
||||||
type ifreqMTU struct {
|
|
||||||
Name [16]byte
|
|
||||||
MTU int32
|
|
||||||
pad [8]byte
|
|
||||||
}
|
|
||||||
|
|
||||||
type ifreqQLEN struct {
|
|
||||||
Name [16]byte
|
|
||||||
Value int32
|
|
||||||
pad [8]byte
|
|
||||||
}
|
|
||||||
|
|
||||||
func newTunFromFd(c *config.C, l *logrus.Logger, deviceFd int, vpnNetworks []netip.Prefix) (*tun, error) {
|
|
||||||
file := os.NewFile(uintptr(deviceFd), "/dev/net/tun")
|
|
||||||
|
|
||||||
useWGDefault := runtime.GOOS == "linux"
|
|
||||||
useWG := c.GetBool("tun.use_wireguard_stack", c.GetBool("listen.use_wireguard_stack", useWGDefault))
|
|
||||||
t, err := newTunGeneric(c, l, file, vpnNetworks, useWG)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, fmt.Errorf("failed to create TUN device: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Device = "tun0"
|
// Get the actual device name
|
||||||
|
actualName, err := tunDevice.Name()
|
||||||
return t, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, multiqueue bool) (*tun, error) {
|
|
||||||
fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// If /dev/net/tun doesn't exist, try to create it (will happen in docker)
|
tunDevice.Close()
|
||||||
if os.IsNotExist(err) {
|
return nil, fmt.Errorf("failed to get TUN device name: %w", err)
|
||||||
err = os.MkdirAll("/dev/net", 0755)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("/dev/net/tun doesn't exist, failed to mkdir -p /dev/net: %w", err)
|
|
||||||
}
|
|
||||||
err = unix.Mknod("/dev/net/tun", unix.S_IFCHR|0600, int(unix.Mkdev(10, 200)))
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to create /dev/net/tun: %w", err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fd, err = unix.Open("/dev/net/tun", os.O_RDWR, 0)
|
t := &wgTun{
|
||||||
if err != nil {
|
tunDevice: tunDevice,
|
||||||
return nil, fmt.Errorf("created /dev/net/tun, but still failed: %w", err)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var req ifReq
|
|
||||||
req.Flags = uint16(unix.IFF_TUN | unix.IFF_NO_PI)
|
|
||||||
if multiqueue {
|
|
||||||
req.Flags |= unix.IFF_MULTI_QUEUE
|
|
||||||
}
|
|
||||||
copy(req.Name[:], c.GetString("tun.dev", ""))
|
|
||||||
if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
name := strings.Trim(string(req.Name[:]), "\x00")
|
|
||||||
|
|
||||||
file := os.NewFile(uintptr(fd), "/dev/net/tun")
|
|
||||||
useWGDefault := runtime.GOOS == "linux"
|
|
||||||
useWG := c.GetBool("tun.use_wireguard_stack", c.GetBool("listen.use_wireguard_stack", useWGDefault))
|
|
||||||
t, err := newTunGeneric(c, l, file, vpnNetworks, useWG)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
t.Device = name
|
|
||||||
|
|
||||||
return t, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func newTunGeneric(c *config.C, l *logrus.Logger, file *os.File, vpnNetworks []netip.Prefix, useWireguard bool) (*tun, error) {
|
|
||||||
var (
|
|
||||||
rw io.ReadWriteCloser = file
|
|
||||||
fd = int(file.Fd())
|
|
||||||
wgDev wgtun.Device
|
|
||||||
)
|
|
||||||
|
|
||||||
if useWireguard {
|
|
||||||
dev, err := wgtun.CreateTUNFromFile(file, c.GetInt("tun.mtu", DefaultMTU))
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to initialize wireguard tun device: %w", err)
|
|
||||||
}
|
|
||||||
wgDev = dev
|
|
||||||
rw = newWireguardTunIO(dev, c.GetInt("tun.mtu", DefaultMTU))
|
|
||||||
fd = int(dev.File().Fd())
|
|
||||||
}
|
|
||||||
|
|
||||||
t := &tun{
|
|
||||||
ReadWriteCloser: rw,
|
|
||||||
fd: fd,
|
|
||||||
vpnNetworks: vpnNetworks,
|
vpnNetworks: vpnNetworks,
|
||||||
TXQueueLen: c.GetInt("tun.tx_queue", 500),
|
MaxMTU: mtu,
|
||||||
useSystemRoutes: c.GetBool("tun.use_system_route_table", false),
|
DefaultMTU: mtu,
|
||||||
useSystemRoutesBufferSize: c.GetInt("tun.use_system_route_table_buffer_size", 0),
|
|
||||||
l: l,
|
l: l,
|
||||||
}
|
}
|
||||||
if wgDev != nil {
|
|
||||||
t.wgDevice = wgDev
|
|
||||||
}
|
|
||||||
if wgDev != nil {
|
|
||||||
// replace ioctl fd with device file descriptor to keep route management working
|
|
||||||
file = wgDev.File()
|
|
||||||
t.fd = int(file.Fd())
|
|
||||||
t.ioctlFd = file.Fd()
|
|
||||||
}
|
|
||||||
|
|
||||||
if t.ioctlFd == 0 {
|
// Create Linux-specific route manager
|
||||||
t.ioctlFd = file.Fd()
|
routeManager := &tun{
|
||||||
|
txQueueLen: c.GetInt("tun.tx_queue", 500),
|
||||||
|
useSystemRoutes: c.GetBool("tun.use_system_route_table", false),
|
||||||
|
useSystemRoutesBufferSize: c.GetInt("tun.use_system_route_table_buffer_size", 0),
|
||||||
}
|
}
|
||||||
|
t.routeManager = routeManager
|
||||||
|
|
||||||
err := t.reload(c, true)
|
err = t.reload(c, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
tunDevice.Close()
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
c.RegisterReloadCallback(func(c *config.C) {
|
||||||
|
err := t.reload(c, false)
|
||||||
|
if err != nil {
|
||||||
|
util.LogWithContextIfNeeded("failed to reload tun device", err, t.l)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
l.WithField("name", actualName).Info("Created WireGuard TUN device")
|
||||||
|
|
||||||
|
return t, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func newTunFromFd(c *config.C, l *logrus.Logger, deviceFd int, vpnNetworks []netip.Prefix) (*wgTun, error) {
|
||||||
|
// Create TUN device from file descriptor
|
||||||
|
file := os.NewFile(uintptr(deviceFd), "/dev/net/tun")
|
||||||
|
mtu := c.GetInt("tun.mtu", DefaultMTU)
|
||||||
|
tunDevice, err := wgtun.CreateTUNFromFile(file, mtu)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create TUN device from fd: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
t := &wgTun{
|
||||||
|
tunDevice: tunDevice,
|
||||||
|
vpnNetworks: vpnNetworks,
|
||||||
|
MaxMTU: mtu,
|
||||||
|
DefaultMTU: mtu,
|
||||||
|
l: l,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create Linux-specific route manager
|
||||||
|
routeManager := &tun{
|
||||||
|
txQueueLen: c.GetInt("tun.tx_queue", 500),
|
||||||
|
useSystemRoutes: c.GetBool("tun.use_system_route_table", false),
|
||||||
|
useSystemRoutesBufferSize: c.GetInt("tun.use_system_route_table_buffer_size", 0),
|
||||||
|
}
|
||||||
|
t.routeManager = routeManager
|
||||||
|
|
||||||
|
err = t.reload(c, true)
|
||||||
|
if err != nil {
|
||||||
|
tunDevice.Close()
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -185,273 +121,105 @@ func newTunGeneric(c *config.C, l *logrus.Logger, file *os.File, vpnNetworks []n
|
|||||||
return t, nil
|
return t, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) reload(c *config.C, initial bool) error {
|
func (rm *tun) Activate(t *wgTun) error {
|
||||||
routeChange, routes, err := getAllRoutesFromConfig(c, t.vpnNetworks, initial)
|
name, err := t.tunDevice.Name()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return fmt.Errorf("failed to get device name: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if !initial && !routeChange && !c.HasChanged("tun.mtu") {
|
if t.routeManager.useSystemRoutes {
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
routeTree, err := makeRouteTree(t.l, routes, true)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
oldDefaultMTU := t.DefaultMTU
|
|
||||||
oldMaxMTU := t.MaxMTU
|
|
||||||
newDefaultMTU := c.GetInt("tun.mtu", DefaultMTU)
|
|
||||||
newMaxMTU := newDefaultMTU
|
|
||||||
for i, r := range routes {
|
|
||||||
if r.MTU == 0 {
|
|
||||||
routes[i].MTU = newDefaultMTU
|
|
||||||
}
|
|
||||||
|
|
||||||
if r.MTU > t.MaxMTU {
|
|
||||||
newMaxMTU = r.MTU
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
t.MaxMTU = newMaxMTU
|
|
||||||
t.DefaultMTU = newDefaultMTU
|
|
||||||
|
|
||||||
// Teach nebula how to handle the routes before establishing them in the system table
|
|
||||||
oldRoutes := t.Routes.Swap(&routes)
|
|
||||||
t.routeTree.Store(routeTree)
|
|
||||||
|
|
||||||
if !initial {
|
|
||||||
if oldMaxMTU != newMaxMTU {
|
|
||||||
t.setMTU()
|
|
||||||
t.l.Infof("Set max MTU to %v was %v", t.MaxMTU, oldMaxMTU)
|
|
||||||
}
|
|
||||||
|
|
||||||
if oldDefaultMTU != newDefaultMTU {
|
|
||||||
for i := range t.vpnNetworks {
|
|
||||||
err := t.setDefaultRoute(t.vpnNetworks[i])
|
|
||||||
if err != nil {
|
|
||||||
t.l.Warn(err)
|
|
||||||
} else {
|
|
||||||
t.l.Infof("Set default MTU to %v was %v", t.DefaultMTU, oldDefaultMTU)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove first, if the system removes a wanted route hopefully it will be re-added next
|
|
||||||
t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
|
|
||||||
|
|
||||||
// Ensure any routes we actually want are installed
|
|
||||||
err = t.addRoutes(true)
|
|
||||||
if err != nil {
|
|
||||||
// This should never be called since addRoutes should log its own errors in a reload condition
|
|
||||||
util.LogWithContextIfNeeded("Failed to refresh routes", err, t.l)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
|
|
||||||
fd, err := unix.Open("/dev/net/tun", os.O_RDWR, 0)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
var req ifReq
|
|
||||||
req.Flags = uint16(unix.IFF_TUN | unix.IFF_NO_PI | unix.IFF_MULTI_QUEUE)
|
|
||||||
copy(req.Name[:], t.Device)
|
|
||||||
if err = ioctl(uintptr(fd), uintptr(unix.TUNSETIFF), uintptr(unsafe.Pointer(&req))); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
file := os.NewFile(uintptr(fd), "/dev/net/tun")
|
|
||||||
|
|
||||||
return file, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) RoutesFor(ip netip.Addr) routing.Gateways {
|
|
||||||
r, _ := t.routeTree.Load().Lookup(ip)
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) Write(b []byte) (int, error) {
|
|
||||||
var nn int
|
|
||||||
maximum := len(b)
|
|
||||||
|
|
||||||
for {
|
|
||||||
n, err := unix.Write(t.fd, b[nn:maximum])
|
|
||||||
if n > 0 {
|
|
||||||
nn += n
|
|
||||||
}
|
|
||||||
if nn == len(b) {
|
|
||||||
return nn, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return nn, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if n == 0 {
|
|
||||||
return nn, io.ErrUnexpectedEOF
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) deviceBytes() (o [16]byte) {
|
|
||||||
for i, c := range t.Device {
|
|
||||||
o[i] = byte(c)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
func hasNetlinkAddr(al []*netlink.Addr, x netlink.Addr) bool {
|
|
||||||
for i := range al {
|
|
||||||
if al[i].Equal(x) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// addIPs uses netlink to add all addresses that don't exist, then it removes ones that should not be there
|
|
||||||
func (t *tun) addIPs(link netlink.Link) error {
|
|
||||||
newAddrs := make([]*netlink.Addr, len(t.vpnNetworks))
|
|
||||||
for i := range t.vpnNetworks {
|
|
||||||
newAddrs[i] = &netlink.Addr{
|
|
||||||
IPNet: &net.IPNet{
|
|
||||||
IP: t.vpnNetworks[i].Addr().AsSlice(),
|
|
||||||
Mask: net.CIDRMask(t.vpnNetworks[i].Bits(), t.vpnNetworks[i].Addr().BitLen()),
|
|
||||||
},
|
|
||||||
Label: t.vpnNetworks[i].Addr().Zone(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//add all new addresses
|
|
||||||
for i := range newAddrs {
|
|
||||||
//AddrReplace still adds new IPs, but if their properties change it will change them as well
|
|
||||||
if err := netlink.AddrReplace(link, newAddrs[i]); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//iterate over remainder, remove whoever shouldn't be there
|
|
||||||
al, err := netlink.AddrList(link, netlink.FAMILY_ALL)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to get tun address list: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := range al {
|
|
||||||
if hasNetlinkAddr(newAddrs, al[i]) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
err = netlink.AddrDel(link, &al[i])
|
|
||||||
if err != nil {
|
|
||||||
t.l.WithError(err).Error("failed to remove address from tun address list")
|
|
||||||
} else {
|
|
||||||
t.l.WithField("removed", al[i].String()).Info("removed address not listed in cert(s)")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) Activate() error {
|
|
||||||
devName := t.deviceBytes()
|
|
||||||
|
|
||||||
if t.useSystemRoutes {
|
|
||||||
t.watchRoutes()
|
t.watchRoutes()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get the netlink device
|
||||||
|
link, err := netlink.LinkByName(name)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get tun device link: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
rm.deviceIndex = link.Attrs().Index
|
||||||
|
|
||||||
|
// Open socket for ioctl operations
|
||||||
s, err := unix.Socket(
|
s, err := unix.Socket(
|
||||||
unix.AF_INET, //because everything we use t.ioctlFd for is address family independent, this is fine
|
unix.AF_INET,
|
||||||
unix.SOCK_DGRAM,
|
unix.SOCK_DGRAM,
|
||||||
unix.IPPROTO_IP,
|
unix.IPPROTO_IP,
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
t.ioctlFd = uintptr(s)
|
rm.ioctlFd = uintptr(s)
|
||||||
|
|
||||||
// Set the device name
|
rm.SetMTU(t, t.MaxMTU)
|
||||||
ifrf := ifReq{Name: devName}
|
|
||||||
if err = ioctl(t.ioctlFd, unix.SIOCGIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
|
|
||||||
return fmt.Errorf("failed to set tun device name: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
link, err := netlink.LinkByName(t.Device)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to get tun device link: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
t.deviceIndex = link.Attrs().Index
|
|
||||||
|
|
||||||
// Setup our default MTU
|
|
||||||
t.setMTU()
|
|
||||||
|
|
||||||
// Set the transmit queue length
|
// Set the transmit queue length
|
||||||
ifrq := ifreqQLEN{Name: devName, Value: int32(t.TXQueueLen)}
|
devName := deviceBytes(name)
|
||||||
if err = ioctl(t.ioctlFd, unix.SIOCSIFTXQLEN, uintptr(unsafe.Pointer(&ifrq))); err != nil {
|
ifrq := ifreqQLEN{Name: devName, Value: int32(rm.txQueueLen)}
|
||||||
|
if err = ioctl(t.routeManager.ioctlFd, unix.SIOCSIFTXQLEN, uintptr(unsafe.Pointer(&ifrq))); err != nil {
|
||||||
// If we can't set the queue length nebula will still work but it may lead to packet loss
|
// If we can't set the queue length nebula will still work but it may lead to packet loss
|
||||||
t.l.WithError(err).Error("Failed to set tun tx queue length")
|
t.l.WithError(err).Error("Failed to set tun tx queue length")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Disable IPv6 link-local address generation
|
||||||
const modeNone = 1
|
const modeNone = 1
|
||||||
if err = netlink.LinkSetIP6AddrGenMode(link, modeNone); err != nil {
|
if err = netlink.LinkSetIP6AddrGenMode(link, modeNone); err != nil {
|
||||||
t.l.WithError(err).Warn("Failed to disable link local address generation")
|
t.l.WithError(err).Warn("Failed to disable link local address generation")
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = t.addIPs(link); err != nil {
|
// Add IP addresses
|
||||||
|
if err = t.routeManager.addIPs(t, link); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bring up the interface
|
// Bring up the interface
|
||||||
ifrf.Flags = ifrf.Flags | unix.IFF_UP
|
if err = netlink.LinkSetUp(link); err != nil {
|
||||||
if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
|
|
||||||
return fmt.Errorf("failed to bring the tun device up: %s", err)
|
return fmt.Errorf("failed to bring the tun device up: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
//set route MTU
|
// Set route MTU
|
||||||
for i := range t.vpnNetworks {
|
for i := range t.vpnNetworks {
|
||||||
if err = t.setDefaultRoute(t.vpnNetworks[i]); err != nil {
|
if err = t.routeManager.SetDefaultRoute(t, t.vpnNetworks[i]); err != nil {
|
||||||
return fmt.Errorf("failed to set default route MTU: %w", err)
|
return fmt.Errorf("failed to set default route MTU: %w", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set the routes
|
// Set the routes
|
||||||
if err = t.addRoutes(false); err != nil {
|
if err = t.routeManager.AddRoutes(t, false); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run the interface
|
|
||||||
ifrf.Flags = ifrf.Flags | unix.IFF_UP | unix.IFF_RUNNING
|
|
||||||
if err = ioctl(t.ioctlFd, unix.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifrf))); err != nil {
|
|
||||||
return fmt.Errorf("failed to run tun device: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) setMTU() {
|
func (rm *tun) SetMTU(t *wgTun, mtu int) {
|
||||||
// Set the MTU on the device
|
name, err := t.tunDevice.Name()
|
||||||
ifm := ifreqMTU{Name: t.deviceBytes(), MTU: int32(t.MaxMTU)}
|
if err != nil {
|
||||||
if err := ioctl(t.ioctlFd, unix.SIOCSIFMTU, uintptr(unsafe.Pointer(&ifm))); err != nil {
|
t.l.WithError(err).Error("Failed to get device name for MTU set")
|
||||||
// This is currently a non fatal condition because the route table must have the MTU set appropriately as well
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
link, err := netlink.LinkByName(name)
|
||||||
|
if err != nil {
|
||||||
|
t.l.WithError(err).Error("Failed to get link for MTU set")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := netlink.LinkSetMTU(link, mtu); err != nil {
|
||||||
t.l.WithError(err).Error("Failed to set tun mtu")
|
t.l.WithError(err).Error("Failed to set tun mtu")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) setDefaultRoute(cidr netip.Prefix) error {
|
func (rm *tun) SetDefaultRoute(t *wgTun, cidr netip.Prefix) error {
|
||||||
dr := &net.IPNet{
|
dr := &net.IPNet{
|
||||||
IP: cidr.Masked().Addr().AsSlice(),
|
IP: cidr.Masked().Addr().AsSlice(),
|
||||||
Mask: net.CIDRMask(cidr.Bits(), cidr.Addr().BitLen()),
|
Mask: net.CIDRMask(cidr.Bits(), cidr.Addr().BitLen()),
|
||||||
}
|
}
|
||||||
|
|
||||||
nr := netlink.Route{
|
nr := netlink.Route{
|
||||||
LinkIndex: t.deviceIndex,
|
LinkIndex: t.routeManager.deviceIndex,
|
||||||
Dst: dr,
|
Dst: dr,
|
||||||
MTU: t.DefaultMTU,
|
MTU: t.DefaultMTU,
|
||||||
AdvMSS: t.advMSS(Route{}),
|
AdvMSS: advMSS(Route{}, t.DefaultMTU, t.MaxMTU),
|
||||||
Scope: unix.RT_SCOPE_LINK,
|
Scope: unix.RT_SCOPE_LINK,
|
||||||
Src: net.IP(cidr.Addr().AsSlice()),
|
Src: net.IP(cidr.Addr().AsSlice()),
|
||||||
Protocol: unix.RTPROT_KERNEL,
|
Protocol: unix.RTPROT_KERNEL,
|
||||||
@@ -461,7 +229,7 @@ func (t *tun) setDefaultRoute(cidr netip.Prefix) error {
|
|||||||
err := netlink.RouteReplace(&nr)
|
err := netlink.RouteReplace(&nr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.l.WithError(err).WithField("cidr", cidr).Warn("Failed to set default route MTU, retrying")
|
t.l.WithError(err).WithField("cidr", cidr).Warn("Failed to set default route MTU, retrying")
|
||||||
//retry twice more -- on some systems there appears to be a race condition where if we set routes too soon, netlink says `invalid argument`
|
// Retry twice more
|
||||||
for i := 0; i < 2; i++ {
|
for i := 0; i < 2; i++ {
|
||||||
time.Sleep(100 * time.Millisecond)
|
time.Sleep(100 * time.Millisecond)
|
||||||
err = netlink.RouteReplace(&nr)
|
err = netlink.RouteReplace(&nr)
|
||||||
@@ -479,8 +247,7 @@ func (t *tun) setDefaultRoute(cidr netip.Prefix) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) addRoutes(logErrors bool) error {
|
func (rm *tun) AddRoutes(t *wgTun, logErrors bool) error {
|
||||||
// Path routes
|
|
||||||
routes := *t.Routes.Load()
|
routes := *t.Routes.Load()
|
||||||
for _, r := range routes {
|
for _, r := range routes {
|
||||||
if !r.Install {
|
if !r.Install {
|
||||||
@@ -493,10 +260,10 @@ func (t *tun) addRoutes(logErrors bool) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
nr := netlink.Route{
|
nr := netlink.Route{
|
||||||
LinkIndex: t.deviceIndex,
|
LinkIndex: t.routeManager.deviceIndex,
|
||||||
Dst: dr,
|
Dst: dr,
|
||||||
MTU: r.MTU,
|
MTU: r.MTU,
|
||||||
AdvMSS: t.advMSS(r),
|
AdvMSS: advMSS(r, t.DefaultMTU, t.MaxMTU),
|
||||||
Scope: unix.RT_SCOPE_LINK,
|
Scope: unix.RT_SCOPE_LINK,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -520,7 +287,7 @@ func (t *tun) addRoutes(logErrors bool) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) removeRoutes(routes []Route) {
|
func (rm *tun) RemoveRoutes(t *wgTun, routes []Route) {
|
||||||
for _, r := range routes {
|
for _, r := range routes {
|
||||||
if !r.Install {
|
if !r.Install {
|
||||||
continue
|
continue
|
||||||
@@ -532,10 +299,10 @@ func (t *tun) removeRoutes(routes []Route) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
nr := netlink.Route{
|
nr := netlink.Route{
|
||||||
LinkIndex: t.deviceIndex,
|
LinkIndex: t.routeManager.deviceIndex,
|
||||||
Dst: dr,
|
Dst: dr,
|
||||||
MTU: r.MTU,
|
MTU: r.MTU,
|
||||||
AdvMSS: t.advMSS(r),
|
AdvMSS: advMSS(r, t.DefaultMTU, t.MaxMTU),
|
||||||
Scope: unix.RT_SCOPE_LINK,
|
Scope: unix.RT_SCOPE_LINK,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -552,30 +319,105 @@ func (t *tun) removeRoutes(routes []Route) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) Name() string {
|
func (rm *tun) NewMultiQueueReader(t *wgTun) (io.ReadWriteCloser, error) {
|
||||||
return t.Device
|
// For Linux with WireGuard TUN, we can reuse the same device
|
||||||
|
// The vectorized I/O will handle batching
|
||||||
|
return &wgTunReader{
|
||||||
|
parent: t,
|
||||||
|
tunDevice: t.tunDevice,
|
||||||
|
offset: 0,
|
||||||
|
l: t.l,
|
||||||
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) advMSS(r Route) int {
|
func deviceBytes(name string) [16]byte {
|
||||||
|
var o [16]byte
|
||||||
|
for i, c := range name {
|
||||||
|
if i >= 16 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
o[i] = byte(c)
|
||||||
|
}
|
||||||
|
return o
|
||||||
|
}
|
||||||
|
|
||||||
|
func advMSS(r Route, defaultMTU, maxMTU int) int {
|
||||||
mtu := r.MTU
|
mtu := r.MTU
|
||||||
if r.MTU == 0 {
|
if r.MTU == 0 {
|
||||||
mtu = t.DefaultMTU
|
mtu = defaultMTU
|
||||||
}
|
}
|
||||||
|
|
||||||
// We only need to set advmss if the route MTU does not match the device MTU
|
// We only need to set advmss if the route MTU does not match the device MTU
|
||||||
if mtu != t.MaxMTU {
|
if mtu != maxMTU {
|
||||||
return mtu - 40
|
return mtu - 40
|
||||||
}
|
}
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) watchRoutes() {
|
type ifreqQLEN struct {
|
||||||
|
Name [16]byte
|
||||||
|
Value int32
|
||||||
|
pad [8]byte
|
||||||
|
}
|
||||||
|
|
||||||
|
func hasNetlinkAddr(al []*netlink.Addr, x netlink.Addr) bool {
|
||||||
|
for i := range al {
|
||||||
|
if al[i].Equal(x) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rm *tun) addIPs(t *wgTun, link netlink.Link) error {
|
||||||
|
newAddrs := make([]*netlink.Addr, len(t.vpnNetworks))
|
||||||
|
for i := range t.vpnNetworks {
|
||||||
|
newAddrs[i] = &netlink.Addr{
|
||||||
|
IPNet: &net.IPNet{
|
||||||
|
IP: t.vpnNetworks[i].Addr().AsSlice(),
|
||||||
|
Mask: net.CIDRMask(t.vpnNetworks[i].Bits(), t.vpnNetworks[i].Addr().BitLen()),
|
||||||
|
},
|
||||||
|
Label: t.vpnNetworks[i].Addr().Zone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add all new addresses
|
||||||
|
for i := range newAddrs {
|
||||||
|
if err := netlink.AddrReplace(link, newAddrs[i]); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Iterate over remainder, remove whoever shouldn't be there
|
||||||
|
al, err := netlink.AddrList(link, netlink.FAMILY_ALL)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get tun address list: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range al {
|
||||||
|
if hasNetlinkAddr(newAddrs, al[i]) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
err = netlink.AddrDel(link, &al[i])
|
||||||
|
if err != nil {
|
||||||
|
t.l.WithError(err).Error("failed to remove address from tun address list")
|
||||||
|
} else {
|
||||||
|
t.l.WithField("removed", al[i].String()).Info("removed address not listed in cert(s)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// watchRoutes monitors system route changes
|
||||||
|
func (t *wgTun) watchRoutes() {
|
||||||
|
|
||||||
rch := make(chan netlink.RouteUpdate)
|
rch := make(chan netlink.RouteUpdate)
|
||||||
doneChan := make(chan struct{})
|
doneChan := make(chan struct{})
|
||||||
|
|
||||||
netlinkOptions := netlink.RouteSubscribeOptions{
|
netlinkOptions := netlink.RouteSubscribeOptions{
|
||||||
ReceiveBufferSize: t.useSystemRoutesBufferSize,
|
ReceiveBufferSize: t.routeManager.useSystemRoutesBufferSize,
|
||||||
ReceiveBufferForceSize: t.useSystemRoutesBufferSize != 0,
|
ReceiveBufferForceSize: t.routeManager.useSystemRoutesBufferSize != 0,
|
||||||
ErrorCallback: func(e error) { t.l.WithError(e).Errorf("netlink error") },
|
ErrorCallback: func(e error) { t.l.WithError(e).Errorf("netlink error") },
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -593,87 +435,19 @@ func (t *tun) watchRoutes() {
|
|||||||
if ok {
|
if ok {
|
||||||
t.updateRoutes(r)
|
t.updateRoutes(r)
|
||||||
} else {
|
} else {
|
||||||
// may be should do something here as
|
|
||||||
// netlink stops sending updates
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
case <-doneChan:
|
case <-doneChan:
|
||||||
// netlink.RouteSubscriber will close the rch for us
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) isGatewayInVpnNetworks(gwAddr netip.Addr) bool {
|
func (t *wgTun) updateRoutes(r netlink.RouteUpdate) {
|
||||||
withinNetworks := false
|
gateways := t.getGatewaysFromRoute(&r.Route, t.routeManager.deviceIndex)
|
||||||
for i := range t.vpnNetworks {
|
|
||||||
if t.vpnNetworks[i].Contains(gwAddr) {
|
|
||||||
withinNetworks = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return withinNetworks
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) getGatewaysFromRoute(r *netlink.Route) routing.Gateways {
|
|
||||||
|
|
||||||
var gateways routing.Gateways
|
|
||||||
|
|
||||||
link, err := netlink.LinkByName(t.Device)
|
|
||||||
if err != nil {
|
|
||||||
t.l.WithField("Devicename", t.Device).Error("Ignoring route update: failed to get link by name")
|
|
||||||
return gateways
|
|
||||||
}
|
|
||||||
|
|
||||||
// If this route is relevant to our interface and there is a gateway then add it
|
|
||||||
if r.LinkIndex == link.Attrs().Index && len(r.Gw) > 0 {
|
|
||||||
gwAddr, ok := netip.AddrFromSlice(r.Gw)
|
|
||||||
if !ok {
|
|
||||||
t.l.WithField("route", r).Debug("Ignoring route update, invalid gateway address")
|
|
||||||
} else {
|
|
||||||
gwAddr = gwAddr.Unmap()
|
|
||||||
|
|
||||||
if !t.isGatewayInVpnNetworks(gwAddr) {
|
|
||||||
// Gateway isn't in our overlay network, ignore
|
|
||||||
t.l.WithField("route", r).Debug("Ignoring route update, not in our network")
|
|
||||||
} else {
|
|
||||||
gateways = append(gateways, routing.NewGateway(gwAddr, 1))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, p := range r.MultiPath {
|
|
||||||
// If this route is relevant to our interface and there is a gateway then add it
|
|
||||||
if p.LinkIndex == link.Attrs().Index && len(p.Gw) > 0 {
|
|
||||||
gwAddr, ok := netip.AddrFromSlice(p.Gw)
|
|
||||||
if !ok {
|
|
||||||
t.l.WithField("route", r).Debug("Ignoring multipath route update, invalid gateway address")
|
|
||||||
} else {
|
|
||||||
gwAddr = gwAddr.Unmap()
|
|
||||||
|
|
||||||
if !t.isGatewayInVpnNetworks(gwAddr) {
|
|
||||||
// Gateway isn't in our overlay network, ignore
|
|
||||||
t.l.WithField("route", r).Debug("Ignoring route update, not in our network")
|
|
||||||
} else {
|
|
||||||
// p.Hops+1 = weight of the route
|
|
||||||
gateways = append(gateways, routing.NewGateway(gwAddr, p.Hops+1))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
routing.CalculateBucketsForGateways(gateways)
|
|
||||||
return gateways
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) updateRoutes(r netlink.RouteUpdate) {
|
|
||||||
|
|
||||||
gateways := t.getGatewaysFromRoute(&r.Route)
|
|
||||||
|
|
||||||
if len(gateways) == 0 {
|
if len(gateways) == 0 {
|
||||||
// No gateways relevant to our network, no routing changes required.
|
|
||||||
t.l.WithField("route", r).Debug("Ignoring route update, no gateways")
|
t.l.WithField("route", r).Debug("Ignoring route update, no gateways")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -697,7 +471,6 @@ func (t *tun) updateRoutes(r netlink.RouteUpdate) {
|
|||||||
if r.Type == unix.RTM_NEWROUTE {
|
if r.Type == unix.RTM_NEWROUTE {
|
||||||
t.l.WithField("destination", dst).WithField("via", gateways).Info("Adding route")
|
t.l.WithField("destination", dst).WithField("via", gateways).Info("Adding route")
|
||||||
newTree.Insert(dst, gateways)
|
newTree.Insert(dst, gateways)
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
t.l.WithField("destination", dst).WithField("via", gateways).Info("Removing route")
|
t.l.WithField("destination", dst).WithField("via", gateways).Info("Removing route")
|
||||||
newTree.Delete(dst)
|
newTree.Delete(dst)
|
||||||
@@ -705,26 +478,71 @@ func (t *tun) updateRoutes(r netlink.RouteUpdate) {
|
|||||||
t.routeTree.Store(newTree)
|
t.routeTree.Store(newTree)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) Close() error {
|
func (t *wgTun) getGatewaysFromRoute(r *netlink.Route, deviceIndex int) routing.Gateways {
|
||||||
if t.routeChan != nil {
|
var gateways routing.Gateways
|
||||||
close(t.routeChan)
|
|
||||||
|
name, err := t.tunDevice.Name()
|
||||||
|
if err != nil {
|
||||||
|
t.l.Error("Ignoring route update: failed to get device name")
|
||||||
|
return gateways
|
||||||
}
|
}
|
||||||
|
|
||||||
if t.ReadWriteCloser != nil {
|
link, err := netlink.LinkByName(name)
|
||||||
_ = t.ReadWriteCloser.Close()
|
if err != nil {
|
||||||
|
t.l.WithField("DeviceName", name).Error("Ignoring route update: failed to get link by name")
|
||||||
|
return gateways
|
||||||
}
|
}
|
||||||
|
|
||||||
if t.wgDevice != nil {
|
// If this route is relevant to our interface and there is a gateway then add it
|
||||||
_ = t.wgDevice.Close()
|
if r.LinkIndex == link.Attrs().Index && len(r.Gw) > 0 {
|
||||||
if t.ioctlFd > 0 {
|
gwAddr, ok := netip.AddrFromSlice(r.Gw)
|
||||||
// underlying fd already closed by the device
|
if !ok {
|
||||||
t.ioctlFd = 0
|
t.l.WithField("route", r).Debug("Ignoring route update, invalid gateway address")
|
||||||
|
} else {
|
||||||
|
gwAddr = gwAddr.Unmap()
|
||||||
|
|
||||||
|
if !t.isGatewayInVpnNetworks(gwAddr) {
|
||||||
|
t.l.WithField("route", r).Debug("Ignoring route update, not in our network")
|
||||||
|
} else {
|
||||||
|
gateways = append(gateways, routing.NewGateway(gwAddr, 1))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if t.ioctlFd > 0 {
|
for _, p := range r.MultiPath {
|
||||||
_ = os.NewFile(t.ioctlFd, "ioctlFd").Close()
|
if p.LinkIndex == link.Attrs().Index && len(p.Gw) > 0 {
|
||||||
|
gwAddr, ok := netip.AddrFromSlice(p.Gw)
|
||||||
|
if !ok {
|
||||||
|
t.l.WithField("route", r).Debug("Ignoring multipath route update, invalid gateway address")
|
||||||
|
} else {
|
||||||
|
gwAddr = gwAddr.Unmap()
|
||||||
|
|
||||||
|
if !t.isGatewayInVpnNetworks(gwAddr) {
|
||||||
|
t.l.WithField("route", r).Debug("Ignoring route update, not in our network")
|
||||||
|
} else {
|
||||||
|
gateways = append(gateways, routing.NewGateway(gwAddr, p.Hops+1))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
routing.CalculateBucketsForGateways(gateways)
|
||||||
|
return gateways
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *wgTun) isGatewayInVpnNetworks(gwAddr netip.Addr) bool {
|
||||||
|
for i := range t.vpnNetworks {
|
||||||
|
if t.vpnNetworks[i].Contains(gwAddr) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func ioctl(a1, a2, a3 uintptr) error {
|
||||||
|
_, _, errno := unix.Syscall(unix.SYS_IOCTL, a1, a2, a3)
|
||||||
|
if errno != 0 {
|
||||||
|
return errno
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,56 +0,0 @@
|
|||||||
//go:build linux && !android && !e2e_testing
|
|
||||||
|
|
||||||
package overlay
|
|
||||||
|
|
||||||
import "fmt"
|
|
||||||
|
|
||||||
func (t *tun) batchIO() (*wireguardTunIO, bool) {
|
|
||||||
io, ok := t.ReadWriteCloser.(*wireguardTunIO)
|
|
||||||
return io, ok
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) ReadIntoBatch(pool *PacketPool) ([]*Packet, error) {
|
|
||||||
io, ok := t.batchIO()
|
|
||||||
if !ok {
|
|
||||||
return nil, fmt.Errorf("wireguard batch I/O not enabled")
|
|
||||||
}
|
|
||||||
return io.ReadIntoBatch(pool)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) WriteBatch(packets []*Packet) (int, error) {
|
|
||||||
io, ok := t.batchIO()
|
|
||||||
if ok {
|
|
||||||
return io.WriteBatch(packets)
|
|
||||||
}
|
|
||||||
for _, pkt := range packets {
|
|
||||||
if pkt == nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if _, err := t.Write(pkt.Payload()[:pkt.Len]); err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
pkt.Release()
|
|
||||||
}
|
|
||||||
return len(packets), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) BatchHeadroom() int {
|
|
||||||
if io, ok := t.batchIO(); ok {
|
|
||||||
return io.BatchHeadroom()
|
|
||||||
}
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) BatchPayloadCap() int {
|
|
||||||
if io, ok := t.batchIO(); ok {
|
|
||||||
return io.BatchPayloadCap()
|
|
||||||
}
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) BatchSize() int {
|
|
||||||
if io, ok := t.batchIO(); ok {
|
|
||||||
return io.BatchSize()
|
|
||||||
}
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
@@ -7,25 +7,26 @@ import "testing"
|
|||||||
|
|
||||||
var runAdvMSSTests = []struct {
|
var runAdvMSSTests = []struct {
|
||||||
name string
|
name string
|
||||||
tun *tun
|
defaultMTU int
|
||||||
|
maxMTU int
|
||||||
r Route
|
r Route
|
||||||
expected int
|
expected int
|
||||||
}{
|
}{
|
||||||
// Standard case, default MTU is the device max MTU
|
// Standard case, default MTU is the device max MTU
|
||||||
{"default", &tun{DefaultMTU: 1440, MaxMTU: 1440}, Route{}, 0},
|
{"default", 1440, 1440, Route{}, 0},
|
||||||
{"default-min", &tun{DefaultMTU: 1440, MaxMTU: 1440}, Route{MTU: 1440}, 0},
|
{"default-min", 1440, 1440, Route{MTU: 1440}, 0},
|
||||||
{"default-low", &tun{DefaultMTU: 1440, MaxMTU: 1440}, Route{MTU: 1200}, 1160},
|
{"default-low", 1440, 1440, Route{MTU: 1200}, 1160},
|
||||||
|
|
||||||
// Case where we have a route MTU set higher than the default
|
// Case where we have a route MTU set higher than the default
|
||||||
{"route", &tun{DefaultMTU: 1440, MaxMTU: 8941}, Route{}, 1400},
|
{"route", 1440, 8941, Route{}, 1400},
|
||||||
{"route-min", &tun{DefaultMTU: 1440, MaxMTU: 8941}, Route{MTU: 1440}, 1400},
|
{"route-min", 1440, 8941, Route{MTU: 1440}, 1400},
|
||||||
{"route-high", &tun{DefaultMTU: 1440, MaxMTU: 8941}, Route{MTU: 8941}, 0},
|
{"route-high", 1440, 8941, Route{MTU: 8941}, 0},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestTunAdvMSS(t *testing.T) {
|
func TestTunAdvMSS(t *testing.T) {
|
||||||
for _, tt := range runAdvMSSTests {
|
for _, tt := range runAdvMSSTests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
o := tt.tun.advMSS(tt.r)
|
o := advMSS(tt.r, tt.defaultMTU, tt.maxMTU)
|
||||||
if o != tt.expected {
|
if o != tt.expected {
|
||||||
t.Errorf("got %d, want %d", o, tt.expected)
|
t.Errorf("got %d, want %d", o, tt.expected)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -547,3 +547,41 @@ func delRoute(prefix netip.Prefix, gateways []netip.Prefix) error {
|
|||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ioctl(a1, a2, a3 uintptr) error {
|
||||||
|
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, a1, a2, a3)
|
||||||
|
if errno != 0 {
|
||||||
|
return errno
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func prefixToMask(prefix netip.Prefix) netip.Addr {
|
||||||
|
bits := prefix.Bits()
|
||||||
|
if prefix.Addr().Is4() {
|
||||||
|
mask := ^uint32(0) << (32 - bits)
|
||||||
|
return netip.AddrFrom4([4]byte{
|
||||||
|
byte(mask >> 24),
|
||||||
|
byte(mask >> 16),
|
||||||
|
byte(mask >> 8),
|
||||||
|
byte(mask),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
var mask [16]byte
|
||||||
|
for i := 0; i < bits/8; i++ {
|
||||||
|
mask[i] = 0xff
|
||||||
|
}
|
||||||
|
if bits%8 != 0 {
|
||||||
|
mask[bits/8] = ^byte(0) << (8 - bits%8)
|
||||||
|
}
|
||||||
|
return netip.AddrFrom16(mask)
|
||||||
|
}
|
||||||
|
|
||||||
|
func selectGateway(prefix netip.Prefix, gateways []netip.Prefix) (netip.Prefix, error) {
|
||||||
|
for _, gw := range gateways {
|
||||||
|
if prefix.Addr().Is4() == gw.Addr().Is4() {
|
||||||
|
return gw, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return netip.Prefix{}, fmt.Errorf("no suitable gateway found for prefix %v", prefix)
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,14 +0,0 @@
|
|||||||
//go:build !windows
|
|
||||||
// +build !windows
|
|
||||||
|
|
||||||
package overlay
|
|
||||||
|
|
||||||
import "syscall"
|
|
||||||
|
|
||||||
func ioctl(a1, a2, a3 uintptr) error {
|
|
||||||
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, a1, a2, a3)
|
|
||||||
if errno != 0 {
|
|
||||||
return errno
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
@@ -1,104 +1,59 @@
|
|||||||
//go:build !e2e_testing
|
//go:build openbsd && !e2e_testing
|
||||||
// +build !e2e_testing
|
// +build openbsd,!e2e_testing
|
||||||
|
|
||||||
package overlay
|
package overlay
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
"os"
|
"os/exec"
|
||||||
"regexp"
|
"strconv"
|
||||||
"sync/atomic"
|
"strings"
|
||||||
"syscall"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
"github.com/gaissmai/bart"
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/slackhq/nebula/config"
|
"github.com/slackhq/nebula/config"
|
||||||
"github.com/slackhq/nebula/routing"
|
|
||||||
"github.com/slackhq/nebula/util"
|
"github.com/slackhq/nebula/util"
|
||||||
netroute "golang.org/x/net/route"
|
wgtun "golang.zx2c4.com/wireguard/tun"
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
type tun struct{}
|
||||||
SIOCAIFADDR_IN6 = 0x8080691a
|
|
||||||
)
|
|
||||||
|
|
||||||
type ifreqAlias4 struct {
|
func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ []netip.Prefix) (*wgTun, error) {
|
||||||
Name [unix.IFNAMSIZ]byte
|
return nil, fmt.Errorf("newTunFromFd not supported on OpenBSD")
|
||||||
Addr unix.RawSockaddrInet4
|
|
||||||
DstAddr unix.RawSockaddrInet4
|
|
||||||
MaskAddr unix.RawSockaddrInet4
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type ifreqAlias6 struct {
|
func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, _ bool) (*wgTun, error) {
|
||||||
Name [unix.IFNAMSIZ]byte
|
deviceName := c.GetString("tun.dev", "tun")
|
||||||
Addr unix.RawSockaddrInet6
|
mtu := c.GetInt("tun.mtu", DefaultMTU)
|
||||||
DstAddr unix.RawSockaddrInet6
|
|
||||||
PrefixMask unix.RawSockaddrInet6
|
|
||||||
Flags uint32
|
|
||||||
Lifetime [2]uint32
|
|
||||||
}
|
|
||||||
|
|
||||||
type ifreq struct {
|
// Create WireGuard TUN device
|
||||||
Name [unix.IFNAMSIZ]byte
|
tunDevice, err := wgtun.CreateTUN(deviceName, mtu)
|
||||||
data int
|
|
||||||
}
|
|
||||||
|
|
||||||
type tun struct {
|
|
||||||
Device string
|
|
||||||
vpnNetworks []netip.Prefix
|
|
||||||
MTU int
|
|
||||||
Routes atomic.Pointer[[]Route]
|
|
||||||
routeTree atomic.Pointer[bart.Table[routing.Gateways]]
|
|
||||||
l *logrus.Logger
|
|
||||||
f *os.File
|
|
||||||
fd int
|
|
||||||
// cache out buffer since we need to prepend 4 bytes for tun metadata
|
|
||||||
out []byte
|
|
||||||
}
|
|
||||||
|
|
||||||
var deviceNameRE = regexp.MustCompile(`^tun[0-9]+$`)
|
|
||||||
|
|
||||||
func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ []netip.Prefix) (*tun, error) {
|
|
||||||
return nil, fmt.Errorf("newTunFromFd not supported in openbsd")
|
|
||||||
}
|
|
||||||
|
|
||||||
func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, _ bool) (*tun, error) {
|
|
||||||
// Try to open tun device
|
|
||||||
var err error
|
|
||||||
deviceName := c.GetString("tun.dev", "")
|
|
||||||
if deviceName == "" {
|
|
||||||
return nil, fmt.Errorf("a device name in the format of /dev/tunN must be specified")
|
|
||||||
}
|
|
||||||
if !deviceNameRE.MatchString(deviceName) {
|
|
||||||
return nil, fmt.Errorf("a device name in the format of /dev/tunN must be specified")
|
|
||||||
}
|
|
||||||
|
|
||||||
fd, err := unix.Open("/dev/"+deviceName, os.O_RDWR, 0)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, fmt.Errorf("failed to create TUN device: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = unix.SetNonblock(fd, true)
|
// Get the actual device name
|
||||||
|
actualName, err := tunDevice.Name()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
l.WithError(err).Warn("Failed to set the tun device as nonblocking")
|
tunDevice.Close()
|
||||||
|
return nil, fmt.Errorf("failed to get TUN device name: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
t := &tun{
|
t := &wgTun{
|
||||||
f: os.NewFile(uintptr(fd), ""),
|
tunDevice: tunDevice,
|
||||||
fd: fd,
|
|
||||||
Device: deviceName,
|
|
||||||
vpnNetworks: vpnNetworks,
|
vpnNetworks: vpnNetworks,
|
||||||
MTU: c.GetInt("tun.mtu", DefaultMTU),
|
MaxMTU: mtu,
|
||||||
|
DefaultMTU: mtu,
|
||||||
l: l,
|
l: l,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create OpenBSD-specific route manager
|
||||||
|
t.routeManager = &tun{}
|
||||||
|
|
||||||
err = t.reload(c, true)
|
err = t.reload(c, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
tunDevice.Close()
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -109,221 +64,86 @@ func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, _ bool) (
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
l.WithField("name", actualName).Info("Created WireGuard TUN device")
|
||||||
|
|
||||||
return t, nil
|
return t, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) Close() error {
|
func (rm *tun) Activate(t *wgTun) error {
|
||||||
if t.f != nil {
|
name, err := t.tunDevice.Name()
|
||||||
if err := t.f.Close(); err != nil {
|
|
||||||
return fmt.Errorf("error closing tun file: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// t.f.Close should have handled it for us but let's be extra sure
|
|
||||||
_ = unix.Close(t.fd)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) Read(to []byte) (int, error) {
|
|
||||||
buf := make([]byte, len(to)+4)
|
|
||||||
|
|
||||||
n, err := t.f.Read(buf)
|
|
||||||
|
|
||||||
copy(to, buf[4:])
|
|
||||||
return n - 4, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write is only valid for single threaded use
|
|
||||||
func (t *tun) Write(from []byte) (int, error) {
|
|
||||||
buf := t.out
|
|
||||||
if cap(buf) < len(from)+4 {
|
|
||||||
buf = make([]byte, len(from)+4)
|
|
||||||
t.out = buf
|
|
||||||
}
|
|
||||||
buf = buf[:len(from)+4]
|
|
||||||
|
|
||||||
if len(from) == 0 {
|
|
||||||
return 0, syscall.EIO
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine the IP Family for the NULL L2 Header
|
|
||||||
ipVer := from[0] >> 4
|
|
||||||
if ipVer == 4 {
|
|
||||||
buf[3] = syscall.AF_INET
|
|
||||||
} else if ipVer == 6 {
|
|
||||||
buf[3] = syscall.AF_INET6
|
|
||||||
} else {
|
|
||||||
return 0, fmt.Errorf("unable to determine IP version from packet")
|
|
||||||
}
|
|
||||||
|
|
||||||
copy(buf[4:], from)
|
|
||||||
|
|
||||||
n, err := t.f.Write(buf)
|
|
||||||
return n - 4, err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) addIp(cidr netip.Prefix) error {
|
|
||||||
if cidr.Addr().Is4() {
|
|
||||||
var req ifreqAlias4
|
|
||||||
req.Name = t.deviceBytes()
|
|
||||||
req.Addr = unix.RawSockaddrInet4{
|
|
||||||
Len: unix.SizeofSockaddrInet4,
|
|
||||||
Family: unix.AF_INET,
|
|
||||||
Addr: cidr.Addr().As4(),
|
|
||||||
}
|
|
||||||
req.DstAddr = unix.RawSockaddrInet4{
|
|
||||||
Len: unix.SizeofSockaddrInet4,
|
|
||||||
Family: unix.AF_INET,
|
|
||||||
Addr: cidr.Addr().As4(),
|
|
||||||
}
|
|
||||||
req.MaskAddr = unix.RawSockaddrInet4{
|
|
||||||
Len: unix.SizeofSockaddrInet4,
|
|
||||||
Family: unix.AF_INET,
|
|
||||||
Addr: prefixToMask(cidr).As4(),
|
|
||||||
}
|
|
||||||
|
|
||||||
s, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, unix.IPPROTO_IP)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get device name: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set the MTU
|
||||||
|
rm.SetMTU(t, t.MaxMTU)
|
||||||
|
|
||||||
|
// Add IP addresses
|
||||||
|
for _, network := range t.vpnNetworks {
|
||||||
|
if err := rm.addIP(t, name, network); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer syscall.Close(s)
|
|
||||||
|
|
||||||
if err := ioctl(uintptr(s), unix.SIOCAIFADDR, uintptr(unsafe.Pointer(&req))); err != nil {
|
|
||||||
return fmt.Errorf("failed to set tun address %s: %s", cidr.Addr(), err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
err = addRoute(cidr, t.vpnNetworks)
|
// Bring up the interface
|
||||||
if err != nil {
|
if err := runCommandBSD("ifconfig", name, "up"); err != nil {
|
||||||
return fmt.Errorf("failed to set route for vpn network %v: %w", cidr, err)
|
return fmt.Errorf("failed to bring up interface: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set the routes
|
||||||
|
if err := rm.AddRoutes(t, false); err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if cidr.Addr().Is6() {
|
func (rm *tun) SetMTU(t *wgTun, mtu int) {
|
||||||
var req ifreqAlias6
|
name, err := t.tunDevice.Name()
|
||||||
req.Name = t.deviceBytes()
|
|
||||||
req.Addr = unix.RawSockaddrInet6{
|
|
||||||
Len: unix.SizeofSockaddrInet6,
|
|
||||||
Family: unix.AF_INET6,
|
|
||||||
Addr: cidr.Addr().As16(),
|
|
||||||
}
|
|
||||||
req.PrefixMask = unix.RawSockaddrInet6{
|
|
||||||
Len: unix.SizeofSockaddrInet6,
|
|
||||||
Family: unix.AF_INET6,
|
|
||||||
Addr: prefixToMask(cidr).As16(),
|
|
||||||
}
|
|
||||||
req.Lifetime[0] = 0xffffffff
|
|
||||||
req.Lifetime[1] = 0xffffffff
|
|
||||||
|
|
||||||
s, err := unix.Socket(unix.AF_INET6, unix.SOCK_DGRAM, unix.IPPROTO_IP)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
t.l.WithError(err).Error("Failed to get device name for MTU set")
|
||||||
}
|
return
|
||||||
defer syscall.Close(s)
|
|
||||||
|
|
||||||
if err := ioctl(uintptr(s), SIOCAIFADDR_IN6, uintptr(unsafe.Pointer(&req))); err != nil {
|
|
||||||
return fmt.Errorf("failed to set tun address %s: %s", cidr.Addr().String(), err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := runCommandBSD("ifconfig", name, "mtu", strconv.Itoa(mtu)); err != nil {
|
||||||
|
t.l.WithError(err).Error("Failed to set tun mtu")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rm *tun) SetDefaultRoute(t *wgTun, cidr netip.Prefix) error {
|
||||||
|
// On OpenBSD, routes are set via ifconfig and route commands
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return fmt.Errorf("unknown address type %v", cidr)
|
func (rm *tun) AddRoutes(t *wgTun, logErrors bool) error {
|
||||||
}
|
name, err := t.tunDevice.Name()
|
||||||
|
|
||||||
func (t *tun) Activate() error {
|
|
||||||
err := t.doIoctlByName(unix.SIOCSIFMTU, uint32(t.MTU))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to set tun mtu: %w", err)
|
return fmt.Errorf("failed to get device name: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := range t.vpnNetworks {
|
|
||||||
err = t.addIp(t.vpnNetworks[i])
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return t.addRoutes(false)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) doIoctlByName(ctl uintptr, value uint32) error {
|
|
||||||
s, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, unix.IPPROTO_IP)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer syscall.Close(s)
|
|
||||||
|
|
||||||
ir := ifreq{Name: t.deviceBytes(), data: int(value)}
|
|
||||||
err = ioctl(uintptr(s), ctl, uintptr(unsafe.Pointer(&ir)))
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) reload(c *config.C, initial bool) error {
|
|
||||||
change, routes, err := getAllRoutesFromConfig(c, t.vpnNetworks, initial)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if !initial && !change {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
routeTree, err := makeRouteTree(t.l, routes, false)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Teach nebula how to handle the routes before establishing them in the system table
|
|
||||||
oldRoutes := t.Routes.Swap(&routes)
|
|
||||||
t.routeTree.Store(routeTree)
|
|
||||||
|
|
||||||
if !initial {
|
|
||||||
// Remove first, if the system removes a wanted route hopefully it will be re-added next
|
|
||||||
err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
|
|
||||||
if err != nil {
|
|
||||||
util.LogWithContextIfNeeded("Failed to remove routes", err, t.l)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure any routes we actually want are installed
|
|
||||||
err = t.addRoutes(true)
|
|
||||||
if err != nil {
|
|
||||||
// Catch any stray logs
|
|
||||||
util.LogWithContextIfNeeded("Failed to add routes", err, t.l)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) RoutesFor(ip netip.Addr) routing.Gateways {
|
|
||||||
r, _ := t.routeTree.Load().Lookup(ip)
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) Networks() []netip.Prefix {
|
|
||||||
return t.vpnNetworks
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) Name() string {
|
|
||||||
return t.Device
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
|
|
||||||
return nil, fmt.Errorf("TODO: multiqueue not implemented for openbsd")
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tun) addRoutes(logErrors bool) error {
|
|
||||||
routes := *t.Routes.Load()
|
routes := *t.Routes.Load()
|
||||||
|
|
||||||
for _, r := range routes {
|
for _, r := range routes {
|
||||||
if len(r.Via) == 0 || !r.Install {
|
if !r.Install {
|
||||||
// We don't allow route MTUs so only install routes with a via
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
err := addRoute(r.Cidr, t.vpnNetworks)
|
// Add route using route command
|
||||||
|
args := []string{"add"}
|
||||||
|
|
||||||
|
if r.Cidr.Addr().Is6() {
|
||||||
|
args = append(args, "-inet6")
|
||||||
|
} else {
|
||||||
|
args = append(args, "-inet")
|
||||||
|
}
|
||||||
|
|
||||||
|
args = append(args, r.Cidr.String(), "-interface", name)
|
||||||
|
|
||||||
|
if r.Metric > 0 {
|
||||||
|
// OpenBSD doesn't support route metrics directly like Linux
|
||||||
|
t.l.WithField("route", r).Warn("Route metrics are not fully supported on OpenBSD")
|
||||||
|
}
|
||||||
|
|
||||||
|
err := runCommandBSD("route", args...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
retErr := util.NewContextualError("Failed to add route", map[string]any{"route": r}, err)
|
retErr := util.NewContextualError("Failed to add route", map[string]any{"route": r}, err)
|
||||||
if logErrors {
|
if logErrors {
|
||||||
@@ -339,131 +159,71 @@ func (t *tun) addRoutes(logErrors bool) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) removeRoutes(routes []Route) error {
|
func (rm *tun) RemoveRoutes(t *wgTun, routes []Route) {
|
||||||
|
name, err := t.tunDevice.Name()
|
||||||
|
if err != nil {
|
||||||
|
t.l.WithError(err).Error("Failed to get device name for route removal")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
for _, r := range routes {
|
for _, r := range routes {
|
||||||
if !r.Install {
|
if !r.Install {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
err := delRoute(r.Cidr, t.vpnNetworks)
|
args := []string{"delete"}
|
||||||
|
|
||||||
|
if r.Cidr.Addr().Is6() {
|
||||||
|
args = append(args, "-inet6")
|
||||||
|
} else {
|
||||||
|
args = append(args, "-inet")
|
||||||
|
}
|
||||||
|
|
||||||
|
args = append(args, r.Cidr.String(), "-interface", name)
|
||||||
|
|
||||||
|
err := runCommandBSD("route", args...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
|
t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
|
||||||
} else {
|
} else {
|
||||||
t.l.WithField("route", r).Info("Removed route")
|
t.l.WithField("route", r).Info("Removed route")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tun) deviceBytes() (o [16]byte) {
|
func (rm *tun) NewMultiQueueReader(t *wgTun) (io.ReadWriteCloser, error) {
|
||||||
for i, c := range t.Device {
|
// OpenBSD doesn't support multi-queue TUN devices in the same way as Linux
|
||||||
o[i] = byte(c)
|
// Return a reader that wraps the same device
|
||||||
}
|
return &wgTunReader{
|
||||||
return
|
parent: t,
|
||||||
|
tunDevice: t.tunDevice,
|
||||||
|
offset: 0,
|
||||||
|
l: t.l,
|
||||||
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func addRoute(prefix netip.Prefix, gateways []netip.Prefix) error {
|
func (rm *tun) addIP(t *wgTun, name string, network netip.Prefix) error {
|
||||||
sock, err := unix.Socket(unix.AF_ROUTE, unix.SOCK_RAW, unix.AF_UNSPEC)
|
addr := network.Addr()
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("unable to create AF_ROUTE socket: %v", err)
|
|
||||||
}
|
|
||||||
defer unix.Close(sock)
|
|
||||||
|
|
||||||
route := &netroute.RouteMessage{
|
if addr.Is4() {
|
||||||
Version: unix.RTM_VERSION,
|
// For IPv4: ifconfig tun0 10.0.0.1/24
|
||||||
Type: unix.RTM_ADD,
|
if err := runCommandBSD("ifconfig", name, network.String()); err != nil {
|
||||||
Flags: unix.RTF_UP | unix.RTF_GATEWAY,
|
return fmt.Errorf("failed to add IPv4 address: %w", err)
|
||||||
Seq: 1,
|
|
||||||
}
|
|
||||||
|
|
||||||
if prefix.Addr().Is4() {
|
|
||||||
gw, err := selectGateway(prefix, gateways)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
route.Addrs = []netroute.Addr{
|
|
||||||
unix.RTAX_DST: &netroute.Inet4Addr{IP: prefix.Masked().Addr().As4()},
|
|
||||||
unix.RTAX_NETMASK: &netroute.Inet4Addr{IP: prefixToMask(prefix).As4()},
|
|
||||||
unix.RTAX_GATEWAY: &netroute.Inet4Addr{IP: gw.Addr().As4()},
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
gw, err := selectGateway(prefix, gateways)
|
// For IPv6: ifconfig tun0 inet6 add 2001:db8::1/64
|
||||||
if err != nil {
|
if err := runCommandBSD("ifconfig", name, "inet6", "add", network.String()); err != nil {
|
||||||
return err
|
return fmt.Errorf("failed to add IPv6 address: %w", err)
|
||||||
}
|
}
|
||||||
route.Addrs = []netroute.Addr{
|
|
||||||
unix.RTAX_DST: &netroute.Inet6Addr{IP: prefix.Masked().Addr().As16()},
|
|
||||||
unix.RTAX_NETMASK: &netroute.Inet6Addr{IP: prefixToMask(prefix).As16()},
|
|
||||||
unix.RTAX_GATEWAY: &netroute.Inet6Addr{IP: gw.Addr().As16()},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := route.Marshal()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to create route.RouteMessage: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err = unix.Write(sock, data[:])
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(err, unix.EEXIST) {
|
|
||||||
// Try to do a change
|
|
||||||
route.Type = unix.RTM_CHANGE
|
|
||||||
data, err = route.Marshal()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to create route.RouteMessage for change: %w", err)
|
|
||||||
}
|
|
||||||
_, err = unix.Write(sock, data[:])
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return fmt.Errorf("failed to write route.RouteMessage to socket: %w", err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func delRoute(prefix netip.Prefix, gateways []netip.Prefix) error {
|
func runCommandBSD(name string, args ...string) error {
|
||||||
sock, err := unix.Socket(unix.AF_ROUTE, unix.SOCK_RAW, unix.AF_UNSPEC)
|
cmd := exec.Command(name, args...)
|
||||||
|
output, err := cmd.CombinedOutput()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to create AF_ROUTE socket: %v", err)
|
return fmt.Errorf("%s %s failed: %w\nOutput: %s", name, strings.Join(args, " "), err, string(output))
|
||||||
}
|
}
|
||||||
defer unix.Close(sock)
|
|
||||||
|
|
||||||
route := netroute.RouteMessage{
|
|
||||||
Version: unix.RTM_VERSION,
|
|
||||||
Type: unix.RTM_DELETE,
|
|
||||||
Seq: 1,
|
|
||||||
}
|
|
||||||
|
|
||||||
if prefix.Addr().Is4() {
|
|
||||||
gw, err := selectGateway(prefix, gateways)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
route.Addrs = []netroute.Addr{
|
|
||||||
unix.RTAX_DST: &netroute.Inet4Addr{IP: prefix.Masked().Addr().As4()},
|
|
||||||
unix.RTAX_NETMASK: &netroute.Inet4Addr{IP: prefixToMask(prefix).As4()},
|
|
||||||
unix.RTAX_GATEWAY: &netroute.Inet4Addr{IP: gw.Addr().As4()},
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
gw, err := selectGateway(prefix, gateways)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
route.Addrs = []netroute.Addr{
|
|
||||||
unix.RTAX_DST: &netroute.Inet6Addr{IP: prefix.Masked().Addr().As16()},
|
|
||||||
unix.RTAX_NETMASK: &netroute.Inet6Addr{IP: prefixToMask(prefix).As16()},
|
|
||||||
unix.RTAX_GATEWAY: &netroute.Inet6Addr{IP: gw.Addr().As16()},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := route.Marshal()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to create route.RouteMessage: %w", err)
|
|
||||||
}
|
|
||||||
_, err = unix.Write(sock, data[:])
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to write route.RouteMessage to socket: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
242
overlay/tun_wg.go
Normal file
242
overlay/tun_wg.go
Normal file
@@ -0,0 +1,242 @@
|
|||||||
|
//go:build !android && !netbsd && !e2e_testing
|
||||||
|
// +build !android,!netbsd,!e2e_testing
|
||||||
|
|
||||||
|
package overlay
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/netip"
|
||||||
|
"sync/atomic"
|
||||||
|
|
||||||
|
"github.com/gaissmai/bart"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
"github.com/slackhq/nebula/config"
|
||||||
|
"github.com/slackhq/nebula/routing"
|
||||||
|
"github.com/slackhq/nebula/util"
|
||||||
|
wgtun "golang.zx2c4.com/wireguard/tun"
|
||||||
|
)
|
||||||
|
|
||||||
|
// wgTun wraps a WireGuard TUN device and implements the overlay.Device interface
|
||||||
|
type wgTun struct {
|
||||||
|
tunDevice wgtun.Device
|
||||||
|
vpnNetworks []netip.Prefix
|
||||||
|
MaxMTU int
|
||||||
|
DefaultMTU int
|
||||||
|
|
||||||
|
Routes atomic.Pointer[[]Route]
|
||||||
|
routeTree atomic.Pointer[bart.Table[routing.Gateways]]
|
||||||
|
routeChan chan struct{}
|
||||||
|
|
||||||
|
// Platform-specific route management
|
||||||
|
routeManager *tun
|
||||||
|
|
||||||
|
l *logrus.Logger
|
||||||
|
}
|
||||||
|
|
||||||
|
// BatchReader is implemented by readers that support vectorized I/O.
type BatchReader interface {
	// BatchRead fills the caller-supplied buffers, records per-buffer
	// lengths in sizes, and returns the number of packets read.
	BatchRead(buffers [][]byte, sizes []int) (int, error)
}
|
||||||
|
|
||||||
|
// BatchWriter is implemented by writers that support vectorized I/O.
type BatchWriter interface {
	// BatchWrite submits all packets in one call and returns a count
	// together with any error.
	BatchWrite(packets [][]byte) (int, error)
}
|
||||||
|
|
||||||
|
// wgTunReader wraps a single TUN queue for multi-queue support
|
||||||
|
type wgTunReader struct {
|
||||||
|
parent *wgTun
|
||||||
|
tunDevice wgtun.Device
|
||||||
|
offset int
|
||||||
|
l *logrus.Logger
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *wgTun) Networks() []netip.Prefix {
|
||||||
|
return t.vpnNetworks
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *wgTun) Name() string {
|
||||||
|
name, err := t.tunDevice.Name()
|
||||||
|
if err != nil {
|
||||||
|
t.l.WithError(err).Error("Failed to get TUN device name")
|
||||||
|
return "unknown"
|
||||||
|
}
|
||||||
|
return name
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *wgTun) RoutesFor(ip netip.Addr) routing.Gateways {
|
||||||
|
r, _ := t.routeTree.Load().Lookup(ip)
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *wgTun) Activate() error {
|
||||||
|
if t.routeManager == nil {
|
||||||
|
return fmt.Errorf("route manager not initialized")
|
||||||
|
}
|
||||||
|
return t.routeManager.Activate(t)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read implements single-packet read for backward compatibility
|
||||||
|
func (t *wgTun) Read(b []byte) (int, error) {
|
||||||
|
bufs := [][]byte{b}
|
||||||
|
sizes := []int{0}
|
||||||
|
n, err := t.tunDevice.Read(bufs, sizes, 0)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if n == 0 {
|
||||||
|
return 0, io.ErrNoProgress
|
||||||
|
}
|
||||||
|
return sizes[0], nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write implements single-packet write for backward compatibility
|
||||||
|
func (t *wgTun) Write(b []byte) (int, error) {
|
||||||
|
bufs := [][]byte{b}
|
||||||
|
offset := 0
|
||||||
|
|
||||||
|
// WireGuard TUN expects the packet data to start at offset 0
|
||||||
|
n, err := t.tunDevice.Write(bufs, offset)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if n == 0 {
|
||||||
|
return 0, io.ErrShortWrite
|
||||||
|
}
|
||||||
|
return len(b), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *wgTun) Close() error {
|
||||||
|
if t.routeChan != nil {
|
||||||
|
close(t.routeChan)
|
||||||
|
}
|
||||||
|
|
||||||
|
if t.tunDevice != nil {
|
||||||
|
return t.tunDevice.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *wgTun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
|
||||||
|
// For WireGuard TUN, we need to create separate TUN device instances for multi-queue
|
||||||
|
// The platform-specific implementation will handle this
|
||||||
|
if t.routeManager == nil {
|
||||||
|
return nil, fmt.Errorf("route manager not initialized for multi-queue reader")
|
||||||
|
}
|
||||||
|
|
||||||
|
return t.routeManager.NewMultiQueueReader(t)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *wgTun) reload(c *config.C, initial bool) error {
|
||||||
|
routeChange, routes, err := getAllRoutesFromConfig(c, t.vpnNetworks, initial)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if !initial && !routeChange && !c.HasChanged("tun.mtu") {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
routeTree, err := makeRouteTree(t.l, routes, true)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
oldDefaultMTU := t.DefaultMTU
|
||||||
|
oldMaxMTU := t.MaxMTU
|
||||||
|
newDefaultMTU := c.GetInt("tun.mtu", DefaultMTU)
|
||||||
|
newMaxMTU := newDefaultMTU
|
||||||
|
for i, r := range routes {
|
||||||
|
if r.MTU == 0 {
|
||||||
|
routes[i].MTU = newDefaultMTU
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.MTU > t.MaxMTU {
|
||||||
|
newMaxMTU = r.MTU
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
t.MaxMTU = newMaxMTU
|
||||||
|
t.DefaultMTU = newDefaultMTU
|
||||||
|
|
||||||
|
// Teach nebula how to handle the routes before establishing them in the system table
|
||||||
|
oldRoutes := t.Routes.Swap(&routes)
|
||||||
|
t.routeTree.Store(routeTree)
|
||||||
|
|
||||||
|
if !initial && t.routeManager != nil {
|
||||||
|
if oldMaxMTU != newMaxMTU {
|
||||||
|
t.routeManager.SetMTU(t, t.MaxMTU)
|
||||||
|
t.l.Infof("Set max MTU to %v was %v", t.MaxMTU, oldMaxMTU)
|
||||||
|
}
|
||||||
|
|
||||||
|
if oldDefaultMTU != newDefaultMTU {
|
||||||
|
for i := range t.vpnNetworks {
|
||||||
|
err := t.routeManager.SetDefaultRoute(t, t.vpnNetworks[i])
|
||||||
|
if err != nil {
|
||||||
|
t.l.Warn(err)
|
||||||
|
} else {
|
||||||
|
t.l.Infof("Set default MTU to %v was %v", t.DefaultMTU, oldDefaultMTU)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove first, if the system removes a wanted route hopefully it will be re-added next
|
||||||
|
t.routeManager.RemoveRoutes(t, findRemovedRoutes(routes, *oldRoutes))
|
||||||
|
|
||||||
|
// Ensure any routes we actually want are installed
|
||||||
|
err = t.routeManager.AddRoutes(t, true)
|
||||||
|
if err != nil {
|
||||||
|
// This should never be called since AddRoutes should log its own errors in a reload condition
|
||||||
|
util.LogWithContextIfNeeded("Failed to refresh routes", err, t.l)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// BatchRead reads multiple packets from the TUN device using vectorized I/O
|
||||||
|
// The caller provides buffers and sizes slices, and this function returns the number of packets read.
|
||||||
|
func (r *wgTunReader) BatchRead(buffers [][]byte, sizes []int) (int, error) {
|
||||||
|
return r.tunDevice.Read(buffers, sizes, r.offset)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read implements io.Reader for wgTunReader (single packet for compatibility)
|
||||||
|
func (r *wgTunReader) Read(b []byte) (int, error) {
|
||||||
|
bufs := [][]byte{b}
|
||||||
|
sizes := []int{0}
|
||||||
|
n, err := r.tunDevice.Read(bufs, sizes, r.offset)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if n == 0 {
|
||||||
|
return 0, io.ErrNoProgress
|
||||||
|
}
|
||||||
|
return sizes[0], nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write implements io.Writer for wgTunReader
|
||||||
|
func (r *wgTunReader) Write(b []byte) (int, error) {
|
||||||
|
bufs := [][]byte{b}
|
||||||
|
n, err := r.tunDevice.Write(bufs, r.offset)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if n == 0 {
|
||||||
|
return 0, io.ErrShortWrite
|
||||||
|
}
|
||||||
|
return len(b), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// BatchWrite writes multiple packets to the TUN device using vectorized I/O
|
||||||
|
func (r *wgTunReader) BatchWrite(packets [][]byte) (int, error) {
|
||||||
|
return r.tunDevice.Write(packets, r.offset)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *wgTunReader) Close() error {
|
||||||
|
if r.tunDevice != nil {
|
||||||
|
return r.tunDevice.Close()
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
@@ -1,84 +1,77 @@
|
|||||||
//go:build !e2e_testing
|
//go:build windows && !e2e_testing
|
||||||
// +build !e2e_testing
|
// +build windows,!e2e_testing
|
||||||
|
|
||||||
package overlay
|
package overlay
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"crypto"
|
"crypto"
|
||||||
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/netip"
|
"net/netip"
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"runtime"
|
|
||||||
"sync/atomic"
|
|
||||||
"syscall"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
"github.com/gaissmai/bart"
|
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/slackhq/nebula/config"
|
"github.com/slackhq/nebula/config"
|
||||||
"github.com/slackhq/nebula/routing"
|
|
||||||
"github.com/slackhq/nebula/util"
|
"github.com/slackhq/nebula/util"
|
||||||
"github.com/slackhq/nebula/wintun"
|
|
||||||
"golang.org/x/sys/windows"
|
"golang.org/x/sys/windows"
|
||||||
|
wgtun "golang.zx2c4.com/wireguard/tun"
|
||||||
"golang.zx2c4.com/wireguard/windows/tunnel/winipcfg"
|
"golang.zx2c4.com/wireguard/windows/tunnel/winipcfg"
|
||||||
)
|
)
|
||||||
|
|
||||||
const tunGUIDLabel = "Fixed Nebula Windows GUID v1"
|
const tunGUIDLabel = "Fixed Nebula Windows GUID v1"
|
||||||
|
|
||||||
type winTun struct {
|
type tun struct {
|
||||||
Device string
|
luid winipcfg.LUID
|
||||||
vpnNetworks []netip.Prefix
|
|
||||||
MTU int
|
|
||||||
Routes atomic.Pointer[[]Route]
|
|
||||||
routeTree atomic.Pointer[bart.Table[routing.Gateways]]
|
|
||||||
l *logrus.Logger
|
|
||||||
|
|
||||||
tun *wintun.NativeTun
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ []netip.Prefix) (Device, error) {
|
func newTunFromFd(_ *config.C, _ *logrus.Logger, _ int, _ []netip.Prefix) (*wgTun, error) {
|
||||||
return nil, fmt.Errorf("newTunFromFd not supported in Windows")
|
return nil, fmt.Errorf("newTunFromFd not supported in Windows")
|
||||||
}
|
}
|
||||||
|
|
||||||
func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, _ bool) (*winTun, error) {
|
func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, _ bool) (*wgTun, error) {
|
||||||
err := checkWinTunExists()
|
deviceName := c.GetString("tun.dev", "Nebula")
|
||||||
|
mtu := c.GetInt("tun.mtu", DefaultMTU)
|
||||||
|
|
||||||
|
// Create WireGuard TUN device
|
||||||
|
tunDevice, err := wgtun.CreateTUN(deviceName, mtu)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("can not load the wintun driver: %w", err)
|
return nil, fmt.Errorf("failed to create TUN device: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
deviceName := c.GetString("tun.dev", "")
|
// Get the actual device name
|
||||||
guid, err := generateGUIDByDeviceName(deviceName)
|
actualName, err := tunDevice.Name()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("generate GUID failed: %w", err)
|
tunDevice.Close()
|
||||||
|
return nil, fmt.Errorf("failed to get TUN device name: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
t := &winTun{
|
t := &wgTun{
|
||||||
Device: deviceName,
|
tunDevice: tunDevice,
|
||||||
vpnNetworks: vpnNetworks,
|
vpnNetworks: vpnNetworks,
|
||||||
MTU: c.GetInt("tun.mtu", DefaultMTU),
|
MaxMTU: mtu,
|
||||||
|
DefaultMTU: mtu,
|
||||||
l: l,
|
l: l,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create Windows-specific route manager
|
||||||
|
rm := &tun{}
|
||||||
|
|
||||||
|
// Get LUID from the TUN device
|
||||||
|
// The WireGuard TUN device on Windows should provide a LUID() method
|
||||||
|
if nativeTun, ok := tunDevice.(interface{ LUID() uint64 }); ok {
|
||||||
|
rm.luid = winipcfg.LUID(nativeTun.LUID())
|
||||||
|
} else {
|
||||||
|
tunDevice.Close()
|
||||||
|
return nil, fmt.Errorf("failed to get LUID from TUN device")
|
||||||
|
}
|
||||||
|
t.routeManager = rm
|
||||||
|
|
||||||
err = t.reload(c, true)
|
err = t.reload(c, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
tunDevice.Close()
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var tunDevice wintun.Device
|
|
||||||
tunDevice, err = wintun.CreateTUNWithRequestedGUID(deviceName, guid, t.MTU)
|
|
||||||
if err != nil {
|
|
||||||
// Windows 10 has an issue with unclean shutdowns not fully cleaning up the wintun device.
|
|
||||||
// Trying a second time resolves the issue.
|
|
||||||
l.WithError(err).Debug("Failed to create wintun device, retrying")
|
|
||||||
tunDevice, err = wintun.CreateTUNWithRequestedGUID(deviceName, guid, t.MTU)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("create TUN device failed: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
t.tun = tunDevice.(*wintun.NativeTun)
|
|
||||||
|
|
||||||
c.RegisterReloadCallback(func(c *config.C) {
|
c.RegisterReloadCallback(func(c *config.C) {
|
||||||
err := t.reload(c, false)
|
err := t.reload(c, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -86,206 +79,140 @@ func newTun(c *config.C, l *logrus.Logger, vpnNetworks []netip.Prefix, _ bool) (
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
l.WithField("name", actualName).Info("Created WireGuard TUN device")
|
||||||
|
|
||||||
return t, nil
|
return t, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *winTun) reload(c *config.C, initial bool) error {
|
func (rm *tun) Activate(t *wgTun) error {
|
||||||
change, routes, err := getAllRoutesFromConfig(c, t.vpnNetworks, initial)
|
// Set MTU
|
||||||
|
err := rm.setMTU(t, t.MaxMTU)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to set MTU: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add IP addresses
|
||||||
|
for _, network := range t.vpnNetworks {
|
||||||
|
if err := rm.addIP(t, network); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if !initial && !change {
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
routeTree, err := makeRouteTree(t.l, routes, false)
|
// Add routes
|
||||||
if err != nil {
|
if err := rm.AddRoutes(t, false); err != nil {
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Teach nebula how to handle the routes before establishing them in the system table
|
|
||||||
oldRoutes := t.Routes.Swap(&routes)
|
|
||||||
t.routeTree.Store(routeTree)
|
|
||||||
|
|
||||||
if !initial {
|
|
||||||
// Remove first, if the system removes a wanted route hopefully it will be re-added next
|
|
||||||
err := t.removeRoutes(findRemovedRoutes(routes, *oldRoutes))
|
|
||||||
if err != nil {
|
|
||||||
util.LogWithContextIfNeeded("Failed to remove routes", err, t.l)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure any routes we actually want are installed
|
|
||||||
err = t.addRoutes(true)
|
|
||||||
if err != nil {
|
|
||||||
// Catch any stray logs
|
|
||||||
util.LogWithContextIfNeeded("Failed to add routes", err, t.l)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *winTun) Activate() error {
|
|
||||||
luid := winipcfg.LUID(t.tun.LUID())
|
|
||||||
|
|
||||||
err := luid.SetIPAddresses(t.vpnNetworks)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to set address: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
err = t.addRoutes(false)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *winTun) addRoutes(logErrors bool) error {
|
func (rm *tun) SetMTU(t *wgTun, mtu int) {
|
||||||
luid := winipcfg.LUID(t.tun.LUID())
|
if err := rm.setMTU(t, mtu); err != nil {
|
||||||
|
t.l.WithError(err).Error("Failed to set MTU")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rm *tun) setMTU(t *wgTun, mtu int) error {
|
||||||
|
// Set MTU using winipcfg
|
||||||
|
// Note: MTU setting on Windows TUN devices may be handled by the driver
|
||||||
|
// For now, we'll skip explicit MTU setting as the WireGuard TUN handles it
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rm *tun) SetDefaultRoute(t *wgTun, cidr netip.Prefix) error {
|
||||||
|
// On Windows, routes are managed differently
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rm *tun) AddRoutes(t *wgTun, logErrors bool) error {
|
||||||
routes := *t.Routes.Load()
|
routes := *t.Routes.Load()
|
||||||
foundDefault4 := false
|
|
||||||
|
|
||||||
for _, r := range routes {
|
for _, r := range routes {
|
||||||
if len(r.Via) == 0 || !r.Install {
|
if !r.Install {
|
||||||
// We don't allow route MTUs so only install routes with a via
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add our unsafe route
|
if r.MTU > 0 {
|
||||||
// Windows does not support multipath routes natively, so we install only a single route.
|
// Windows route MTU is not directly supported
|
||||||
// This is not a problem as traffic will always be sent to Nebula which handles the multipath routing internally.
|
t.l.WithField("route", r).Debug("Route MTU is not supported on Windows")
|
||||||
// In effect this provides multipath routing support to windows supporting loadbalancing and redundancy.
|
}
|
||||||
err := luid.AddRoute(r.Cidr, r.Via[0].Addr(), uint32(r.Metric))
|
|
||||||
|
// Use winipcfg to add the route
|
||||||
|
// The rm.luid should have the AddRoute method from winipcfg
|
||||||
|
if len(r.Via) == 0 {
|
||||||
|
t.l.WithField("route", r).Warn("Route has no via address, skipping")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
err := rm.luid.AddRoute(r.Cidr, r.Via[0].Addr(), uint32(r.Metric))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
retErr := util.NewContextualError("Failed to add route", map[string]any{"route": r}, err)
|
retErr := util.NewContextualError("Failed to add route", map[string]any{"route": r}, err)
|
||||||
if logErrors {
|
if logErrors {
|
||||||
retErr.Log(t.l)
|
retErr.Log(t.l)
|
||||||
continue
|
|
||||||
} else {
|
} else {
|
||||||
return retErr
|
return retErr
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
t.l.WithField("route", r).Info("Added route")
|
t.l.WithField("route", r).Info("Added route")
|
||||||
}
|
}
|
||||||
|
|
||||||
if !foundDefault4 {
|
|
||||||
if r.Cidr.Bits() == 0 && r.Cidr.Addr().BitLen() == 32 {
|
|
||||||
foundDefault4 = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ipif, err := luid.IPInterface(windows.AF_INET)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to get ip interface: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
ipif.NLMTU = uint32(t.MTU)
|
|
||||||
if foundDefault4 {
|
|
||||||
ipif.UseAutomaticMetric = false
|
|
||||||
ipif.Metric = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := ipif.Set(); err != nil {
|
|
||||||
return fmt.Errorf("failed to set ip interface: %w", err)
|
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *winTun) removeRoutes(routes []Route) error {
|
func (rm *tun) RemoveRoutes(t *wgTun, routes []Route) {
|
||||||
luid := winipcfg.LUID(t.tun.LUID())
|
|
||||||
|
|
||||||
for _, r := range routes {
|
for _, r := range routes {
|
||||||
if !r.Install {
|
if !r.Install {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// See comment on luid.AddRoute
|
if len(r.Via) == 0 {
|
||||||
err := luid.DeleteRoute(r.Cidr, r.Via[0].Addr())
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
err := rm.luid.DeleteRoute(r.Cidr, r.Via[0].Addr())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
|
t.l.WithError(err).WithField("route", r).Error("Failed to remove route")
|
||||||
} else {
|
} else {
|
||||||
t.l.WithField("route", r).Info("Removed route")
|
t.l.WithField("route", r).Info("Removed route")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rm *tun) NewMultiQueueReader(t *wgTun) (io.ReadWriteCloser, error) {
|
||||||
|
// Windows doesn't support multi-queue TUN devices
|
||||||
|
// Return a reader that wraps the same device
|
||||||
|
return &wgTunReader{
|
||||||
|
parent: t,
|
||||||
|
tunDevice: t.tunDevice,
|
||||||
|
offset: 0,
|
||||||
|
l: t.l,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (rm *tun) addIP(t *wgTun, network netip.Prefix) error {
|
||||||
|
// Add IP address using winipcfg
|
||||||
|
// SetIPAddresses expects a slice of prefixes
|
||||||
|
err := rm.luid.SetIPAddresses([]netip.Prefix{network})
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to add IP address %s: %w", network, err)
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *winTun) RoutesFor(ip netip.Addr) routing.Gateways {
|
// generateGUIDByDeviceName generates a GUID based on the device name
|
||||||
r, _ := t.routeTree.Load().Lookup(ip)
|
func generateGUIDByDeviceName(deviceName string) (*windows.GUID, error) {
|
||||||
return r
|
// Hash the device name to create a deterministic GUID
|
||||||
|
h := crypto.SHA256.New()
|
||||||
|
h.Write([]byte(tunGUIDLabel))
|
||||||
|
h.Write([]byte(deviceName))
|
||||||
|
sum := h.Sum(nil)
|
||||||
|
|
||||||
|
guid := &windows.GUID{
|
||||||
|
Data1: binary.LittleEndian.Uint32(sum[0:4]),
|
||||||
|
Data2: binary.LittleEndian.Uint16(sum[4:6]),
|
||||||
|
Data3: binary.LittleEndian.Uint16(sum[6:8]),
|
||||||
}
|
}
|
||||||
|
copy(guid.Data4[:], sum[8:16])
|
||||||
|
|
||||||
func (t *winTun) Networks() []netip.Prefix {
|
return guid, nil
|
||||||
return t.vpnNetworks
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *winTun) Name() string {
|
|
||||||
return t.Device
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *winTun) Read(b []byte) (int, error) {
|
|
||||||
return t.tun.Read(b, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *winTun) Write(b []byte) (int, error) {
|
|
||||||
return t.tun.Write(b, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *winTun) NewMultiQueueReader() (io.ReadWriteCloser, error) {
|
|
||||||
return nil, fmt.Errorf("TODO: multiqueue not implemented for windows")
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *winTun) Close() error {
|
|
||||||
// It seems that the Windows networking stack doesn't like it when we destroy interfaces that have active routes,
|
|
||||||
// so to be certain, just remove everything before destroying.
|
|
||||||
luid := winipcfg.LUID(t.tun.LUID())
|
|
||||||
_ = luid.FlushRoutes(windows.AF_INET)
|
|
||||||
_ = luid.FlushIPAddresses(windows.AF_INET)
|
|
||||||
|
|
||||||
_ = luid.FlushRoutes(windows.AF_INET6)
|
|
||||||
_ = luid.FlushIPAddresses(windows.AF_INET6)
|
|
||||||
|
|
||||||
_ = luid.FlushDNS(windows.AF_INET)
|
|
||||||
_ = luid.FlushDNS(windows.AF_INET6)
|
|
||||||
|
|
||||||
return t.tun.Close()
|
|
||||||
}
|
|
||||||
|
|
||||||
func generateGUIDByDeviceName(name string) (*windows.GUID, error) {
|
|
||||||
// GUID is 128 bit
|
|
||||||
hash := crypto.MD5.New()
|
|
||||||
|
|
||||||
_, err := hash.Write([]byte(tunGUIDLabel))
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err = hash.Write([]byte(name))
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
sum := hash.Sum(nil)
|
|
||||||
|
|
||||||
return (*windows.GUID)(unsafe.Pointer(&sum[0])), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func checkWinTunExists() error {
|
|
||||||
myPath, err := os.Executable()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
arch := runtime.GOARCH
|
|
||||||
switch arch {
|
|
||||||
case "386":
|
|
||||||
//NOTE: wintun bundles 386 as x86
|
|
||||||
arch = "x86"
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err = syscall.LoadDLL(filepath.Join(filepath.Dir(myPath), "dist", "windows", "wintun", "bin", arch, "wintun.dll"))
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,220 +0,0 @@
|
|||||||
//go:build linux && !android && !e2e_testing
|
|
||||||
|
|
||||||
package overlay
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"sync"
|
|
||||||
|
|
||||||
wgtun "github.com/slackhq/nebula/wgstack/tun"
|
|
||||||
)
|
|
||||||
|
|
||||||
type wireguardTunIO struct {
|
|
||||||
dev wgtun.Device
|
|
||||||
mtu int
|
|
||||||
batchSize int
|
|
||||||
|
|
||||||
readMu sync.Mutex
|
|
||||||
readBuffers [][]byte
|
|
||||||
readLens []int
|
|
||||||
legacyBuf []byte
|
|
||||||
|
|
||||||
writeMu sync.Mutex
|
|
||||||
writeBuf []byte
|
|
||||||
writeWrap [][]byte
|
|
||||||
writeBuffers [][]byte
|
|
||||||
}
|
|
||||||
|
|
||||||
func newWireguardTunIO(dev wgtun.Device, mtu int) *wireguardTunIO {
|
|
||||||
batch := dev.BatchSize()
|
|
||||||
if batch <= 0 {
|
|
||||||
batch = 1
|
|
||||||
}
|
|
||||||
if mtu <= 0 {
|
|
||||||
mtu = DefaultMTU
|
|
||||||
}
|
|
||||||
return &wireguardTunIO{
|
|
||||||
dev: dev,
|
|
||||||
mtu: mtu,
|
|
||||||
batchSize: batch,
|
|
||||||
readLens: make([]int, batch),
|
|
||||||
legacyBuf: make([]byte, wgtun.VirtioNetHdrLen+mtu),
|
|
||||||
writeBuf: make([]byte, wgtun.VirtioNetHdrLen+mtu),
|
|
||||||
writeWrap: make([][]byte, 1),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (w *wireguardTunIO) Read(p []byte) (int, error) {
|
|
||||||
w.readMu.Lock()
|
|
||||||
defer w.readMu.Unlock()
|
|
||||||
|
|
||||||
bufs := w.readBuffers
|
|
||||||
if len(bufs) == 0 {
|
|
||||||
bufs = [][]byte{w.legacyBuf}
|
|
||||||
w.readBuffers = bufs
|
|
||||||
}
|
|
||||||
n, err := w.dev.Read(bufs[:1], w.readLens[:1], wgtun.VirtioNetHdrLen)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
if n == 0 {
|
|
||||||
return 0, nil
|
|
||||||
}
|
|
||||||
length := w.readLens[0]
|
|
||||||
copy(p, w.legacyBuf[wgtun.VirtioNetHdrLen:wgtun.VirtioNetHdrLen+length])
|
|
||||||
return length, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (w *wireguardTunIO) Write(p []byte) (int, error) {
|
|
||||||
if len(p) > w.mtu {
|
|
||||||
return 0, fmt.Errorf("wireguard tun: payload exceeds MTU (%d > %d)", len(p), w.mtu)
|
|
||||||
}
|
|
||||||
w.writeMu.Lock()
|
|
||||||
defer w.writeMu.Unlock()
|
|
||||||
buf := w.writeBuf[:wgtun.VirtioNetHdrLen+len(p)]
|
|
||||||
for i := 0; i < wgtun.VirtioNetHdrLen; i++ {
|
|
||||||
buf[i] = 0
|
|
||||||
}
|
|
||||||
copy(buf[wgtun.VirtioNetHdrLen:], p)
|
|
||||||
w.writeWrap[0] = buf
|
|
||||||
n, err := w.dev.Write(w.writeWrap, wgtun.VirtioNetHdrLen)
|
|
||||||
if err != nil {
|
|
||||||
return n, err
|
|
||||||
}
|
|
||||||
return len(p), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (w *wireguardTunIO) ReadIntoBatch(pool *PacketPool) ([]*Packet, error) {
|
|
||||||
if pool == nil {
|
|
||||||
return nil, fmt.Errorf("wireguard tun: packet pool is nil")
|
|
||||||
}
|
|
||||||
|
|
||||||
w.readMu.Lock()
|
|
||||||
defer w.readMu.Unlock()
|
|
||||||
|
|
||||||
if len(w.readBuffers) < w.batchSize {
|
|
||||||
w.readBuffers = make([][]byte, w.batchSize)
|
|
||||||
}
|
|
||||||
if len(w.readLens) < w.batchSize {
|
|
||||||
w.readLens = make([]int, w.batchSize)
|
|
||||||
}
|
|
||||||
|
|
||||||
packets := make([]*Packet, w.batchSize)
|
|
||||||
requiredHeadroom := w.BatchHeadroom()
|
|
||||||
requiredPayload := w.BatchPayloadCap()
|
|
||||||
headroom := 0
|
|
||||||
for i := 0; i < w.batchSize; i++ {
|
|
||||||
pkt := pool.Get()
|
|
||||||
if pkt == nil {
|
|
||||||
releasePackets(packets[:i])
|
|
||||||
return nil, fmt.Errorf("wireguard tun: packet pool returned nil packet")
|
|
||||||
}
|
|
||||||
if pkt.Capacity() < requiredPayload {
|
|
||||||
pkt.Release()
|
|
||||||
releasePackets(packets[:i])
|
|
||||||
return nil, fmt.Errorf("wireguard tun: packet capacity %d below required %d", pkt.Capacity(), requiredPayload)
|
|
||||||
}
|
|
||||||
if i == 0 {
|
|
||||||
headroom = pkt.Offset
|
|
||||||
if headroom < requiredHeadroom {
|
|
||||||
pkt.Release()
|
|
||||||
releasePackets(packets[:i])
|
|
||||||
return nil, fmt.Errorf("wireguard tun: packet headroom %d below virtio requirement %d", headroom, requiredHeadroom)
|
|
||||||
}
|
|
||||||
} else if pkt.Offset != headroom {
|
|
||||||
pkt.Release()
|
|
||||||
releasePackets(packets[:i])
|
|
||||||
return nil, fmt.Errorf("wireguard tun: inconsistent packet headroom (%d != %d)", pkt.Offset, headroom)
|
|
||||||
}
|
|
||||||
packets[i] = pkt
|
|
||||||
w.readBuffers[i] = pkt.Buf
|
|
||||||
}
|
|
||||||
|
|
||||||
n, err := w.dev.Read(w.readBuffers[:w.batchSize], w.readLens[:w.batchSize], headroom)
|
|
||||||
if err != nil {
|
|
||||||
releasePackets(packets)
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if n == 0 {
|
|
||||||
releasePackets(packets)
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
for i := 0; i < n; i++ {
|
|
||||||
packets[i].Len = w.readLens[i]
|
|
||||||
}
|
|
||||||
for i := n; i < w.batchSize; i++ {
|
|
||||||
packets[i].Release()
|
|
||||||
packets[i] = nil
|
|
||||||
}
|
|
||||||
return packets[:n], nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (w *wireguardTunIO) WriteBatch(packets []*Packet) (int, error) {
|
|
||||||
if len(packets) == 0 {
|
|
||||||
return 0, nil
|
|
||||||
}
|
|
||||||
requiredHeadroom := w.BatchHeadroom()
|
|
||||||
offset := packets[0].Offset
|
|
||||||
if offset < requiredHeadroom {
|
|
||||||
releasePackets(packets)
|
|
||||||
return 0, fmt.Errorf("wireguard tun: packet offset %d smaller than required headroom %d", offset, requiredHeadroom)
|
|
||||||
}
|
|
||||||
for _, pkt := range packets {
|
|
||||||
if pkt == nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if pkt.Offset != offset {
|
|
||||||
releasePackets(packets)
|
|
||||||
return 0, fmt.Errorf("wireguard tun: mixed packet offsets not supported")
|
|
||||||
}
|
|
||||||
limit := pkt.Offset + pkt.Len
|
|
||||||
if limit > len(pkt.Buf) {
|
|
||||||
releasePackets(packets)
|
|
||||||
return 0, fmt.Errorf("wireguard tun: packet length %d exceeds buffer capacity %d", pkt.Len, len(pkt.Buf)-pkt.Offset)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
w.writeMu.Lock()
|
|
||||||
defer w.writeMu.Unlock()
|
|
||||||
|
|
||||||
if len(w.writeBuffers) < len(packets) {
|
|
||||||
w.writeBuffers = make([][]byte, len(packets))
|
|
||||||
}
|
|
||||||
for i, pkt := range packets {
|
|
||||||
if pkt == nil {
|
|
||||||
w.writeBuffers[i] = nil
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
limit := pkt.Offset + pkt.Len
|
|
||||||
w.writeBuffers[i] = pkt.Buf[:limit]
|
|
||||||
}
|
|
||||||
n, err := w.dev.Write(w.writeBuffers[:len(packets)], offset)
|
|
||||||
if err != nil {
|
|
||||||
return n, err
|
|
||||||
}
|
|
||||||
releasePackets(packets)
|
|
||||||
return n, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (w *wireguardTunIO) BatchHeadroom() int {
|
|
||||||
return wgtun.VirtioNetHdrLen
|
|
||||||
}
|
|
||||||
|
|
||||||
func (w *wireguardTunIO) BatchPayloadCap() int {
|
|
||||||
return w.mtu
|
|
||||||
}
|
|
||||||
|
|
||||||
func (w *wireguardTunIO) BatchSize() int {
|
|
||||||
return w.batchSize
|
|
||||||
}
|
|
||||||
|
|
||||||
func (w *wireguardTunIO) Close() error {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func releasePackets(pkts []*Packet) {
|
|
||||||
for _, pkt := range pkts {
|
|
||||||
if pkt != nil {
|
|
||||||
pkt.Release()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
39
pki.go
39
pki.go
@@ -100,36 +100,41 @@ func (p *PKI) reloadCerts(c *config.C, initial bool) *util.ContextualError {
|
|||||||
currentState := p.cs.Load()
|
currentState := p.cs.Load()
|
||||||
if newState.v1Cert != nil {
|
if newState.v1Cert != nil {
|
||||||
if currentState.v1Cert == nil {
|
if currentState.v1Cert == nil {
|
||||||
//adding certs is fine, actually. Networks-in-common confirmed in newCertState().
|
return util.NewContextualError("v1 certificate was added, restart required", nil, err)
|
||||||
} else {
|
}
|
||||||
|
|
||||||
// did IP in cert change? if so, don't set
|
// did IP in cert change? if so, don't set
|
||||||
if !slices.Equal(currentState.v1Cert.Networks(), newState.v1Cert.Networks()) {
|
if !slices.Equal(currentState.v1Cert.Networks(), newState.v1Cert.Networks()) {
|
||||||
return util.NewContextualError(
|
return util.NewContextualError(
|
||||||
"Networks in new cert was different from old",
|
"Networks in new cert was different from old",
|
||||||
m{"new_networks": newState.v1Cert.Networks(), "old_networks": currentState.v1Cert.Networks(), "cert_version": cert.Version1},
|
m{"new_networks": newState.v1Cert.Networks(), "old_networks": currentState.v1Cert.Networks()},
|
||||||
nil,
|
nil,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
if currentState.v1Cert.Curve() != newState.v1Cert.Curve() {
|
if currentState.v1Cert.Curve() != newState.v1Cert.Curve() {
|
||||||
return util.NewContextualError(
|
return util.NewContextualError(
|
||||||
"Curve in new v1 cert was different from old",
|
"Curve in new cert was different from old",
|
||||||
m{"new_curve": newState.v1Cert.Curve(), "old_curve": currentState.v1Cert.Curve(), "cert_version": cert.Version1},
|
m{"new_curve": newState.v1Cert.Curve(), "old_curve": currentState.v1Cert.Curve()},
|
||||||
nil,
|
nil,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
} else if currentState.v1Cert != nil {
|
||||||
|
//TODO: CERT-V2 we should be able to tear this down
|
||||||
|
return util.NewContextualError("v1 certificate was removed, restart required", nil, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if newState.v2Cert != nil {
|
if newState.v2Cert != nil {
|
||||||
if currentState.v2Cert == nil {
|
if currentState.v2Cert == nil {
|
||||||
//adding certs is fine, actually
|
return util.NewContextualError("v2 certificate was added, restart required", nil, err)
|
||||||
} else {
|
}
|
||||||
|
|
||||||
// did IP in cert change? if so, don't set
|
// did IP in cert change? if so, don't set
|
||||||
if !slices.Equal(currentState.v2Cert.Networks(), newState.v2Cert.Networks()) {
|
if !slices.Equal(currentState.v2Cert.Networks(), newState.v2Cert.Networks()) {
|
||||||
return util.NewContextualError(
|
return util.NewContextualError(
|
||||||
"Networks in new cert was different from old",
|
"Networks in new cert was different from old",
|
||||||
m{"new_networks": newState.v2Cert.Networks(), "old_networks": currentState.v2Cert.Networks(), "cert_version": cert.Version2},
|
m{"new_networks": newState.v2Cert.Networks(), "old_networks": currentState.v2Cert.Networks()},
|
||||||
nil,
|
nil,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@@ -137,25 +142,13 @@ func (p *PKI) reloadCerts(c *config.C, initial bool) *util.ContextualError {
|
|||||||
if currentState.v2Cert.Curve() != newState.v2Cert.Curve() {
|
if currentState.v2Cert.Curve() != newState.v2Cert.Curve() {
|
||||||
return util.NewContextualError(
|
return util.NewContextualError(
|
||||||
"Curve in new cert was different from old",
|
"Curve in new cert was different from old",
|
||||||
m{"new_curve": newState.v2Cert.Curve(), "old_curve": currentState.v2Cert.Curve(), "cert_version": cert.Version2},
|
m{"new_curve": newState.v2Cert.Curve(), "old_curve": currentState.v2Cert.Curve()},
|
||||||
nil,
|
nil,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
} else if currentState.v2Cert != nil {
|
} else if currentState.v2Cert != nil {
|
||||||
//newState.v1Cert is non-nil bc empty certstates aren't permitted
|
return util.NewContextualError("v2 certificate was removed, restart required", nil, err)
|
||||||
if newState.v1Cert == nil {
|
|
||||||
return util.NewContextualError("v1 and v2 certs are nil, this should be impossible", nil, err)
|
|
||||||
}
|
|
||||||
//if we're going to v1-only, we need to make sure we didn't orphan any v2-cert vpnaddrs
|
|
||||||
if !slices.Equal(currentState.v2Cert.Networks(), newState.v1Cert.Networks()) {
|
|
||||||
return util.NewContextualError(
|
|
||||||
"Removing a V2 cert is not permitted unless it has identical networks to the new V1 cert",
|
|
||||||
m{"new_v1_networks": newState.v1Cert.Networks(), "old_v2_networks": currentState.v2Cert.Networks()},
|
|
||||||
nil,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cipher cant be hot swapped so just leave it at what it was before
|
// Cipher cant be hot swapped so just leave it at what it was before
|
||||||
|
|||||||
12
udp/conn.go
12
udp/conn.go
@@ -22,18 +22,6 @@ type Conn interface {
|
|||||||
Close() error
|
Close() error
|
||||||
}
|
}
|
||||||
|
|
||||||
// Datagram represents a UDP payload destined to a specific address.
|
|
||||||
type Datagram struct {
|
|
||||||
Payload []byte
|
|
||||||
Addr netip.AddrPort
|
|
||||||
}
|
|
||||||
|
|
||||||
// BatchConn can send multiple datagrams in one syscall.
|
|
||||||
type BatchConn interface {
|
|
||||||
Conn
|
|
||||||
WriteBatch(pkts []Datagram) error
|
|
||||||
}
|
|
||||||
|
|
||||||
type NoopConn struct{}
|
type NoopConn struct{}
|
||||||
|
|
||||||
func (NoopConn) Rebind() error {
|
func (NoopConn) Rebind() error {
|
||||||
|
|||||||
@@ -310,51 +310,31 @@ func (u *StdConn) Close() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func NewUDPStatsEmitter(udpConns []Conn) func() {
|
func NewUDPStatsEmitter(udpConns []Conn) func() {
|
||||||
if len(udpConns) == 0 {
|
// Check if our kernel supports SO_MEMINFO before registering the gauges
|
||||||
return func() {}
|
var udpGauges [][unix.SK_MEMINFO_VARS]metrics.Gauge
|
||||||
}
|
|
||||||
|
|
||||||
type statsProvider struct {
|
|
||||||
index int
|
|
||||||
conn *StdConn
|
|
||||||
}
|
|
||||||
|
|
||||||
providers := make([]statsProvider, 0, len(udpConns))
|
|
||||||
for i, c := range udpConns {
|
|
||||||
if sc, ok := c.(*StdConn); ok {
|
|
||||||
providers = append(providers, statsProvider{index: i, conn: sc})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(providers) == 0 {
|
|
||||||
return func() {}
|
|
||||||
}
|
|
||||||
|
|
||||||
var meminfo [unix.SK_MEMINFO_VARS]uint32
|
var meminfo [unix.SK_MEMINFO_VARS]uint32
|
||||||
if err := providers[0].conn.getMemInfo(&meminfo); err != nil {
|
if err := udpConns[0].(*StdConn).getMemInfo(&meminfo); err == nil {
|
||||||
return func() {}
|
udpGauges = make([][unix.SK_MEMINFO_VARS]metrics.Gauge, len(udpConns))
|
||||||
}
|
for i := range udpConns {
|
||||||
|
|
||||||
udpGauges := make([][unix.SK_MEMINFO_VARS]metrics.Gauge, len(providers))
|
|
||||||
for i, provider := range providers {
|
|
||||||
udpGauges[i] = [unix.SK_MEMINFO_VARS]metrics.Gauge{
|
udpGauges[i] = [unix.SK_MEMINFO_VARS]metrics.Gauge{
|
||||||
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.rmem_alloc", provider.index), nil),
|
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.rmem_alloc", i), nil),
|
||||||
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.rcvbuf", provider.index), nil),
|
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.rcvbuf", i), nil),
|
||||||
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.wmem_alloc", provider.index), nil),
|
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.wmem_alloc", i), nil),
|
||||||
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.sndbuf", provider.index), nil),
|
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.sndbuf", i), nil),
|
||||||
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.fwd_alloc", provider.index), nil),
|
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.fwd_alloc", i), nil),
|
||||||
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.wmem_queued", provider.index), nil),
|
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.wmem_queued", i), nil),
|
||||||
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.optmem", provider.index), nil),
|
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.optmem", i), nil),
|
||||||
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.backlog", provider.index), nil),
|
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.backlog", i), nil),
|
||||||
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.drops", provider.index), nil),
|
metrics.GetOrRegisterGauge(fmt.Sprintf("udp.%d.drops", i), nil),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return func() {
|
return func() {
|
||||||
for i, provider := range providers {
|
for i, gauges := range udpGauges {
|
||||||
if err := provider.conn.getMemInfo(&meminfo); err == nil {
|
if err := udpConns[i].(*StdConn).getMemInfo(&meminfo); err == nil {
|
||||||
for j := 0; j < unix.SK_MEMINFO_VARS; j++ {
|
for j := 0; j < unix.SK_MEMINFO_VARS; j++ {
|
||||||
udpGauges[i][j].Update(int64(meminfo[j]))
|
gauges[j].Update(int64(meminfo[j]))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,225 +0,0 @@
|
|||||||
//go:build linux && !android && !e2e_testing
|
|
||||||
|
|
||||||
package udp
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"net"
|
|
||||||
"net/netip"
|
|
||||||
"sync"
|
|
||||||
"sync/atomic"
|
|
||||||
|
|
||||||
"github.com/sirupsen/logrus"
|
|
||||||
"github.com/slackhq/nebula/config"
|
|
||||||
wgconn "github.com/slackhq/nebula/wgstack/conn"
|
|
||||||
)
|
|
||||||
|
|
||||||
// WGConn adapts WireGuard's batched UDP bind implementation to Nebula's udp.Conn interface.
|
|
||||||
type WGConn struct {
|
|
||||||
l *logrus.Logger
|
|
||||||
bind *wgconn.StdNetBind
|
|
||||||
recvers []wgconn.ReceiveFunc
|
|
||||||
batch int
|
|
||||||
reqBatch int
|
|
||||||
localIP netip.Addr
|
|
||||||
localPort uint16
|
|
||||||
enableGSO bool
|
|
||||||
enableGRO bool
|
|
||||||
gsoMaxSeg int
|
|
||||||
closed atomic.Bool
|
|
||||||
|
|
||||||
closeOnce sync.Once
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewWireguardListener creates a UDP listener backed by WireGuard's StdNetBind.
|
|
||||||
func NewWireguardListener(l *logrus.Logger, ip netip.Addr, port int, multi bool, batch int) (Conn, error) {
|
|
||||||
bind := wgconn.NewStdNetBindForAddr(ip, multi)
|
|
||||||
recvers, actualPort, err := bind.Open(uint16(port))
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if batch <= 0 {
|
|
||||||
batch = bind.BatchSize()
|
|
||||||
} else if batch > bind.BatchSize() {
|
|
||||||
batch = bind.BatchSize()
|
|
||||||
}
|
|
||||||
return &WGConn{
|
|
||||||
l: l,
|
|
||||||
bind: bind,
|
|
||||||
recvers: recvers,
|
|
||||||
batch: batch,
|
|
||||||
reqBatch: batch,
|
|
||||||
localIP: ip,
|
|
||||||
localPort: actualPort,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *WGConn) Rebind() error {
|
|
||||||
// WireGuard's bind does not support rebinding in place.
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *WGConn) LocalAddr() (netip.AddrPort, error) {
|
|
||||||
if !c.localIP.IsValid() || c.localIP.IsUnspecified() {
|
|
||||||
// Fallback to wildcard IPv4 for display purposes.
|
|
||||||
return netip.AddrPortFrom(netip.IPv4Unspecified(), c.localPort), nil
|
|
||||||
}
|
|
||||||
return netip.AddrPortFrom(c.localIP, c.localPort), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *WGConn) listen(fn wgconn.ReceiveFunc, r EncReader) {
|
|
||||||
batchSize := c.batch
|
|
||||||
packets := make([][]byte, batchSize)
|
|
||||||
for i := range packets {
|
|
||||||
packets[i] = make([]byte, MTU)
|
|
||||||
}
|
|
||||||
sizes := make([]int, batchSize)
|
|
||||||
endpoints := make([]wgconn.Endpoint, batchSize)
|
|
||||||
|
|
||||||
for {
|
|
||||||
if c.closed.Load() {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
n, err := fn(packets, sizes, endpoints)
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(err, net.ErrClosed) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if c.l != nil {
|
|
||||||
c.l.WithError(err).Debug("wireguard UDP listener receive error")
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
for i := 0; i < n; i++ {
|
|
||||||
if sizes[i] == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
stdEp, ok := endpoints[i].(*wgconn.StdNetEndpoint)
|
|
||||||
if !ok {
|
|
||||||
if c.l != nil {
|
|
||||||
c.l.Warn("wireguard UDP listener received unexpected endpoint type")
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
addr := stdEp.AddrPort
|
|
||||||
r(addr, packets[i][:sizes[i]])
|
|
||||||
endpoints[i] = nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *WGConn) ListenOut(r EncReader) {
|
|
||||||
for _, fn := range c.recvers {
|
|
||||||
go c.listen(fn, r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *WGConn) WriteTo(b []byte, addr netip.AddrPort) error {
|
|
||||||
if len(b) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if c.closed.Load() {
|
|
||||||
return net.ErrClosed
|
|
||||||
}
|
|
||||||
ep := &wgconn.StdNetEndpoint{AddrPort: addr}
|
|
||||||
return c.bind.Send([][]byte{b}, ep)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *WGConn) WriteBatch(datagrams []Datagram) error {
|
|
||||||
if len(datagrams) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if c.closed.Load() {
|
|
||||||
return net.ErrClosed
|
|
||||||
}
|
|
||||||
max := c.batch
|
|
||||||
if max <= 0 {
|
|
||||||
max = len(datagrams)
|
|
||||||
if max == 0 {
|
|
||||||
max = 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
bufs := make([][]byte, 0, max)
|
|
||||||
var (
|
|
||||||
current netip.AddrPort
|
|
||||||
endpoint *wgconn.StdNetEndpoint
|
|
||||||
haveAddr bool
|
|
||||||
)
|
|
||||||
flush := func() error {
|
|
||||||
if len(bufs) == 0 || endpoint == nil {
|
|
||||||
bufs = bufs[:0]
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
err := c.bind.Send(bufs, endpoint)
|
|
||||||
bufs = bufs[:0]
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, d := range datagrams {
|
|
||||||
if len(d.Payload) == 0 || !d.Addr.IsValid() {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if !haveAddr || d.Addr != current {
|
|
||||||
if err := flush(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
current = d.Addr
|
|
||||||
endpoint = &wgconn.StdNetEndpoint{AddrPort: current}
|
|
||||||
haveAddr = true
|
|
||||||
}
|
|
||||||
bufs = append(bufs, d.Payload)
|
|
||||||
if len(bufs) >= max {
|
|
||||||
if err := flush(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return flush()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *WGConn) ConfigureOffload(enableGSO, enableGRO bool, maxSegments int) {
|
|
||||||
c.enableGSO = enableGSO
|
|
||||||
c.enableGRO = enableGRO
|
|
||||||
if maxSegments <= 0 {
|
|
||||||
maxSegments = 1
|
|
||||||
} else if maxSegments > wgconn.IdealBatchSize {
|
|
||||||
maxSegments = wgconn.IdealBatchSize
|
|
||||||
}
|
|
||||||
c.gsoMaxSeg = maxSegments
|
|
||||||
|
|
||||||
effectiveBatch := c.reqBatch
|
|
||||||
if enableGSO && c.bind != nil {
|
|
||||||
bindBatch := c.bind.BatchSize()
|
|
||||||
if effectiveBatch < bindBatch {
|
|
||||||
if c.l != nil {
|
|
||||||
c.l.WithFields(logrus.Fields{
|
|
||||||
"requested": c.reqBatch,
|
|
||||||
"effective": bindBatch,
|
|
||||||
}).Warn("listen.batch below wireguard minimum; using bind batch size for UDP GSO support")
|
|
||||||
}
|
|
||||||
effectiveBatch = bindBatch
|
|
||||||
}
|
|
||||||
}
|
|
||||||
c.batch = effectiveBatch
|
|
||||||
|
|
||||||
if c.l != nil {
|
|
||||||
c.l.WithFields(logrus.Fields{
|
|
||||||
"enableGSO": enableGSO,
|
|
||||||
"enableGRO": enableGRO,
|
|
||||||
"gsoMaxSegments": maxSegments,
|
|
||||||
}).Debug("configured wireguard UDP offload")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *WGConn) ReloadConfig(*config.C) {
|
|
||||||
// WireGuard bind currently does not expose runtime configuration knobs.
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *WGConn) Close() error {
|
|
||||||
var err error
|
|
||||||
c.closeOnce.Do(func() {
|
|
||||||
c.closed.Store(true)
|
|
||||||
err = c.bind.Close()
|
|
||||||
})
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
//go:build !linux || android || e2e_testing
|
|
||||||
|
|
||||||
package udp
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"net/netip"
|
|
||||||
|
|
||||||
"github.com/sirupsen/logrus"
|
|
||||||
)
|
|
||||||
|
|
||||||
// NewWireguardListener is only available on Linux builds.
|
|
||||||
func NewWireguardListener(*logrus.Logger, netip.Addr, int, bool, int) (Conn, error) {
|
|
||||||
return nil, fmt.Errorf("wireguard experimental UDP listener is only supported on Linux")
|
|
||||||
}
|
|
||||||
@@ -1,539 +0,0 @@
|
|||||||
// SPDX-License-Identifier: MIT
|
|
||||||
//
|
|
||||||
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
|
|
||||||
package conn
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"errors"
|
|
||||||
"net"
|
|
||||||
"net/netip"
|
|
||||||
"runtime"
|
|
||||||
"strconv"
|
|
||||||
"sync"
|
|
||||||
"syscall"
|
|
||||||
|
|
||||||
"golang.org/x/net/ipv4"
|
|
||||||
"golang.org/x/net/ipv6"
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
_ Bind = (*StdNetBind)(nil)
|
|
||||||
)
|
|
||||||
|
|
||||||
// StdNetBind implements Bind for all platforms. While Windows has its own Bind
|
|
||||||
// (see bind_windows.go), it may fall back to StdNetBind.
|
|
||||||
// TODO: Remove usage of ipv{4,6}.PacketConn when net.UDPConn has comparable
|
|
||||||
// methods for sending and receiving multiple datagrams per-syscall. See the
|
|
||||||
// proposal in https://github.com/golang/go/issues/45886#issuecomment-1218301564.
|
|
||||||
type StdNetBind struct {
|
|
||||||
mu sync.Mutex // protects all fields except as specified
|
|
||||||
ipv4 *net.UDPConn
|
|
||||||
ipv6 *net.UDPConn
|
|
||||||
ipv4PC *ipv4.PacketConn // will be nil on non-Linux
|
|
||||||
ipv6PC *ipv6.PacketConn // will be nil on non-Linux
|
|
||||||
|
|
||||||
// these three fields are not guarded by mu
|
|
||||||
udpAddrPool sync.Pool
|
|
||||||
ipv4MsgsPool sync.Pool
|
|
||||||
ipv6MsgsPool sync.Pool
|
|
||||||
|
|
||||||
blackhole4 bool
|
|
||||||
blackhole6 bool
|
|
||||||
|
|
||||||
listenAddr4 string
|
|
||||||
listenAddr6 string
|
|
||||||
bindV4 bool
|
|
||||||
bindV6 bool
|
|
||||||
reusePort bool
|
|
||||||
}
|
|
||||||
|
|
||||||
func newStdNetBind() *StdNetBind {
|
|
||||||
return &StdNetBind{
|
|
||||||
udpAddrPool: sync.Pool{
|
|
||||||
New: func() any {
|
|
||||||
return &net.UDPAddr{
|
|
||||||
IP: make([]byte, 16),
|
|
||||||
}
|
|
||||||
},
|
|
||||||
},
|
|
||||||
|
|
||||||
ipv4MsgsPool: sync.Pool{
|
|
||||||
New: func() any {
|
|
||||||
msgs := make([]ipv4.Message, IdealBatchSize)
|
|
||||||
for i := range msgs {
|
|
||||||
msgs[i].Buffers = make(net.Buffers, 1)
|
|
||||||
msgs[i].OOB = make([]byte, srcControlSize)
|
|
||||||
}
|
|
||||||
return &msgs
|
|
||||||
},
|
|
||||||
},
|
|
||||||
|
|
||||||
ipv6MsgsPool: sync.Pool{
|
|
||||||
New: func() any {
|
|
||||||
msgs := make([]ipv6.Message, IdealBatchSize)
|
|
||||||
for i := range msgs {
|
|
||||||
msgs[i].Buffers = make(net.Buffers, 1)
|
|
||||||
msgs[i].OOB = make([]byte, srcControlSize)
|
|
||||||
}
|
|
||||||
return &msgs
|
|
||||||
},
|
|
||||||
},
|
|
||||||
bindV4: true,
|
|
||||||
bindV6: true,
|
|
||||||
reusePort: false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewStdNetBind creates a bind that listens on all interfaces.
|
|
||||||
func NewStdNetBind() *StdNetBind {
|
|
||||||
return newStdNetBind()
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewStdNetBindForAddr creates a bind that listens on a specific address.
|
|
||||||
// If addr is IPv4, only the IPv4 socket will be created. For IPv6, only the
|
|
||||||
// IPv6 socket will be created.
|
|
||||||
func NewStdNetBindForAddr(addr netip.Addr, reusePort bool) *StdNetBind {
|
|
||||||
b := newStdNetBind()
|
|
||||||
if addr.IsValid() {
|
|
||||||
if addr.IsUnspecified() {
|
|
||||||
// keep dual-stack defaults with empty listen addresses
|
|
||||||
} else if addr.Is4() {
|
|
||||||
b.listenAddr4 = addr.Unmap().String()
|
|
||||||
b.bindV4 = true
|
|
||||||
b.bindV6 = false
|
|
||||||
} else {
|
|
||||||
b.listenAddr6 = addr.Unmap().String()
|
|
||||||
b.bindV6 = true
|
|
||||||
b.bindV4 = false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
b.reusePort = reusePort
|
|
||||||
return b
|
|
||||||
}
|
|
||||||
|
|
||||||
// StdNetEndpoint is the Endpoint implementation used with StdNetBind: a
// destination address and port plus an optional source hint.
type StdNetEndpoint struct {
	// AddrPort is the endpoint destination.
	netip.AddrPort

	// src is the current sticky source address and interface index, if
	// supported.
	src struct {
		netip.Addr
		ifidx int32
	}
}
|
|
||||||
|
|
||||||
var (
|
|
||||||
_ Bind = (*StdNetBind)(nil)
|
|
||||||
_ Endpoint = &StdNetEndpoint{}
|
|
||||||
)
|
|
||||||
|
|
||||||
func (*StdNetBind) ParseEndpoint(s string) (Endpoint, error) {
|
|
||||||
e, err := netip.ParseAddrPort(s)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return &StdNetEndpoint{
|
|
||||||
AddrPort: e,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *StdNetEndpoint) ClearSrc() {
|
|
||||||
e.src.ifidx = 0
|
|
||||||
e.src.Addr = netip.Addr{}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *StdNetEndpoint) DstIP() netip.Addr {
|
|
||||||
return e.AddrPort.Addr()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *StdNetEndpoint) SrcIP() netip.Addr {
|
|
||||||
return e.src.Addr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *StdNetEndpoint) SrcIfidx() int32 {
|
|
||||||
return e.src.ifidx
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *StdNetEndpoint) DstToBytes() []byte {
|
|
||||||
b, _ := e.AddrPort.MarshalBinary()
|
|
||||||
return b
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *StdNetEndpoint) DstToString() string {
|
|
||||||
return e.AddrPort.String()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *StdNetEndpoint) SrcToString() string {
|
|
||||||
return e.src.Addr.String()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StdNetBind) listenNet(network string, host string, port int) (*net.UDPConn, int, error) {
|
|
||||||
lc := listenConfig()
|
|
||||||
if s.reusePort {
|
|
||||||
base := lc.Control
|
|
||||||
lc.Control = func(network, address string, c syscall.RawConn) error {
|
|
||||||
if base != nil {
|
|
||||||
if err := base(network, address, c); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return c.Control(func(fd uintptr) {
|
|
||||||
_ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_REUSEPORT, 1)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
addr := ":" + strconv.Itoa(port)
|
|
||||||
if host != "" {
|
|
||||||
addr = net.JoinHostPort(host, strconv.Itoa(port))
|
|
||||||
}
|
|
||||||
|
|
||||||
conn, err := lc.ListenPacket(context.Background(), network, addr)
|
|
||||||
if err != nil {
|
|
||||||
return nil, 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Retrieve port.
|
|
||||||
laddr := conn.LocalAddr()
|
|
||||||
uaddr, err := net.ResolveUDPAddr(
|
|
||||||
laddr.Network(),
|
|
||||||
laddr.String(),
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, 0, err
|
|
||||||
}
|
|
||||||
return conn.(*net.UDPConn), uaddr.Port, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StdNetBind) openIPv4(port int) (*net.UDPConn, *ipv4.PacketConn, int, error) {
|
|
||||||
if !s.bindV4 {
|
|
||||||
return nil, nil, port, nil
|
|
||||||
}
|
|
||||||
host := s.listenAddr4
|
|
||||||
conn, actualPort, err := s.listenNet("udp4", host, port)
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(err, syscall.EAFNOSUPPORT) {
|
|
||||||
return nil, nil, port, nil
|
|
||||||
}
|
|
||||||
return nil, nil, port, err
|
|
||||||
}
|
|
||||||
if runtime.GOOS != "linux" {
|
|
||||||
return conn, nil, actualPort, nil
|
|
||||||
}
|
|
||||||
pc := ipv4.NewPacketConn(conn)
|
|
||||||
return conn, pc, actualPort, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StdNetBind) openIPv6(port int) (*net.UDPConn, *ipv6.PacketConn, int, error) {
|
|
||||||
if !s.bindV6 {
|
|
||||||
return nil, nil, port, nil
|
|
||||||
}
|
|
||||||
host := s.listenAddr6
|
|
||||||
conn, actualPort, err := s.listenNet("udp6", host, port)
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(err, syscall.EAFNOSUPPORT) {
|
|
||||||
return nil, nil, port, nil
|
|
||||||
}
|
|
||||||
return nil, nil, port, err
|
|
||||||
}
|
|
||||||
if runtime.GOOS != "linux" {
|
|
||||||
return conn, nil, actualPort, nil
|
|
||||||
}
|
|
||||||
pc := ipv6.NewPacketConn(conn)
|
|
||||||
return conn, pc, actualPort, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StdNetBind) Open(uport uint16) ([]ReceiveFunc, uint16, error) {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
|
|
||||||
var err error
|
|
||||||
var tries int
|
|
||||||
|
|
||||||
if s.ipv4 != nil || s.ipv6 != nil {
|
|
||||||
return nil, 0, ErrBindAlreadyOpen
|
|
||||||
}
|
|
||||||
|
|
||||||
// Attempt to open ipv4 and ipv6 listeners on the same port.
|
|
||||||
// If uport is 0, we can retry on failure.
|
|
||||||
again:
|
|
||||||
port := int(uport)
|
|
||||||
var v4conn *net.UDPConn
|
|
||||||
var v6conn *net.UDPConn
|
|
||||||
var v4pc *ipv4.PacketConn
|
|
||||||
var v6pc *ipv6.PacketConn
|
|
||||||
|
|
||||||
v4conn, v4pc, port, err = s.openIPv4(port)
|
|
||||||
if err != nil {
|
|
||||||
return nil, 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Listen on the same port as we're using for ipv4.
|
|
||||||
v6conn, v6pc, port, err = s.openIPv6(port)
|
|
||||||
if uport == 0 && errors.Is(err, syscall.EADDRINUSE) && tries < 100 {
|
|
||||||
if v4conn != nil {
|
|
||||||
v4conn.Close()
|
|
||||||
}
|
|
||||||
tries++
|
|
||||||
goto again
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
if v4conn != nil {
|
|
||||||
v4conn.Close()
|
|
||||||
}
|
|
||||||
return nil, 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
var fns []ReceiveFunc
|
|
||||||
if v4conn != nil {
|
|
||||||
s.ipv4 = v4conn
|
|
||||||
if v4pc != nil {
|
|
||||||
s.ipv4PC = v4pc
|
|
||||||
}
|
|
||||||
fns = append(fns, s.makeReceiveIPv4(v4pc, v4conn))
|
|
||||||
}
|
|
||||||
if v6conn != nil {
|
|
||||||
s.ipv6 = v6conn
|
|
||||||
if v6pc != nil {
|
|
||||||
s.ipv6PC = v6pc
|
|
||||||
}
|
|
||||||
fns = append(fns, s.makeReceiveIPv6(v6pc, v6conn))
|
|
||||||
}
|
|
||||||
if len(fns) == 0 {
|
|
||||||
return nil, 0, syscall.EAFNOSUPPORT
|
|
||||||
}
|
|
||||||
|
|
||||||
return fns, uint16(port), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StdNetBind) makeReceiveIPv4(pc *ipv4.PacketConn, conn *net.UDPConn) ReceiveFunc {
|
|
||||||
return func(bufs [][]byte, sizes []int, eps []Endpoint) (n int, err error) {
|
|
||||||
msgs := s.ipv4MsgsPool.Get().(*[]ipv4.Message)
|
|
||||||
defer s.ipv4MsgsPool.Put(msgs)
|
|
||||||
for i := range bufs {
|
|
||||||
(*msgs)[i].Buffers[0] = bufs[i]
|
|
||||||
}
|
|
||||||
var numMsgs int
|
|
||||||
if runtime.GOOS == "linux" && pc != nil {
|
|
||||||
numMsgs, err = pc.ReadBatch(*msgs, 0)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
msg := &(*msgs)[0]
|
|
||||||
msg.N, msg.NN, _, msg.Addr, err = conn.ReadMsgUDP(msg.Buffers[0], msg.OOB)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
numMsgs = 1
|
|
||||||
}
|
|
||||||
for i := 0; i < numMsgs; i++ {
|
|
||||||
msg := &(*msgs)[i]
|
|
||||||
sizes[i] = msg.N
|
|
||||||
addrPort := msg.Addr.(*net.UDPAddr).AddrPort()
|
|
||||||
ep := &StdNetEndpoint{AddrPort: addrPort} // TODO: remove allocation
|
|
||||||
getSrcFromControl(msg.OOB[:msg.NN], ep)
|
|
||||||
eps[i] = ep
|
|
||||||
}
|
|
||||||
return numMsgs, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StdNetBind) makeReceiveIPv6(pc *ipv6.PacketConn, conn *net.UDPConn) ReceiveFunc {
|
|
||||||
return func(bufs [][]byte, sizes []int, eps []Endpoint) (n int, err error) {
|
|
||||||
msgs := s.ipv6MsgsPool.Get().(*[]ipv6.Message)
|
|
||||||
defer s.ipv6MsgsPool.Put(msgs)
|
|
||||||
for i := range bufs {
|
|
||||||
(*msgs)[i].Buffers[0] = bufs[i]
|
|
||||||
}
|
|
||||||
var numMsgs int
|
|
||||||
if runtime.GOOS == "linux" && pc != nil {
|
|
||||||
numMsgs, err = pc.ReadBatch(*msgs, 0)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
msg := &(*msgs)[0]
|
|
||||||
msg.N, msg.NN, _, msg.Addr, err = conn.ReadMsgUDP(msg.Buffers[0], msg.OOB)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
numMsgs = 1
|
|
||||||
}
|
|
||||||
for i := 0; i < numMsgs; i++ {
|
|
||||||
msg := &(*msgs)[i]
|
|
||||||
sizes[i] = msg.N
|
|
||||||
addrPort := msg.Addr.(*net.UDPAddr).AddrPort()
|
|
||||||
ep := &StdNetEndpoint{AddrPort: addrPort} // TODO: remove allocation
|
|
||||||
getSrcFromControl(msg.OOB[:msg.NN], ep)
|
|
||||||
eps[i] = ep
|
|
||||||
}
|
|
||||||
return numMsgs, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: When all Binds handle IdealBatchSize, remove this dynamic function and
|
|
||||||
// rename the IdealBatchSize constant to BatchSize.
|
|
||||||
func (s *StdNetBind) BatchSize() int {
|
|
||||||
if runtime.GOOS == "linux" {
|
|
||||||
return IdealBatchSize
|
|
||||||
}
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StdNetBind) Close() error {
|
|
||||||
s.mu.Lock()
|
|
||||||
defer s.mu.Unlock()
|
|
||||||
|
|
||||||
var err1, err2 error
|
|
||||||
if s.ipv4 != nil {
|
|
||||||
err1 = s.ipv4.Close()
|
|
||||||
s.ipv4 = nil
|
|
||||||
s.ipv4PC = nil
|
|
||||||
}
|
|
||||||
if s.ipv6 != nil {
|
|
||||||
err2 = s.ipv6.Close()
|
|
||||||
s.ipv6 = nil
|
|
||||||
s.ipv6PC = nil
|
|
||||||
}
|
|
||||||
s.blackhole4 = false
|
|
||||||
s.blackhole6 = false
|
|
||||||
if err1 != nil {
|
|
||||||
return err1
|
|
||||||
}
|
|
||||||
return err2
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StdNetBind) Send(bufs [][]byte, endpoint Endpoint) error {
|
|
||||||
s.mu.Lock()
|
|
||||||
blackhole := s.blackhole4
|
|
||||||
conn := s.ipv4
|
|
||||||
var (
|
|
||||||
pc4 *ipv4.PacketConn
|
|
||||||
pc6 *ipv6.PacketConn
|
|
||||||
)
|
|
||||||
is6 := false
|
|
||||||
if endpoint.DstIP().Is6() {
|
|
||||||
blackhole = s.blackhole6
|
|
||||||
conn = s.ipv6
|
|
||||||
pc6 = s.ipv6PC
|
|
||||||
is6 = true
|
|
||||||
} else {
|
|
||||||
pc4 = s.ipv4PC
|
|
||||||
}
|
|
||||||
s.mu.Unlock()
|
|
||||||
|
|
||||||
if blackhole {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if conn == nil {
|
|
||||||
return syscall.EAFNOSUPPORT
|
|
||||||
}
|
|
||||||
if is6 {
|
|
||||||
return s.send6(conn, pc6, endpoint, bufs)
|
|
||||||
} else {
|
|
||||||
return s.send4(conn, pc4, endpoint, bufs)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StdNetBind) send4(conn *net.UDPConn, pc *ipv4.PacketConn, ep Endpoint, bufs [][]byte) error {
|
|
||||||
ua := s.udpAddrPool.Get().(*net.UDPAddr)
|
|
||||||
as4 := ep.DstIP().As4()
|
|
||||||
copy(ua.IP, as4[:])
|
|
||||||
ua.IP = ua.IP[:4]
|
|
||||||
ua.Port = int(ep.(*StdNetEndpoint).Port())
|
|
||||||
msgs := s.ipv4MsgsPool.Get().(*[]ipv4.Message)
|
|
||||||
for i, buf := range bufs {
|
|
||||||
(*msgs)[i].Buffers[0] = buf
|
|
||||||
(*msgs)[i].Addr = ua
|
|
||||||
setSrcControl(&(*msgs)[i].OOB, ep.(*StdNetEndpoint))
|
|
||||||
}
|
|
||||||
var (
|
|
||||||
n int
|
|
||||||
err error
|
|
||||||
start int
|
|
||||||
)
|
|
||||||
if runtime.GOOS == "linux" && pc != nil {
|
|
||||||
for {
|
|
||||||
n, err = pc.WriteBatch((*msgs)[start:len(bufs)], 0)
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(err, syscall.EAFNOSUPPORT) {
|
|
||||||
for j := start; j < len(bufs); j++ {
|
|
||||||
_, _, werr := conn.WriteMsgUDP(bufs[j], (*msgs)[j].OOB, ua)
|
|
||||||
if werr != nil {
|
|
||||||
err = werr
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if n == len((*msgs)[start:len(bufs)]) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
start += n
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for i, buf := range bufs {
|
|
||||||
_, _, err = conn.WriteMsgUDP(buf, (*msgs)[i].OOB, ua)
|
|
||||||
if err != nil {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
s.udpAddrPool.Put(ua)
|
|
||||||
s.ipv4MsgsPool.Put(msgs)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StdNetBind) send6(conn *net.UDPConn, pc *ipv6.PacketConn, ep Endpoint, bufs [][]byte) error {
|
|
||||||
ua := s.udpAddrPool.Get().(*net.UDPAddr)
|
|
||||||
as16 := ep.DstIP().As16()
|
|
||||||
copy(ua.IP, as16[:])
|
|
||||||
ua.IP = ua.IP[:16]
|
|
||||||
ua.Port = int(ep.(*StdNetEndpoint).Port())
|
|
||||||
msgs := s.ipv6MsgsPool.Get().(*[]ipv6.Message)
|
|
||||||
for i, buf := range bufs {
|
|
||||||
(*msgs)[i].Buffers[0] = buf
|
|
||||||
(*msgs)[i].Addr = ua
|
|
||||||
setSrcControl(&(*msgs)[i].OOB, ep.(*StdNetEndpoint))
|
|
||||||
}
|
|
||||||
var (
|
|
||||||
n int
|
|
||||||
err error
|
|
||||||
start int
|
|
||||||
)
|
|
||||||
if runtime.GOOS == "linux" && pc != nil {
|
|
||||||
for {
|
|
||||||
n, err = pc.WriteBatch((*msgs)[start:len(bufs)], 0)
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(err, syscall.EAFNOSUPPORT) {
|
|
||||||
for j := start; j < len(bufs); j++ {
|
|
||||||
_, _, werr := conn.WriteMsgUDP(bufs[j], (*msgs)[j].OOB, ua)
|
|
||||||
if werr != nil {
|
|
||||||
err = werr
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if n == len((*msgs)[start:len(bufs)]) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
start += n
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for i, buf := range bufs {
|
|
||||||
_, _, err = conn.WriteMsgUDP(buf, (*msgs)[i].OOB, ua)
|
|
||||||
if err != nil {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
s.udpAddrPool.Put(ua)
|
|
||||||
s.ipv6MsgsPool.Put(msgs)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
@@ -1,131 +0,0 @@
|
|||||||
// SPDX-License-Identifier: MIT
|
|
||||||
//
|
|
||||||
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
|
|
||||||
package conn
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"net/netip"
|
|
||||||
"reflect"
|
|
||||||
"runtime"
|
|
||||||
"strings"
|
|
||||||
)
|
|
||||||
|
|
||||||
// IdealBatchSize is the maximum number of packets handled per read and write.
const IdealBatchSize = 128
|
|
||||||
|
|
||||||
// A ReceiveFunc receives at least one packet from the network and writes them
|
|
||||||
// into packets. On a successful read it returns the number of elements of
|
|
||||||
// sizes, packets, and endpoints that should be evaluated. Some elements of
|
|
||||||
// sizes may be zero, and callers should ignore them. Callers must pass a sizes
|
|
||||||
// and eps slice with a length greater than or equal to the length of packets.
|
|
||||||
// These lengths must not exceed the length of the associated Bind.BatchSize().
|
|
||||||
type ReceiveFunc func(packets [][]byte, sizes []int, eps []Endpoint) (n int, err error)
|
|
||||||
|
|
||||||
// A Bind listens on a port for both IPv6 and IPv4 UDP traffic.
|
|
||||||
//
|
|
||||||
// A Bind interface may also be a PeekLookAtSocketFd or BindSocketToInterface,
|
|
||||||
// depending on the platform-specific implementation.
|
|
||||||
type Bind interface {
|
|
||||||
// Open puts the Bind into a listening state on a given port and reports the actual
|
|
||||||
// port that it bound to. Passing zero results in a random selection.
|
|
||||||
// fns is the set of functions that will be called to receive packets.
|
|
||||||
Open(port uint16) (fns []ReceiveFunc, actualPort uint16, err error)
|
|
||||||
|
|
||||||
// Close closes the Bind listener.
|
|
||||||
// All fns returned by Open must return net.ErrClosed after a call to Close.
|
|
||||||
Close() error
|
|
||||||
|
|
||||||
// SetMark sets the mark for each packet sent through this Bind.
|
|
||||||
// This mark is passed to the kernel as the socket option SO_MARK.
|
|
||||||
SetMark(mark uint32) error
|
|
||||||
|
|
||||||
// Send writes one or more packets in bufs to address ep. The length of
|
|
||||||
// bufs must not exceed BatchSize().
|
|
||||||
Send(bufs [][]byte, ep Endpoint) error
|
|
||||||
|
|
||||||
// ParseEndpoint creates a new endpoint from a string.
|
|
||||||
ParseEndpoint(s string) (Endpoint, error)
|
|
||||||
|
|
||||||
// BatchSize is the number of buffers expected to be passed to
|
|
||||||
// the ReceiveFuncs, and the maximum expected to be passed to SendBatch.
|
|
||||||
BatchSize() int
|
|
||||||
}
|
|
||||||
|
|
||||||
// BindSocketToInterface is an optional capability of a Bind: it lets the
// IPv4 and IPv6 sockets be tied to a single network interface, identified by
// index. Used by wireguard-windows.
type BindSocketToInterface interface {
	BindSocketToInterface4(interfaceIndex uint32, blackhole bool) error
	BindSocketToInterface6(interfaceIndex uint32, blackhole bool) error
}
|
|
||||||
|
|
||||||
// PeekLookAtSocketFd is an optional capability of a Bind: it exposes the file
// descriptors of the IPv4 and IPv6 sockets so callers can inspect them. Used
// by wireguard-android.
type PeekLookAtSocketFd interface {
	PeekLookAtSocketFd4() (fd int, err error)
	PeekLookAtSocketFd6() (fd int, err error)
}
|
|
||||||
|
|
||||||
// Endpoint holds the cached source and destination addressing for a peer.
//
//   - dst is the peer's remote address ("endpoint" in uapi terminology).
//   - src is the local address that datagrams to the peer originate from.
type Endpoint interface {
	// ClearSrc clears the source address.
	ClearSrc()
	// SrcToString returns the local source address (ip:port).
	SrcToString() string
	// DstToString returns the destination address (ip:port).
	DstToString() string
	// DstToBytes is used for mac2 cookie calculations.
	DstToBytes() []byte
	DstIP() netip.Addr
	SrcIP() netip.Addr
}
|
|
||||||
|
|
||||||
// Sentinel errors shared by Bind implementations.
var (
	// ErrBindAlreadyOpen reports that the bind is already open.
	ErrBindAlreadyOpen = errors.New("bind is already open")
	// ErrWrongEndpointType reports an endpoint whose type does not match the
	// bind it was given to.
	ErrWrongEndpointType = errors.New("endpoint type does not correspond with bind type")
)
|
|
||||||
|
|
||||||
func (fn ReceiveFunc) PrettyName() string {
|
|
||||||
name := runtime.FuncForPC(reflect.ValueOf(fn).Pointer()).Name()
|
|
||||||
// 0. cheese/taco.beansIPv6.func12.func21218-fm
|
|
||||||
name = strings.TrimSuffix(name, "-fm")
|
|
||||||
// 1. cheese/taco.beansIPv6.func12.func21218
|
|
||||||
if idx := strings.LastIndexByte(name, '/'); idx != -1 {
|
|
||||||
name = name[idx+1:]
|
|
||||||
// 2. taco.beansIPv6.func12.func21218
|
|
||||||
}
|
|
||||||
for {
|
|
||||||
var idx int
|
|
||||||
for idx = len(name) - 1; idx >= 0; idx-- {
|
|
||||||
if name[idx] < '0' || name[idx] > '9' {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if idx == len(name)-1 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
const dotFunc = ".func"
|
|
||||||
if !strings.HasSuffix(name[:idx+1], dotFunc) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
name = name[:idx+1-len(dotFunc)]
|
|
||||||
// 3. taco.beansIPv6.func12
|
|
||||||
// 4. taco.beansIPv6
|
|
||||||
}
|
|
||||||
if idx := strings.LastIndexByte(name, '.'); idx != -1 {
|
|
||||||
name = name[idx+1:]
|
|
||||||
// 5. beansIPv6
|
|
||||||
}
|
|
||||||
if name == "" {
|
|
||||||
return fmt.Sprintf("%p", fn)
|
|
||||||
}
|
|
||||||
if strings.HasSuffix(name, "IPv4") {
|
|
||||||
return "v4"
|
|
||||||
}
|
|
||||||
if strings.HasSuffix(name, "IPv6") {
|
|
||||||
return "v6"
|
|
||||||
}
|
|
||||||
return name
|
|
||||||
}
|
|
||||||
@@ -1,42 +0,0 @@
|
|||||||
// SPDX-License-Identifier: MIT
|
|
||||||
//
|
|
||||||
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
|
|
||||||
package conn
|
|
||||||
|
|
||||||
import (
|
|
||||||
"net"
|
|
||||||
"syscall"
|
|
||||||
)
|
|
||||||
|
|
||||||
// socketBufferSize is the UDP socket read/write buffer size (7MB). 7MB is the
// maximum supported by a default macOS configuration. Some platforms silently
// clamp the value to a different maximum; linux, for example, clamps to
// net.core.{r,w}mem_max (see _linux.go for an additional implementation that
// works around this limitation).
const socketBufferSize = 7 * 1024 * 1024
|
|
||||||
|
|
||||||
// controlFn has the signature of the net.ListenConfig.Control callback. Such
// functions apply platform-specific configuration to a socket before bind.
type controlFn func(network, address string, c syscall.RawConn) error
|
|
||||||
|
|
||||||
// controlFns is a list of functions that are called from the listen config
|
|
||||||
// that can apply socket options.
|
|
||||||
var controlFns = []controlFn{}
|
|
||||||
|
|
||||||
// listenConfig returns a net.ListenConfig that applies the controlFns to the
|
|
||||||
// socket prior to bind. This is used to apply socket buffer sizing and packet
|
|
||||||
// information OOB configuration for sticky sockets.
|
|
||||||
func listenConfig() *net.ListenConfig {
|
|
||||||
return &net.ListenConfig{
|
|
||||||
Control: func(network, address string, c syscall.RawConn) error {
|
|
||||||
for _, fn := range controlFns {
|
|
||||||
if err := fn(network, address, c); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
//go:build linux
|
|
||||||
|
|
||||||
// SPDX-License-Identifier: MIT
|
|
||||||
//
|
|
||||||
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
|
|
||||||
package conn
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"runtime"
|
|
||||||
"syscall"
|
|
||||||
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
controlFns = append(controlFns,
|
|
||||||
|
|
||||||
// Attempt to set the socket buffer size beyond net.core.{r,w}mem_max by
|
|
||||||
// using SO_*BUFFORCE. This requires CAP_NET_ADMIN, and is allowed here to
|
|
||||||
// fail silently - the result of failure is lower performance on very fast
|
|
||||||
// links or high latency links.
|
|
||||||
func(network, address string, c syscall.RawConn) error {
|
|
||||||
return c.Control(func(fd uintptr) {
|
|
||||||
// Set up to *mem_max
|
|
||||||
_ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_RCVBUF, socketBufferSize)
|
|
||||||
_ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_SNDBUF, socketBufferSize)
|
|
||||||
// Set beyond *mem_max if CAP_NET_ADMIN
|
|
||||||
_ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_RCVBUFFORCE, socketBufferSize)
|
|
||||||
_ = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_SNDBUFFORCE, socketBufferSize)
|
|
||||||
})
|
|
||||||
},
|
|
||||||
|
|
||||||
// Enable receiving of the packet information (IP_PKTINFO for IPv4,
|
|
||||||
// IPV6_PKTINFO for IPv6) that is used to implement sticky socket support.
|
|
||||||
func(network, address string, c syscall.RawConn) error {
|
|
||||||
var err error
|
|
||||||
switch network {
|
|
||||||
case "udp4":
|
|
||||||
if runtime.GOOS != "android" {
|
|
||||||
c.Control(func(fd uintptr) {
|
|
||||||
err = unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_PKTINFO, 1)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
case "udp6":
|
|
||||||
c.Control(func(fd uintptr) {
|
|
||||||
if runtime.GOOS != "android" {
|
|
||||||
err = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_RECVPKTINFO, 1)
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
err = unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_V6ONLY, 1)
|
|
||||||
})
|
|
||||||
default:
|
|
||||||
err = fmt.Errorf("unhandled network: %s: %w", network, unix.EINVAL)
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
},
|
|
||||||
)
|
|
||||||
}
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
//go:build !windows
|
|
||||||
|
|
||||||
// SPDX-License-Identifier: MIT
|
|
||||||
//
|
|
||||||
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
|
|
||||||
package conn
|
|
||||||
|
|
||||||
// NewDefaultBind returns the default Bind for this build, which is the one
// produced by NewStdNetBind.
func NewDefaultBind() Bind {
	return NewStdNetBind()
}
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
//go:build !linux
|
|
||||||
|
|
||||||
/* SPDX-License-Identifier: MIT
|
|
||||||
*
|
|
||||||
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package conn
|
|
||||||
|
|
||||||
// errShouldDisableUDPGSO always reports false in this (non-Linux) build,
// whatever err is.
func errShouldDisableUDPGSO(err error) bool {
	const disable = false
	return disable
}
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
/* SPDX-License-Identifier: MIT
|
|
||||||
*
|
|
||||||
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package conn
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"os"
|
|
||||||
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
func errShouldDisableUDPGSO(err error) bool {
|
|
||||||
var serr *os.SyscallError
|
|
||||||
if errors.As(err, &serr) {
|
|
||||||
// EIO is returned by udp_send_skb() if the device driver does not have
|
|
||||||
// tx checksumming enabled, which is a hard requirement of UDP_SEGMENT.
|
|
||||||
// See:
|
|
||||||
// https://git.kernel.org/pub/scm/docs/man-pages/man-pages.git/tree/man7/udp.7?id=806eabd74910447f21005160e90957bde4db0183#n228
|
|
||||||
// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/net/ipv4/udp.c?h=v6.2&id=c9c3395d5e3dcc6daee66c6908354d47bf98cb0c#n942
|
|
||||||
return serr.Err == unix.EIO
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
//go:build !linux
|
|
||||||
// +build !linux
|
|
||||||
|
|
||||||
/* SPDX-License-Identifier: MIT
|
|
||||||
*
|
|
||||||
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package conn
|
|
||||||
|
|
||||||
import "net"
|
|
||||||
|
|
||||||
// supportsUDPOffload always reports that neither transmit nor receive UDP
// offload is available in this (non-Linux) build.
func supportsUDPOffload(conn *net.UDPConn) (txOffload, rxOffload bool) {
	return false, false
}
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
/* SPDX-License-Identifier: MIT
|
|
||||||
*
|
|
||||||
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package conn
|
|
||||||
|
|
||||||
import (
|
|
||||||
"net"
|
|
||||||
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
func supportsUDPOffload(conn *net.UDPConn) (txOffload, rxOffload bool) {
|
|
||||||
rc, err := conn.SyscallConn()
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
err = rc.Control(func(fd uintptr) {
|
|
||||||
_, errSyscall := unix.GetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_SEGMENT)
|
|
||||||
txOffload = errSyscall == nil
|
|
||||||
opt, errSyscall := unix.GetsockoptInt(int(fd), unix.IPPROTO_UDP, unix.UDP_GRO)
|
|
||||||
rxOffload = errSyscall == nil && opt == 1
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
return false, false
|
|
||||||
}
|
|
||||||
return txOffload, rxOffload
|
|
||||||
}
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
//go:build !linux
|
|
||||||
|
|
||||||
/* SPDX-License-Identifier: MIT
|
|
||||||
*
|
|
||||||
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package conn
|
|
||||||
|
|
||||||
// getGSOSize is the stub used by non-Linux builds: it ignores control and
// always returns a size of 0 with a nil error.
func getGSOSize(control []byte) (int, error) {
	var size int
	return size, nil
}
|
|
||||||
|
|
||||||
// setGSOSize is the stub used by non-Linux builds: it does nothing and leaves
// control unchanged.
func setGSOSize(control *[]byte, gsoSize uint16) {}
|
|
||||||
|
|
||||||
// gsoControlSize is the recommended buffer size for pooling sticky and UDP
// offloading control data. It is zero in non-Linux builds.
const gsoControlSize int = 0
|
|
||||||
@@ -1,65 +0,0 @@
|
|||||||
//go:build linux
|
|
||||||
|
|
||||||
/* SPDX-License-Identifier: MIT
|
|
||||||
*
|
|
||||||
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package conn
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
// sizeOfGSOData is the size in bytes of the GSO size value that getGSOSize and
// setGSOSize read from or write to a control message (a uint16).
const sizeOfGSOData = 2
|
|
||||||
|
|
||||||
// getGSOSize parses control for UDP_GRO and if found returns its GSO size data.
|
|
||||||
func getGSOSize(control []byte) (int, error) {
|
|
||||||
var (
|
|
||||||
hdr unix.Cmsghdr
|
|
||||||
data []byte
|
|
||||||
rem = control
|
|
||||||
err error
|
|
||||||
)
|
|
||||||
|
|
||||||
for len(rem) > unix.SizeofCmsghdr {
|
|
||||||
hdr, data, rem, err = unix.ParseOneSocketControlMessage(rem)
|
|
||||||
if err != nil {
|
|
||||||
return 0, fmt.Errorf("error parsing socket control message: %w", err)
|
|
||||||
}
|
|
||||||
if hdr.Level == unix.SOL_UDP && hdr.Type == unix.UDP_GRO && len(data) >= sizeOfGSOData {
|
|
||||||
var gso uint16
|
|
||||||
copy(unsafe.Slice((*byte)(unsafe.Pointer(&gso)), sizeOfGSOData), data[:sizeOfGSOData])
|
|
||||||
return int(gso), nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return 0, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// setGSOSize sets a UDP_SEGMENT in control based on gsoSize. It leaves existing
|
|
||||||
// data in control untouched.
|
|
||||||
func setGSOSize(control *[]byte, gsoSize uint16) {
|
|
||||||
existingLen := len(*control)
|
|
||||||
avail := cap(*control) - existingLen
|
|
||||||
space := unix.CmsgSpace(sizeOfGSOData)
|
|
||||||
if avail < space {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
*control = (*control)[:cap(*control)]
|
|
||||||
gsoControl := (*control)[existingLen:]
|
|
||||||
hdr := (*unix.Cmsghdr)(unsafe.Pointer(&(gsoControl)[0]))
|
|
||||||
hdr.Level = unix.SOL_UDP
|
|
||||||
hdr.Type = unix.UDP_SEGMENT
|
|
||||||
hdr.SetLen(unix.CmsgLen(sizeOfGSOData))
|
|
||||||
copy((gsoControl)[unix.CmsgLen(0):], unsafe.Slice((*byte)(unsafe.Pointer(&gsoSize)), sizeOfGSOData))
|
|
||||||
*control = (*control)[:existingLen+space]
|
|
||||||
}
|
|
||||||
|
|
||||||
// gsoControlSize returns the recommended buffer size for pooling UDP
|
|
||||||
// offloading control data.
|
|
||||||
var gsoControlSize = unix.CmsgSpace(sizeOfGSOData)
|
|
||||||
@@ -1,64 +0,0 @@
|
|||||||
//go:build linux || openbsd || freebsd
|
|
||||||
|
|
||||||
// SPDX-License-Identifier: MIT
|
|
||||||
//
|
|
||||||
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
|
|
||||||
package conn
|
|
||||||
|
|
||||||
import (
|
|
||||||
"runtime"
|
|
||||||
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
// fwmarkIoctl is the SOL_SOCKET option number that SetMark passes to
// setsockopt. It is chosen per GOOS in init; zero makes SetMark a no-op.
var fwmarkIoctl int
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
switch runtime.GOOS {
|
|
||||||
case "linux", "android":
|
|
||||||
fwmarkIoctl = 36 /* unix.SO_MARK */
|
|
||||||
case "freebsd":
|
|
||||||
fwmarkIoctl = 0x1015 /* unix.SO_USER_COOKIE */
|
|
||||||
case "openbsd":
|
|
||||||
fwmarkIoctl = 0x1021 /* unix.SO_RTABLE */
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *StdNetBind) SetMark(mark uint32) error {
|
|
||||||
var operr error
|
|
||||||
if fwmarkIoctl == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if s.ipv4 != nil {
|
|
||||||
fd, err := s.ipv4.SyscallConn()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
err = fd.Control(func(fd uintptr) {
|
|
||||||
operr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, fwmarkIoctl, int(mark))
|
|
||||||
})
|
|
||||||
if err == nil {
|
|
||||||
err = operr
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if s.ipv6 != nil {
|
|
||||||
fd, err := s.ipv6.SyscallConn()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
err = fd.Control(func(fd uintptr) {
|
|
||||||
operr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, fwmarkIoctl, int(mark))
|
|
||||||
})
|
|
||||||
if err == nil {
|
|
||||||
err = operr
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
@@ -1,42 +0,0 @@
|
|||||||
//go:build !linux || android
|
|
||||||
|
|
||||||
/* SPDX-License-Identifier: MIT
|
|
||||||
*
|
|
||||||
* Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package conn
|
|
||||||
|
|
||||||
import "net/netip"
|
|
||||||
|
|
||||||
func (e *StdNetEndpoint) SrcIP() netip.Addr {
|
|
||||||
return netip.Addr{}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *StdNetEndpoint) SrcIfidx() int32 {
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *StdNetEndpoint) SrcToString() string {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: macOS, FreeBSD and other BSDs likely do support the sticky sockets
|
|
||||||
// {get,set}srcControl feature set, but use alternatively named flags and need
|
|
||||||
// ports and require testing.
|
|
||||||
|
|
||||||
// getSrcFromControl parses the control for PKTINFO and if found updates ep with
|
|
||||||
// the source information found.
|
|
||||||
func getSrcFromControl(control []byte, ep *StdNetEndpoint) {
|
|
||||||
}
|
|
||||||
|
|
||||||
// setSrcControl parses the control for PKTINFO and if found updates ep with
|
|
||||||
// the source information found.
|
|
||||||
func setSrcControl(control *[]byte, ep *StdNetEndpoint) {
|
|
||||||
}
|
|
||||||
|
|
||||||
// stickyControlSize is the recommended buffer size for pooling sticky
// offloading control data. It is zero in this build.
const stickyControlSize int = 0
|
|
||||||
|
|
||||||
// StdNetSupportsStickySockets is false in this build, where
// getSrcFromControl and setSrcControl are no-ops.
const StdNetSupportsStickySockets = false
|
|
||||||
@@ -1,116 +0,0 @@
|
|||||||
//go:build linux && !android
|
|
||||||
|
|
||||||
// SPDX-License-Identifier: MIT
|
|
||||||
//
|
|
||||||
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
|
|
||||||
package conn
|
|
||||||
|
|
||||||
import (
|
|
||||||
"net/netip"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
// getSrcFromControl parses the control for PKTINFO and if found updates ep with
|
|
||||||
// the source information found.
|
|
||||||
func getSrcFromControl(control []byte, ep *StdNetEndpoint) {
|
|
||||||
ep.ClearSrc()
|
|
||||||
|
|
||||||
var (
|
|
||||||
hdr unix.Cmsghdr
|
|
||||||
data []byte
|
|
||||||
rem []byte = control
|
|
||||||
err error
|
|
||||||
)
|
|
||||||
|
|
||||||
for len(rem) > unix.SizeofCmsghdr {
|
|
||||||
hdr, data, rem, err = unix.ParseOneSocketControlMessage(rem)
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if hdr.Level == unix.IPPROTO_IP &&
|
|
||||||
hdr.Type == unix.IP_PKTINFO {
|
|
||||||
|
|
||||||
info := pktInfoFromBuf[unix.Inet4Pktinfo](data)
|
|
||||||
ep.src.Addr = netip.AddrFrom4(info.Spec_dst)
|
|
||||||
ep.src.ifidx = info.Ifindex
|
|
||||||
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if hdr.Level == unix.IPPROTO_IPV6 &&
|
|
||||||
hdr.Type == unix.IPV6_PKTINFO {
|
|
||||||
|
|
||||||
info := pktInfoFromBuf[unix.Inet6Pktinfo](data)
|
|
||||||
ep.src.Addr = netip.AddrFrom16(info.Addr)
|
|
||||||
ep.src.ifidx = int32(info.Ifindex)
|
|
||||||
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// pktInfoFromBuf returns type T populated from the provided buf via copy(). It
|
|
||||||
// panics if buf is of insufficient size.
|
|
||||||
func pktInfoFromBuf[T unix.Inet4Pktinfo | unix.Inet6Pktinfo](buf []byte) (t T) {
|
|
||||||
size := int(unsafe.Sizeof(t))
|
|
||||||
if len(buf) < size {
|
|
||||||
panic("pktInfoFromBuf: buffer too small")
|
|
||||||
}
|
|
||||||
copy(unsafe.Slice((*byte)(unsafe.Pointer(&t)), size), buf)
|
|
||||||
return t
|
|
||||||
}
|
|
||||||
|
|
||||||
// setSrcControl sets an IP{V6}_PKTINFO in control based on the source address
|
|
||||||
// and source ifindex found in ep. control's len will be set to 0 in the event
|
|
||||||
// that ep is a default value.
|
|
||||||
func setSrcControl(control *[]byte, ep *StdNetEndpoint) {
|
|
||||||
*control = (*control)[:cap(*control)]
|
|
||||||
if len(*control) < int(unsafe.Sizeof(unix.Cmsghdr{})) {
|
|
||||||
*control = (*control)[:0]
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if ep.src.ifidx == 0 && !ep.SrcIP().IsValid() {
|
|
||||||
*control = (*control)[:0]
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(*control) < srcControlSize {
|
|
||||||
*control = (*control)[:0]
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
hdr := (*unix.Cmsghdr)(unsafe.Pointer(&(*control)[0]))
|
|
||||||
if ep.SrcIP().Is4() {
|
|
||||||
hdr.Level = unix.IPPROTO_IP
|
|
||||||
hdr.Type = unix.IP_PKTINFO
|
|
||||||
hdr.SetLen(unix.CmsgLen(unix.SizeofInet4Pktinfo))
|
|
||||||
|
|
||||||
info := (*unix.Inet4Pktinfo)(unsafe.Pointer(&(*control)[unix.SizeofCmsghdr]))
|
|
||||||
info.Ifindex = ep.src.ifidx
|
|
||||||
if ep.SrcIP().IsValid() {
|
|
||||||
info.Spec_dst = ep.SrcIP().As4()
|
|
||||||
}
|
|
||||||
*control = (*control)[:unix.CmsgSpace(unix.SizeofInet4Pktinfo)]
|
|
||||||
} else {
|
|
||||||
hdr.Level = unix.IPPROTO_IPV6
|
|
||||||
hdr.Type = unix.IPV6_PKTINFO
|
|
||||||
hdr.SetLen(unix.CmsgLen(unix.SizeofInet6Pktinfo))
|
|
||||||
|
|
||||||
info := (*unix.Inet6Pktinfo)(unsafe.Pointer(&(*control)[unix.SizeofCmsghdr]))
|
|
||||||
info.Ifindex = uint32(ep.src.ifidx)
|
|
||||||
if ep.SrcIP().IsValid() {
|
|
||||||
info.Addr = ep.SrcIP().As16()
|
|
||||||
}
|
|
||||||
*control = (*control)[:unix.CmsgSpace(unix.SizeofInet6Pktinfo)]
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// srcControlSize is the control-message space needed for an Inet6Pktinfo
// payload; setSrcControl requires at least this much buffer capacity.
var srcControlSize = unix.CmsgSpace(unix.SizeofInet6Pktinfo)
|
|
||||||
|
|
||||||
// StdNetSupportsStickySockets is true in this build, where getSrcFromControl
// and setSrcControl read and write PKTINFO control messages.
const StdNetSupportsStickySockets = true
|
|
||||||
@@ -1,42 +0,0 @@
|
|||||||
package tun
|
|
||||||
|
|
||||||
import "encoding/binary"
|
|
||||||
|
|
||||||
// TODO: Explore SIMD and/or other assembly optimizations.

// checksumNoFold adds the bytes of b to initial and returns the unfolded
// 64-bit sum. b is consumed as big-endian 32-bit words while at least four
// bytes remain, then as a big-endian 16-bit word, and a final odd byte is
// added as the high byte of a 16-bit word.
func checksumNoFold(b []byte, initial uint64) uint64 {
	sum := initial
	for ; len(b) >= 4; b = b[4:] {
		sum += uint64(binary.BigEndian.Uint32(b))
	}
	for ; len(b) >= 2; b = b[2:] {
		sum += uint64(binary.BigEndian.Uint16(b))
	}
	if len(b) == 1 {
		sum += uint64(b[0]) << 8
	}
	return sum
}
|
|
||||||
|
|
||||||
func checksum(b []byte, initial uint64) uint16 {
|
|
||||||
ac := checksumNoFold(b, initial)
|
|
||||||
ac = (ac >> 16) + (ac & 0xffff)
|
|
||||||
ac = (ac >> 16) + (ac & 0xffff)
|
|
||||||
ac = (ac >> 16) + (ac & 0xffff)
|
|
||||||
ac = (ac >> 16) + (ac & 0xffff)
|
|
||||||
return uint16(ac)
|
|
||||||
}
|
|
||||||
|
|
||||||
func pseudoHeaderChecksumNoFold(protocol uint8, srcAddr, dstAddr []byte, totalLen uint16) uint64 {
|
|
||||||
sum := checksumNoFold(srcAddr, 0)
|
|
||||||
sum = checksumNoFold(dstAddr, sum)
|
|
||||||
sum = checksumNoFold([]byte{0, protocol}, sum)
|
|
||||||
tmp := make([]byte, 2)
|
|
||||||
binary.BigEndian.PutUint16(tmp, totalLen)
|
|
||||||
return checksumNoFold(tmp, sum)
|
|
||||||
}
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
package tun
|
|
||||||
|
|
||||||
// VirtioNetHdrLen is the exported form of virtioNetHdrLen.
const VirtioNetHdrLen = virtioNetHdrLen
|
|
||||||
@@ -1,630 +0,0 @@
|
|||||||
//go:build linux
|
|
||||||
|
|
||||||
// SPDX-License-Identifier: MIT
|
|
||||||
//
|
|
||||||
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
|
|
||||||
package tun
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"encoding/binary"
|
|
||||||
"errors"
|
|
||||||
"io"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
wgconn "github.com/slackhq/nebula/wgstack/conn"
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
// ErrTooManySegments reports that there are too many segments for TSO.
var ErrTooManySegments = errors.New("tun: too many segments for TSO")
|
|
||||||
|
|
||||||
// tcpFlagsOffset is the byte offset, from the start of the TCP header, at
// which the flag bits are read.
const tcpFlagsOffset = 13
|
|
||||||
|
|
||||||
// Bit masks for individual TCP flags within the flags byte.
const (
	tcpFlagFIN uint8 = 1 << 0
	tcpFlagPSH uint8 = 1 << 3
	tcpFlagACK uint8 = 1 << 4
)
|
|
||||||
|
|
||||||
// virtioNetHdr is defined in the kernel in include/uapi/linux/virtio_net.h. The
|
|
||||||
// kernel symbol is virtio_net_hdr.
|
|
||||||
type virtioNetHdr struct {
|
|
||||||
flags uint8
|
|
||||||
gsoType uint8
|
|
||||||
hdrLen uint16
|
|
||||||
gsoSize uint16
|
|
||||||
csumStart uint16
|
|
||||||
csumOffset uint16
|
|
||||||
}
|
|
||||||
|
|
||||||
func (v *virtioNetHdr) decode(b []byte) error {
|
|
||||||
if len(b) < virtioNetHdrLen {
|
|
||||||
return io.ErrShortBuffer
|
|
||||||
}
|
|
||||||
copy(unsafe.Slice((*byte)(unsafe.Pointer(v)), virtioNetHdrLen), b[:virtioNetHdrLen])
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (v *virtioNetHdr) encode(b []byte) error {
|
|
||||||
if len(b) < virtioNetHdrLen {
|
|
||||||
return io.ErrShortBuffer
|
|
||||||
}
|
|
||||||
copy(b[:virtioNetHdrLen], unsafe.Slice((*byte)(unsafe.Pointer(v)), virtioNetHdrLen))
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
const (
|
|
||||||
// virtioNetHdrLen is the length in bytes of virtioNetHdr. This matches the
|
|
||||||
// shape of the C ABI for its kernel counterpart -- sizeof(virtio_net_hdr).
|
|
||||||
virtioNetHdrLen = int(unsafe.Sizeof(virtioNetHdr{}))
|
|
||||||
)
|
|
||||||
|
|
||||||
// flowKey identifies a flow: source and destination address, source and
// destination port, and the acknowledgment number.
type flowKey struct {
	srcAddr [16]byte
	dstAddr [16]byte
	srcPort uint16
	dstPort uint16
	// rxAck is part of the key because varying ack values should not be
	// coalesced; they are treated as separate flows.
	rxAck uint32
}
|
|
||||||
|
|
||||||
// tcpGROTable holds flow and coalescing information for the purposes of GRO.
|
|
||||||
type tcpGROTable struct {
|
|
||||||
itemsByFlow map[flowKey][]tcpGROItem
|
|
||||||
itemsPool [][]tcpGROItem
|
|
||||||
}
|
|
||||||
|
|
||||||
func newTCPGROTable() *tcpGROTable {
|
|
||||||
t := &tcpGROTable{
|
|
||||||
itemsByFlow: make(map[flowKey][]tcpGROItem, wgconn.IdealBatchSize),
|
|
||||||
itemsPool: make([][]tcpGROItem, wgconn.IdealBatchSize),
|
|
||||||
}
|
|
||||||
for i := range t.itemsPool {
|
|
||||||
t.itemsPool[i] = make([]tcpGROItem, 0, wgconn.IdealBatchSize)
|
|
||||||
}
|
|
||||||
return t
|
|
||||||
}
|
|
||||||
|
|
||||||
func newFlowKey(pkt []byte, srcAddr, dstAddr, tcphOffset int) flowKey {
|
|
||||||
key := flowKey{}
|
|
||||||
addrSize := dstAddr - srcAddr
|
|
||||||
copy(key.srcAddr[:], pkt[srcAddr:dstAddr])
|
|
||||||
copy(key.dstAddr[:], pkt[dstAddr:dstAddr+addrSize])
|
|
||||||
key.srcPort = binary.BigEndian.Uint16(pkt[tcphOffset:])
|
|
||||||
key.dstPort = binary.BigEndian.Uint16(pkt[tcphOffset+2:])
|
|
||||||
key.rxAck = binary.BigEndian.Uint32(pkt[tcphOffset+8:])
|
|
||||||
return key
|
|
||||||
}
|
|
||||||
|
|
||||||
// lookupOrInsert looks up a flow for the provided packet and metadata,
|
|
||||||
// returning the packets found for the flow, or inserting a new one if none
|
|
||||||
// is found.
|
|
||||||
func (t *tcpGROTable) lookupOrInsert(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex int) ([]tcpGROItem, bool) {
|
|
||||||
key := newFlowKey(pkt, srcAddrOffset, dstAddrOffset, tcphOffset)
|
|
||||||
items, ok := t.itemsByFlow[key]
|
|
||||||
if ok {
|
|
||||||
return items, ok
|
|
||||||
}
|
|
||||||
// TODO: insert() performs another map lookup. This could be rearranged to avoid.
|
|
||||||
t.insert(pkt, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex)
|
|
||||||
return nil, false
|
|
||||||
}
|
|
||||||
|
|
||||||
// insert an item in the table for the provided packet and packet metadata.
|
|
||||||
func (t *tcpGROTable) insert(pkt []byte, srcAddrOffset, dstAddrOffset, tcphOffset, tcphLen, bufsIndex int) {
|
|
||||||
key := newFlowKey(pkt, srcAddrOffset, dstAddrOffset, tcphOffset)
|
|
||||||
item := tcpGROItem{
|
|
||||||
key: key,
|
|
||||||
bufsIndex: uint16(bufsIndex),
|
|
||||||
gsoSize: uint16(len(pkt[tcphOffset+tcphLen:])),
|
|
||||||
iphLen: uint8(tcphOffset),
|
|
||||||
tcphLen: uint8(tcphLen),
|
|
||||||
sentSeq: binary.BigEndian.Uint32(pkt[tcphOffset+4:]),
|
|
||||||
pshSet: pkt[tcphOffset+tcpFlagsOffset]&tcpFlagPSH != 0,
|
|
||||||
}
|
|
||||||
items, ok := t.itemsByFlow[key]
|
|
||||||
if !ok {
|
|
||||||
items = t.newItems()
|
|
||||||
}
|
|
||||||
items = append(items, item)
|
|
||||||
t.itemsByFlow[key] = items
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tcpGROTable) updateAt(item tcpGROItem, i int) {
|
|
||||||
items, _ := t.itemsByFlow[item.key]
|
|
||||||
items[i] = item
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tcpGROTable) deleteAt(key flowKey, i int) {
|
|
||||||
items, _ := t.itemsByFlow[key]
|
|
||||||
items = append(items[:i], items[i+1:]...)
|
|
||||||
t.itemsByFlow[key] = items
|
|
||||||
}
|
|
||||||
|
|
||||||
// tcpGROItem represents bookkeeping data for a TCP packet during the lifetime
// of a GRO evaluation across a vector of packets.
type tcpGROItem struct {
	key       flowKey // the flow this item belongs to; used to find it again in the table
	sentSeq   uint32  // the sequence number (of the first byte held by the item; updated on prepend)
	bufsIndex uint16  // the index into the original bufs slice
	numMerged uint16  // the number of packets merged into this item
	gsoSize   uint16  // payload size (raised when a larger segment is coalesced in)
	iphLen    uint8   // ip header len
	tcphLen   uint8   // tcp header len
	pshSet    bool    // psh flag is set
}
|
|
||||||
|
|
||||||
func (t *tcpGROTable) newItems() []tcpGROItem {
|
|
||||||
var items []tcpGROItem
|
|
||||||
items, t.itemsPool = t.itemsPool[len(t.itemsPool)-1], t.itemsPool[:len(t.itemsPool)-1]
|
|
||||||
return items
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *tcpGROTable) reset() {
|
|
||||||
for k, items := range t.itemsByFlow {
|
|
||||||
items = items[:0]
|
|
||||||
t.itemsPool = append(t.itemsPool, items)
|
|
||||||
delete(t.itemsByFlow, k)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// canCoalesce represents the outcome of checking if two TCP packets are
// candidates for coalescing.
type canCoalesce int

const (
	// coalescePrepend: the packet's data directly precedes the item's data
	// and may be placed in front of it.
	coalescePrepend canCoalesce = -1
	// coalesceUnavailable: the packet cannot be coalesced with the item.
	coalesceUnavailable canCoalesce = 0
	// coalesceAppend: the packet's data directly follows the item's data and
	// may be placed after it.
	coalesceAppend canCoalesce = 1
)
|
|
||||||
|
|
||||||
// tcpPacketsCanCoalesce evaluates if pkt can be coalesced with the packet
// described by item. This function makes considerations that match the kernel's
// GRO self tests, which can be found in tools/testing/selftests/net/gro.c.
//
// iphLen, tcphLen, seq, pshSet and gsoSize describe pkt (IP header length, TCP
// header length, TCP sequence number, PSH flag, payload length). The item's
// packet is read from bufs[item.bufsIndex] starting at bufsOffset. The result
// says whether pkt may be appended to or prepended to the item, or neither.
// Nothing is modified.
func tcpPacketsCanCoalesce(pkt []byte, iphLen, tcphLen uint8, seq uint32, pshSet bool, gsoSize uint16, item tcpGROItem, bufs [][]byte, bufsOffset int) canCoalesce {
	pktTarget := bufs[item.bufsIndex][bufsOffset:]
	if tcphLen != item.tcphLen {
		// cannot coalesce with unequal tcp options len
		return coalesceUnavailable
	}
	if tcphLen > 20 {
		// NOTE(review): the target slice mixes item.iphLen (start) with iphLen
		// (end); this is only well-formed while both packets have the same IP
		// header length — confirm that holds for every caller.
		if !bytes.Equal(pkt[iphLen+20:iphLen+tcphLen], pktTarget[item.iphLen+20:iphLen+tcphLen]) {
			// cannot coalesce with unequal tcp options
			return coalesceUnavailable
		}
	}
	if pkt[0]>>4 == 6 {
		if pkt[0] != pktTarget[0] || pkt[1]>>4 != pktTarget[1]>>4 {
			// cannot coalesce with unequal Traffic class values
			return coalesceUnavailable
		}
		if pkt[7] != pktTarget[7] {
			// cannot coalesce with unequal Hop limit values
			return coalesceUnavailable
		}
	} else {
		if pkt[1] != pktTarget[1] {
			// cannot coalesce with unequal ToS values
			return coalesceUnavailable
		}
		if pkt[6]>>5 != pktTarget[6]>>5 {
			// cannot coalesce with unequal DF or reserved bits. MF is checked
			// further up the stack.
			return coalesceUnavailable
		}
		if pkt[8] != pktTarget[8] {
			// cannot coalesce with unequal TTL values
			return coalesceUnavailable
		}
	}
	// seq adjacency
	// lhsLen is computed as gsoSize * (numMerged + 1), in uint16 arithmetic;
	// it is used below as the payload length currently held by item.
	lhsLen := item.gsoSize
	lhsLen += item.numMerged * item.gsoSize
	if seq == item.sentSeq+uint32(lhsLen) { // pkt aligns following item from a seq num perspective
		if item.pshSet {
			// We cannot append to a segment that has the PSH flag set, PSH
			// can only be set on the final segment in a reassembled group.
			return coalesceUnavailable
		}
		if len(pktTarget[iphLen+tcphLen:])%int(item.gsoSize) != 0 {
			// A smaller than gsoSize packet has been appended previously.
			// Nothing can come after a smaller packet on the end.
			return coalesceUnavailable
		}
		if gsoSize > item.gsoSize {
			// We cannot have a larger packet following a smaller one.
			return coalesceUnavailable
		}
		return coalesceAppend
	} else if seq+uint32(gsoSize) == item.sentSeq { // pkt aligns in front of item from a seq num perspective
		if pshSet {
			// We cannot prepend with a segment that has the PSH flag set, PSH
			// can only be set on the final segment in a reassembled group.
			return coalesceUnavailable
		}
		if gsoSize < item.gsoSize {
			// We cannot have a larger packet following a smaller one.
			return coalesceUnavailable
		}
		if gsoSize > item.gsoSize && item.numMerged > 0 {
			// There's at least one previous merge, and we're larger than all
			// previous. This would put multiple smaller packets on the end.
			return coalesceUnavailable
		}
		return coalescePrepend
	}
	return coalesceUnavailable
}
|
|
||||||
|
|
||||||
func tcpChecksumValid(pkt []byte, iphLen uint8, isV6 bool) bool {
|
|
||||||
srcAddrAt := ipv4SrcAddrOffset
|
|
||||||
addrSize := 4
|
|
||||||
if isV6 {
|
|
||||||
srcAddrAt = ipv6SrcAddrOffset
|
|
||||||
addrSize = 16
|
|
||||||
}
|
|
||||||
tcpTotalLen := uint16(len(pkt) - int(iphLen))
|
|
||||||
tcpCSumNoFold := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, pkt[srcAddrAt:srcAddrAt+addrSize], pkt[srcAddrAt+addrSize:srcAddrAt+addrSize*2], tcpTotalLen)
|
|
||||||
return ^checksum(pkt[iphLen:], tcpCSumNoFold) == 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// coalesceResult represents the result of attempting to coalesce two TCP
// packets.
type coalesceResult int

const (
	// coalesceInsufficientCap: the buffer that would hold the merged packet
	// lacks the capacity to do so without reallocating.
	coalesceInsufficientCap coalesceResult = 0
	// coalescePSHEnding: a prepend was refused because pkt has PSH set.
	coalescePSHEnding coalesceResult = 1
	// coalesceItemInvalidCSum: the item's packet failed TCP checksum
	// validation.
	coalesceItemInvalidCSum coalesceResult = 2
	// coalescePktInvalidCSum: pkt failed TCP checksum validation.
	coalescePktInvalidCSum coalesceResult = 3
	// coalesceSuccess: the packets were merged.
	coalesceSuccess coalesceResult = 4
)
|
|
||||||
|
|
||||||
// coalesceTCPPackets attempts to coalesce pkt with the packet described by
// item, returning the outcome. This function may swap bufs elements in the
// event of a prepend as item's bufs index is already being tracked for writing
// to a Device.
//
// mode must be coalescePrepend to place pkt in front of the item; any other
// value appends. pktBuffsIndex is pkt's index in bufs; gsoSize, seq and pshSet
// describe pkt. On success the merged packet lives in bufs[item.bufsIndex],
// its IP length field (and IPv4 header checksum) is rewritten, a virtioNetHdr
// describing the merged packet is written into the virtioNetHdrLen bytes
// preceding bufsOffset, the TCP checksum field is seeded with the
// pseudo-header sum, and item (sentSeq, gsoSize, pshSet, numMerged) is
// updated. On any other result bufs and item are left unmodified.
func coalesceTCPPackets(mode canCoalesce, pkt []byte, pktBuffsIndex int, gsoSize uint16, seq uint32, pshSet bool, item *tcpGROItem, bufs [][]byte, bufsOffset int, isV6 bool) coalesceResult {
	var pktHead []byte // the packet that will end up at the front
	headersLen := item.iphLen + item.tcphLen
	coalescedLen := len(bufs[item.bufsIndex][bufsOffset:]) + len(pkt) - int(headersLen)

	// Copy data
	if mode == coalescePrepend {
		pktHead = pkt
		if cap(pkt)-bufsOffset < coalescedLen {
			// We don't want to allocate a new underlying array if capacity is
			// too small.
			return coalesceInsufficientCap
		}
		if pshSet {
			return coalescePSHEnding
		}
		// The item's own checksum only needs validating before its first
		// merge.
		if item.numMerged == 0 {
			if !tcpChecksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, isV6) {
				return coalesceItemInvalidCSum
			}
		}
		if !tcpChecksumValid(pkt, item.iphLen, isV6) {
			return coalescePktInvalidCSum
		}
		item.sentSeq = seq
		extendBy := coalescedLen - len(pktHead)
		// Capacity was checked above, so this append extends in place and
		// pktHead keeps aliasing the start of the merged packet.
		bufs[pktBuffsIndex] = append(bufs[pktBuffsIndex], make([]byte, extendBy)...)
		copy(bufs[pktBuffsIndex][bufsOffset+len(pkt):], bufs[item.bufsIndex][bufsOffset+int(headersLen):])
		// Flip the slice headers in bufs as part of prepend. The index of item
		// is already being tracked for writing.
		bufs[item.bufsIndex], bufs[pktBuffsIndex] = bufs[pktBuffsIndex], bufs[item.bufsIndex]
	} else {
		pktHead = bufs[item.bufsIndex][bufsOffset:]
		if cap(pktHead)-bufsOffset < coalescedLen {
			// We don't want to allocate a new underlying array if capacity is
			// too small.
			return coalesceInsufficientCap
		}
		if item.numMerged == 0 {
			if !tcpChecksumValid(bufs[item.bufsIndex][bufsOffset:], item.iphLen, isV6) {
				return coalesceItemInvalidCSum
			}
		}
		if !tcpChecksumValid(pkt, item.iphLen, isV6) {
			return coalescePktInvalidCSum
		}
		if pshSet {
			// We are appending a segment with PSH set.
			item.pshSet = pshSet
			pktHead[item.iphLen+tcpFlagsOffset] |= tcpFlagPSH
		}
		extendBy := len(pkt) - int(headersLen)
		bufs[item.bufsIndex] = append(bufs[item.bufsIndex], make([]byte, extendBy)...)
		copy(bufs[item.bufsIndex][bufsOffset+len(pktHead):], pkt[headersLen:])
	}

	if gsoSize > item.gsoSize {
		item.gsoSize = gsoSize
	}
	hdr := virtioNetHdr{
		flags:      unix.VIRTIO_NET_HDR_F_NEEDS_CSUM, // this turns into CHECKSUM_PARTIAL in the skb
		hdrLen:     uint16(headersLen),
		gsoSize:    uint16(item.gsoSize),
		csumStart:  uint16(item.iphLen),
		csumOffset: 16,
	}

	// Recalculate the total len (IPv4) or payload len (IPv6). Recalculate the
	// (IPv4) header checksum.
	if isV6 {
		hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV6
		binary.BigEndian.PutUint16(pktHead[4:], uint16(coalescedLen)-uint16(item.iphLen)) // set new payload len
	} else {
		hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV4
		pktHead[10], pktHead[11] = 0, 0                               // clear checksum field
		binary.BigEndian.PutUint16(pktHead[2:], uint16(coalescedLen)) // set new total length
		iphCSum := ^checksum(pktHead[:item.iphLen], 0)                // compute checksum
		binary.BigEndian.PutUint16(pktHead[10:], iphCSum)             // set checksum field
	}
	// NOTE(review): the error returned by encode is not checked here; it can
	// only be io.ErrShortBuffer when fewer than virtioNetHdrLen bytes remain
	// from bufsOffset-virtioNetHdrLen.
	hdr.encode(bufs[item.bufsIndex][bufsOffset-virtioNetHdrLen:])

	// Calculate the pseudo header checksum and place it at the TCP checksum
	// offset. Downstream checksum offloading will combine this with computation
	// of the tcp header and payload checksum.
	addrLen := 4
	addrOffset := ipv4SrcAddrOffset
	if isV6 {
		addrLen = 16
		addrOffset = ipv6SrcAddrOffset
	}
	srcAddrAt := bufsOffset + addrOffset
	srcAddr := bufs[item.bufsIndex][srcAddrAt : srcAddrAt+addrLen]
	dstAddr := bufs[item.bufsIndex][srcAddrAt+addrLen : srcAddrAt+addrLen*2]
	psum := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, srcAddr, dstAddr, uint16(coalescedLen-int(item.iphLen)))
	binary.BigEndian.PutUint16(pktHead[hdr.csumStart+hdr.csumOffset:], checksum([]byte{}, psum))

	item.numMerged++
	return coalesceSuccess
}
|
|
||||||
|
|
||||||
const (
	// ipv4FlagMoreFragments is the More Fragments bit as it appears in byte 6
	// of the IPv4 header (flags + top bits of the fragment offset).
	ipv4FlagMoreFragments uint8 = 0x20
)

const (
	// ipv4SrcAddrOffset is the byte offset of the source address in an IPv4
	// header; the destination address follows it directly.
	ipv4SrcAddrOffset = 12
	// ipv6SrcAddrOffset is the byte offset of the source address in an IPv6
	// header; the destination address follows it directly.
	ipv6SrcAddrOffset = 8
	// maxUint16 is the largest value representable in 16 bits, used as the
	// upper bound on IP packet length.
	maxUint16 = 1<<16 - 1
)
|
|
||||||
|
|
||||||
// tcpGRO evaluates the TCP packet at pktI in bufs for coalescing with
// existing packets tracked in table. It will return false when pktI is not
// coalesced, otherwise true. This indicates to the caller if bufs[pktI]
// should be written to the Device.
//
// offset is where the IP packet starts within each bufs element, and isV6
// selects IPv6 versus IPv4 parsing. Packets that are not coalesced are, where
// eligible, recorded in table so later packets can coalesce with them.
func tcpGRO(bufs [][]byte, offset int, pktI int, table *tcpGROTable, isV6 bool) (pktCoalesced bool) {
	pkt := bufs[pktI][offset:]
	if len(pkt) > maxUint16 {
		// A valid IPv4 or IPv6 packet will never exceed this.
		return false
	}
	iphLen := int((pkt[0] & 0x0F) * 4)
	if isV6 {
		iphLen = 40
		ipv6HPayloadLen := int(binary.BigEndian.Uint16(pkt[4:]))
		if ipv6HPayloadLen != len(pkt)-iphLen {
			return false
		}
	} else {
		totalLen := int(binary.BigEndian.Uint16(pkt[2:]))
		if totalLen != len(pkt) {
			return false
		}
	}
	if len(pkt) < iphLen {
		return false
	}
	// The upper nibble of TCP header byte 12 is the data offset in 32-bit
	// words.
	// NOTE(review): this index relies on the caller having ensured the packet
	// is long enough to contain a minimal TCP header — confirm at call sites.
	tcphLen := int((pkt[iphLen+12] >> 4) * 4)
	if tcphLen < 20 || tcphLen > 60 {
		return false
	}
	if len(pkt) < iphLen+tcphLen {
		return false
	}
	if !isV6 {
		if pkt[6]&ipv4FlagMoreFragments != 0 || pkt[6]<<3 != 0 || pkt[7] != 0 {
			// no GRO support for fragmented segments for now
			return false
		}
	}
	tcpFlags := pkt[iphLen+tcpFlagsOffset]
	var pshSet bool
	// not a candidate if any non-ACK flags (except PSH+ACK) are set
	if tcpFlags != tcpFlagACK {
		if pkt[iphLen+tcpFlagsOffset] != tcpFlagACK|tcpFlagPSH {
			return false
		}
		pshSet = true
	}
	gsoSize := uint16(len(pkt) - tcphLen - iphLen)
	// not a candidate if payload len is 0
	if gsoSize < 1 {
		return false
	}
	seq := binary.BigEndian.Uint32(pkt[iphLen+4:])
	srcAddrOffset := ipv4SrcAddrOffset
	addrLen := 4
	if isV6 {
		srcAddrOffset = ipv6SrcAddrOffset
		addrLen = 16
	}
	items, existing := table.lookupOrInsert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI)
	if !existing {
		// First packet of this flow: lookupOrInsert has already recorded it.
		return false
	}
	for i := len(items) - 1; i >= 0; i-- {
		// In the best case of packets arriving in order iterating in reverse is
		// more efficient if there are multiple items for a given flow. This
		// also enables a natural table.deleteAt() in the
		// coalesceItemInvalidCSum case without the need for index tracking.
		// This algorithm makes a best effort to coalesce in the event of
		// unordered packets, where pkt may land anywhere in items from a
		// sequence number perspective, however once an item is inserted into
		// the table it is never compared across other items later.
		item := items[i]
		can := tcpPacketsCanCoalesce(pkt, uint8(iphLen), uint8(tcphLen), seq, pshSet, gsoSize, item, bufs, offset)
		if can != coalesceUnavailable {
			// item is a copy; coalesceTCPPackets mutates it and the result is
			// written back with updateAt only on success.
			result := coalesceTCPPackets(can, pkt, pktI, gsoSize, seq, pshSet, &item, bufs, offset, isV6)
			switch result {
			case coalesceSuccess:
				table.updateAt(item, i)
				return true
			case coalesceItemInvalidCSum:
				// delete the item with an invalid csum
				table.deleteAt(item.key, i)
			case coalescePktInvalidCSum:
				// no point in inserting an item that we can't coalesce
				return false
			default:
			}
		}
	}
	// failed to coalesce with any other packets; store the item in the flow
	table.insert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI)
	return false
}
|
|
||||||
|
|
||||||
func isTCP4NoIPOptions(b []byte) bool {
|
|
||||||
if len(b) < 40 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if b[0]>>4 != 4 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if b[0]&0x0F != 5 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if b[9] != unix.IPPROTO_TCP {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
func isTCP6NoEH(b []byte) bool {
|
|
||||||
if len(b) < 60 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if b[0]>>4 != 6 {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if b[6] != unix.IPPROTO_TCP {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// handleGRO evaluates bufs for GRO, and writes the indices of the resulting
|
|
||||||
// packets into toWrite. toWrite, tcp4Table, and tcp6Table should initially be
|
|
||||||
// empty (but non-nil), and are passed in to save allocs as the caller may reset
|
|
||||||
// and recycle them across vectors of packets.
|
|
||||||
func handleGRO(bufs [][]byte, offset int, tcp4Table, tcp6Table *tcpGROTable, toWrite *[]int) error {
|
|
||||||
for i := range bufs {
|
|
||||||
if offset < virtioNetHdrLen || offset > len(bufs[i])-1 {
|
|
||||||
return errors.New("invalid offset")
|
|
||||||
}
|
|
||||||
var coalesced bool
|
|
||||||
switch {
|
|
||||||
case isTCP4NoIPOptions(bufs[i][offset:]): // ipv4 packets w/IP options do not coalesce
|
|
||||||
coalesced = tcpGRO(bufs, offset, i, tcp4Table, false)
|
|
||||||
case isTCP6NoEH(bufs[i][offset:]): // ipv6 packets w/extension headers do not coalesce
|
|
||||||
coalesced = tcpGRO(bufs, offset, i, tcp6Table, true)
|
|
||||||
}
|
|
||||||
if !coalesced {
|
|
||||||
hdr := virtioNetHdr{}
|
|
||||||
err := hdr.encode(bufs[i][offset-virtioNetHdrLen:])
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
*toWrite = append(*toWrite, i)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// tcpTSO splits packets from in into outBuffs, writing the size of each
|
|
||||||
// element into sizes. It returns the number of buffers populated, and/or an
|
|
||||||
// error.
|
|
||||||
func tcpTSO(in []byte, hdr virtioNetHdr, outBuffs [][]byte, sizes []int, outOffset int) (int, error) {
|
|
||||||
iphLen := int(hdr.csumStart)
|
|
||||||
srcAddrOffset := ipv6SrcAddrOffset
|
|
||||||
addrLen := 16
|
|
||||||
if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_TCPV4 {
|
|
||||||
in[10], in[11] = 0, 0 // clear ipv4 header checksum
|
|
||||||
srcAddrOffset = ipv4SrcAddrOffset
|
|
||||||
addrLen = 4
|
|
||||||
}
|
|
||||||
tcpCSumAt := int(hdr.csumStart + hdr.csumOffset)
|
|
||||||
in[tcpCSumAt], in[tcpCSumAt+1] = 0, 0 // clear tcp checksum
|
|
||||||
firstTCPSeqNum := binary.BigEndian.Uint32(in[hdr.csumStart+4:])
|
|
||||||
nextSegmentDataAt := int(hdr.hdrLen)
|
|
||||||
i := 0
|
|
||||||
for ; nextSegmentDataAt < len(in); i++ {
|
|
||||||
if i == len(outBuffs) {
|
|
||||||
return i - 1, ErrTooManySegments
|
|
||||||
}
|
|
||||||
nextSegmentEnd := nextSegmentDataAt + int(hdr.gsoSize)
|
|
||||||
if nextSegmentEnd > len(in) {
|
|
||||||
nextSegmentEnd = len(in)
|
|
||||||
}
|
|
||||||
segmentDataLen := nextSegmentEnd - nextSegmentDataAt
|
|
||||||
totalLen := int(hdr.hdrLen) + segmentDataLen
|
|
||||||
sizes[i] = totalLen
|
|
||||||
out := outBuffs[i][outOffset:]
|
|
||||||
|
|
||||||
copy(out, in[:iphLen])
|
|
||||||
if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_TCPV4 {
|
|
||||||
// For IPv4 we are responsible for incrementing the ID field,
|
|
||||||
// updating the total len field, and recalculating the header
|
|
||||||
// checksum.
|
|
||||||
if i > 0 {
|
|
||||||
id := binary.BigEndian.Uint16(out[4:])
|
|
||||||
id += uint16(i)
|
|
||||||
binary.BigEndian.PutUint16(out[4:], id)
|
|
||||||
}
|
|
||||||
binary.BigEndian.PutUint16(out[2:], uint16(totalLen))
|
|
||||||
ipv4CSum := ^checksum(out[:iphLen], 0)
|
|
||||||
binary.BigEndian.PutUint16(out[10:], ipv4CSum)
|
|
||||||
} else {
|
|
||||||
// For IPv6 we are responsible for updating the payload length field.
|
|
||||||
binary.BigEndian.PutUint16(out[4:], uint16(totalLen-iphLen))
|
|
||||||
}
|
|
||||||
|
|
||||||
// TCP header
|
|
||||||
copy(out[hdr.csumStart:hdr.hdrLen], in[hdr.csumStart:hdr.hdrLen])
|
|
||||||
tcpSeq := firstTCPSeqNum + uint32(hdr.gsoSize*uint16(i))
|
|
||||||
binary.BigEndian.PutUint32(out[hdr.csumStart+4:], tcpSeq)
|
|
||||||
if nextSegmentEnd != len(in) {
|
|
||||||
// FIN and PSH should only be set on last segment
|
|
||||||
clearFlags := tcpFlagFIN | tcpFlagPSH
|
|
||||||
out[hdr.csumStart+tcpFlagsOffset] &^= clearFlags
|
|
||||||
}
|
|
||||||
|
|
||||||
// payload
|
|
||||||
copy(out[hdr.hdrLen:], in[nextSegmentDataAt:nextSegmentEnd])
|
|
||||||
|
|
||||||
// TCP checksum
|
|
||||||
tcpHLen := int(hdr.hdrLen - hdr.csumStart)
|
|
||||||
tcpLenForPseudo := uint16(tcpHLen + segmentDataLen)
|
|
||||||
tcpCSumNoFold := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, in[srcAddrOffset:srcAddrOffset+addrLen], in[srcAddrOffset+addrLen:srcAddrOffset+addrLen*2], tcpLenForPseudo)
|
|
||||||
tcpCSum := ^checksum(out[hdr.csumStart:totalLen], tcpCSumNoFold)
|
|
||||||
binary.BigEndian.PutUint16(out[hdr.csumStart+hdr.csumOffset:], tcpCSum)
|
|
||||||
|
|
||||||
nextSegmentDataAt += int(hdr.gsoSize)
|
|
||||||
}
|
|
||||||
return i, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func gsoNoneChecksum(in []byte, cSumStart, cSumOffset uint16) error {
|
|
||||||
cSumAt := cSumStart + cSumOffset
|
|
||||||
// The initial value at the checksum offset should be summed with the
|
|
||||||
// checksum we compute. This is typically the pseudo-header checksum.
|
|
||||||
initial := binary.BigEndian.Uint16(in[cSumAt:])
|
|
||||||
in[cSumAt], in[cSumAt+1] = 0, 0
|
|
||||||
binary.BigEndian.PutUint16(in[cSumAt:], ^checksum(in[cSumStart:], uint64(initial)))
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
@@ -1,52 +0,0 @@
|
|||||||
// SPDX-License-Identifier: MIT
|
|
||||||
//
|
|
||||||
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
|
|
||||||
package tun
|
|
||||||
|
|
||||||
import (
|
|
||||||
"os"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Event is the type of the values delivered on a Device's Events channel.
type Event int

// Device event values. Each occupies its own bit (1 << iota). Note that the
// constants are declared without an explicit type, so they are untyped
// integer constants rather than values of type Event.
const (
	EventUp = 1 << iota
	EventDown
	EventMTUUpdate
)
|
|
||||||
|
|
||||||
// Device is the interface implemented by TUN devices. Packets are read and
// written in batches of up to BatchSize packets per call.
type Device interface {
	// File returns the file descriptor of the device.
	File() *os.File

	// Read one or more packets from the Device (without any additional headers).
	// On a successful read it returns the number of packets read, and sets
	// packet lengths within the sizes slice. len(sizes) must be >= len(bufs).
	// A nonzero offset can be used to instruct the Device on where to begin
	// reading into each element of the bufs slice.
	Read(bufs [][]byte, sizes []int, offset int) (n int, err error)

	// Write one or more packets to the device (without any additional headers).
	// On a successful write it returns the number of packets written. A nonzero
	// offset can be used to instruct the Device on where to begin writing from
	// each packet contained within the bufs slice.
	Write(bufs [][]byte, offset int) (int, error)

	// MTU returns the MTU of the Device.
	MTU() (int, error)

	// Name returns the current name of the Device.
	Name() (string, error)

	// Events returns a channel of type Event, which is fed Device events.
	Events() <-chan Event

	// Close stops the Device and closes the Event channel.
	Close() error

	// BatchSize returns the preferred/max number of packets that can be read or
	// written in a single read/write call. BatchSize must not change over the
	// lifetime of a Device.
	BatchSize() int
}
|
|
||||||
@@ -1,664 +0,0 @@
|
|||||||
//go:build linux
|
|
||||||
|
|
||||||
// SPDX-License-Identifier: MIT
|
|
||||||
//
|
|
||||||
// Copyright (C) 2017-2023 WireGuard LLC. All Rights Reserved.
|
|
||||||
|
|
||||||
package tun
|
|
||||||
|
|
||||||
/* Implementation of the TUN device interface for linux
|
|
||||||
*/
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"sync"
|
|
||||||
"syscall"
|
|
||||||
"time"
|
|
||||||
"unsafe"
|
|
||||||
|
|
||||||
wgconn "github.com/slackhq/nebula/wgstack/conn"
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
"golang.zx2c4.com/wireguard/rwcancel"
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
	// cloneDevicePath is the path of the TUN clone device.
	cloneDevicePath = "/dev/net/tun"
	// ifReqSize is the size of the byte buffer passed to interface ioctls in
	// this file: the interface name occupies the first unix.IFNAMSIZ bytes
	// and the request-specific value follows it.
	ifReqSize = unix.IFNAMSIZ + 64
)
|
|
||||||
|
|
||||||
// NativeTun is the Linux TUN device. It contains mutexes and sync.Once
// values, so it must be used through a pointer and never copied.
type NativeTun struct {
	tunFile                 *os.File
	index                   int32      // if index
	errors                  chan error // async error handling
	events                  chan Event // device related events
	netlinkSock             int        // netlink socket read by the netlink listener goroutine
	netlinkCancel           *rwcancel.RWCancel
	hackListenerClosed      sync.Mutex    // unlocked by routineHackListener on exit; locked by the netlink listener before it closes events
	statusListenersShutdown chan struct{} // signals the status listener goroutines to return
	batchSize               int
	vnetHdr                 bool // see readBuff: reads are prefixed by a virtioNetHdr when set

	closeOnce sync.Once

	nameOnce  sync.Once // guards calling initNameCache, which sets following fields
	nameCache string    // name of interface
	nameErr   error

	readOpMu sync.Mutex                    // readOpMu guards readBuff
	readBuff [virtioNetHdrLen + 65535]byte // if vnetHdr every read() is prefixed by virtioNetHdr

	writeOpMu                  sync.Mutex // writeOpMu guards toWrite, tcp4GROTable, tcp6GROTable
	toWrite                    []int
	tcp4GROTable, tcp6GROTable *tcpGROTable
}
|
|
||||||
|
|
||||||
// File returns the *os.File backing the TUN device.
func (tun *NativeTun) File() *os.File {
	return tun.tunFile
}
|
|
||||||
|
|
||||||
// routineHackListener polls the link state of the device about once per
// second by issuing a zero-length write on the tun file descriptor and
// interpreting the resulting errno: EINVAL is treated as "up" and EIO as
// "down". EventUp/EventDown is sent on tun.events only when the state differs
// from the previously observed one. The loop ends when
// statusListenersShutdown fires, when the file's raw connection can no longer
// be obtained or used, or when the write yields any other result. On return
// it unlocks hackListenerClosed.
func (tun *NativeTun) routineHackListener() {
	defer tun.hackListenerClosed.Unlock()
	/* This is needed for the detection to work across network namespaces
	 * If you are reading this and know a better method, please get in touch.
	 */
	last := 0
	const (
		up   = 1
		down = 2
	)
	for {
		sysconn, err := tun.tunFile.SyscallConn()
		if err != nil {
			return
		}
		// err is assigned inside the callback with the result of the probe
		// write; err2 reports failure of Control itself.
		err2 := sysconn.Control(func(fd uintptr) {
			_, err = unix.Write(int(fd), nil)
		})
		if err2 != nil {
			return
		}
		switch err {
		case unix.EINVAL:
			if last != up {
				// If the tunnel is up, it reports that write() is
				// allowed but we provided invalid data.
				tun.events <- EventUp
				last = up
			}
		case unix.EIO:
			if last != down {
				// If the tunnel is down, it reports that no I/O
				// is possible, without checking our provided data.
				tun.events <- EventDown
				last = down
			}
		default:
			return
		}
		select {
		case <-time.After(time.Second):
			// nothing
		case <-tun.statusListenersShutdown:
			return
		}
	}
}
|
|
||||||
|
|
||||||
func createNetlinkSocket() (int, error) {
|
|
||||||
sock, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW|unix.SOCK_CLOEXEC, unix.NETLINK_ROUTE)
|
|
||||||
if err != nil {
|
|
||||||
return -1, err
|
|
||||||
}
|
|
||||||
saddr := &unix.SockaddrNetlink{
|
|
||||||
Family: unix.AF_NETLINK,
|
|
||||||
Groups: unix.RTMGRP_LINK | unix.RTMGRP_IPV4_IFADDR | unix.RTMGRP_IPV6_IFADDR,
|
|
||||||
}
|
|
||||||
err = unix.Bind(sock, saddr)
|
|
||||||
if err != nil {
|
|
||||||
return -1, err
|
|
||||||
}
|
|
||||||
return sock, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// routineNetlinkListener receives messages from tun.netlinkSock and, for each
// RTM_NEWLINK message that refers to this device's interface index, sends
// EventUp or EventDown (depending on IFF_RUNNING) followed by EventMTUUpdate
// on tun.events. Receive failures are reported on tun.errors and end the
// loop, as does statusListenersShutdown.
//
// On return it closes the netlink socket, acquires hackListenerClosed (which
// routineHackListener releases when it exits), closes tun.events and closes
// netlinkCancel.
func (tun *NativeTun) routineNetlinkListener() {
	defer func() {
		unix.Close(tun.netlinkSock)
		tun.hackListenerClosed.Lock()
		close(tun.events)
		tun.netlinkCancel.Close()
	}()

	for msg := make([]byte, 1<<16); ; {
		var err error
		var msgn int
		// Retry the receive for as long as the error is retryable and the
		// socket becomes readable again.
		for {
			msgn, _, _, _, err = unix.Recvmsg(tun.netlinkSock, msg[:], nil, 0)
			if err == nil || !rwcancel.RetryAfterError(err) {
				break
			}
			if !tun.netlinkCancel.ReadyRead() {
				tun.errors <- fmt.Errorf("netlink socket closed: %w", err)
				return
			}
		}
		if err != nil {
			tun.errors <- fmt.Errorf("failed to receive netlink message: %w", err)
			return
		}

		select {
		case <-tun.statusListenersShutdown:
			return
		default:
		}

		// NOTE(review): wasEverUp is re-initialised for every received
		// datagram, so it only remembers an EventUp sent earlier within the
		// same datagram — confirm this matches the intent described below.
		wasEverUp := false
		// Walk the netlink messages packed into the received datagram.
		for remain := msg[:msgn]; len(remain) >= unix.SizeofNlMsghdr; {

			hdr := *(*unix.NlMsghdr)(unsafe.Pointer(&remain[0]))

			if int(hdr.Len) > len(remain) {
				break
			}

			switch hdr.Type {
			case unix.NLMSG_DONE:
				remain = []byte{}

			case unix.RTM_NEWLINK:
				info := *(*unix.IfInfomsg)(unsafe.Pointer(&remain[unix.SizeofNlMsghdr]))
				remain = remain[hdr.Len:]

				if info.Index != tun.index {
					// not our interface
					continue
				}

				if info.Flags&unix.IFF_RUNNING != 0 {
					tun.events <- EventUp
					wasEverUp = true
				}

				if info.Flags&unix.IFF_RUNNING == 0 {
					// Don't emit EventDown before we've ever emitted EventUp.
					// This avoids a startup race with HackListener, which
					// might detect Up before we have finished reporting Down.
					if wasEverUp {
						tun.events <- EventDown
					}
				}

				tun.events <- EventMTUUpdate

			default:
				remain = remain[hdr.Len:]
			}
		}
	}
}
|
|
||||||
|
|
||||||
func getIFIndex(name string) (int32, error) {
|
|
||||||
fd, err := unix.Socket(
|
|
||||||
unix.AF_INET,
|
|
||||||
unix.SOCK_DGRAM|unix.SOCK_CLOEXEC,
|
|
||||||
0,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
defer unix.Close(fd)
|
|
||||||
|
|
||||||
var ifr [ifReqSize]byte
|
|
||||||
copy(ifr[:], name)
|
|
||||||
_, _, errno := unix.Syscall(
|
|
||||||
unix.SYS_IOCTL,
|
|
||||||
uintptr(fd),
|
|
||||||
uintptr(unix.SIOCGIFINDEX),
|
|
||||||
uintptr(unsafe.Pointer(&ifr[0])),
|
|
||||||
)
|
|
||||||
|
|
||||||
if errno != 0 {
|
|
||||||
return 0, errno
|
|
||||||
}
|
|
||||||
|
|
||||||
return *(*int32)(unsafe.Pointer(&ifr[unix.IFNAMSIZ])), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (tun *NativeTun) setMTU(n int) error {
|
|
||||||
name, err := tun.Name()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// open datagram socket
|
|
||||||
fd, err := unix.Socket(
|
|
||||||
unix.AF_INET,
|
|
||||||
unix.SOCK_DGRAM|unix.SOCK_CLOEXEC,
|
|
||||||
0,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer unix.Close(fd)
|
|
||||||
|
|
||||||
var ifr [ifReqSize]byte
|
|
||||||
copy(ifr[:], name)
|
|
||||||
*(*uint32)(unsafe.Pointer(&ifr[unix.IFNAMSIZ])) = uint32(n)
|
|
||||||
|
|
||||||
_, _, errno := unix.Syscall(
|
|
||||||
unix.SYS_IOCTL,
|
|
||||||
uintptr(fd),
|
|
||||||
uintptr(unix.SIOCSIFMTU),
|
|
||||||
uintptr(unsafe.Pointer(&ifr[0])),
|
|
||||||
)
|
|
||||||
|
|
||||||
if errno != 0 {
|
|
||||||
return errno
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (tun *NativeTun) routineNetlinkRead() {
|
|
||||||
defer func() {
|
|
||||||
unix.Close(tun.netlinkSock)
|
|
||||||
tun.hackListenerClosed.Lock()
|
|
||||||
close(tun.events)
|
|
||||||
tun.netlinkCancel.Close()
|
|
||||||
}()
|
|
||||||
|
|
||||||
for msg := make([]byte, 1<<16); ; {
|
|
||||||
var err error
|
|
||||||
var msgn int
|
|
||||||
for {
|
|
||||||
msgn, _, _, _, err = unix.Recvmsg(tun.netlinkSock, msg[:], nil, 0)
|
|
||||||
if err == nil || !rwcancel.RetryAfterError(err) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if !tun.netlinkCancel.ReadyRead() {
|
|
||||||
tun.errors <- fmt.Errorf("netlink socket closed: %w", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
tun.errors <- fmt.Errorf("failed to receive netlink message: %w", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
wasEverUp := false
|
|
||||||
for remain := msg[:msgn]; len(remain) >= unix.SizeofNlMsghdr; {
|
|
||||||
|
|
||||||
hdr := *(*unix.NlMsghdr)(unsafe.Pointer(&remain[0]))
|
|
||||||
|
|
||||||
if int(hdr.Len) > len(remain) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
switch hdr.Type {
|
|
||||||
case unix.NLMSG_DONE:
|
|
||||||
remain = []byte{}
|
|
||||||
|
|
||||||
case unix.RTM_NEWLINK:
|
|
||||||
info := *(*unix.IfInfomsg)(unsafe.Pointer(&remain[unix.SizeofNlMsghdr]))
|
|
||||||
remain = remain[hdr.Len:]
|
|
||||||
|
|
||||||
if info.Index != tun.index {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if info.Flags&unix.IFF_RUNNING != 0 {
|
|
||||||
tun.events <- EventUp
|
|
||||||
wasEverUp = true
|
|
||||||
}
|
|
||||||
|
|
||||||
if info.Flags&unix.IFF_RUNNING == 0 {
|
|
||||||
if wasEverUp {
|
|
||||||
tun.events <- EventDown
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tun.events <- EventMTUUpdate
|
|
||||||
|
|
||||||
default:
|
|
||||||
remain = remain[hdr.Len:]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (tun *NativeTun) routineNetlink() {
|
|
||||||
var err error
|
|
||||||
|
|
||||||
tun.netlinkSock, err = createNetlinkSocket()
|
|
||||||
if err != nil {
|
|
||||||
tun.errors <- fmt.Errorf("failed to create netlink socket: %w", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
tun.netlinkCancel, err = rwcancel.NewRWCancel(tun.netlinkSock)
|
|
||||||
if err != nil {
|
|
||||||
tun.errors <- fmt.Errorf("failed to create netlink cancel: %w", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
go tun.routineNetlinkListener()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (tun *NativeTun) Close() error {
|
|
||||||
var err1, err2 error
|
|
||||||
tun.closeOnce.Do(func() {
|
|
||||||
if tun.statusListenersShutdown != nil {
|
|
||||||
close(tun.statusListenersShutdown)
|
|
||||||
if tun.netlinkCancel != nil {
|
|
||||||
err1 = tun.netlinkCancel.Cancel()
|
|
||||||
}
|
|
||||||
} else if tun.events != nil {
|
|
||||||
close(tun.events)
|
|
||||||
}
|
|
||||||
err2 = tun.tunFile.Close()
|
|
||||||
})
|
|
||||||
if err1 != nil {
|
|
||||||
return err1
|
|
||||||
}
|
|
||||||
return err2
|
|
||||||
}
|
|
||||||
|
|
||||||
func (tun *NativeTun) BatchSize() int {
|
|
||||||
return tun.batchSize
|
|
||||||
}
|
|
||||||
|
|
||||||
const (
|
|
||||||
// TODO: support TSO with ECN bits
|
|
||||||
tunOffloads = unix.TUN_F_CSUM | unix.TUN_F_TSO4 | unix.TUN_F_TSO6
|
|
||||||
)
|
|
||||||
|
|
||||||
func (tun *NativeTun) initFromFlags(name string) error {
|
|
||||||
sc, err := tun.tunFile.SyscallConn()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if e := sc.Control(func(fd uintptr) {
|
|
||||||
var (
|
|
||||||
ifr *unix.Ifreq
|
|
||||||
)
|
|
||||||
ifr, err = unix.NewIfreq(name)
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
err = unix.IoctlIfreq(int(fd), unix.TUNGETIFF, ifr)
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
got := ifr.Uint16()
|
|
||||||
if got&unix.IFF_VNET_HDR != 0 {
|
|
||||||
err = unix.IoctlSetInt(int(fd), unix.TUNSETOFFLOAD, tunOffloads)
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
tun.vnetHdr = true
|
|
||||||
tun.batchSize = wgconn.IdealBatchSize
|
|
||||||
} else {
|
|
||||||
tun.batchSize = 1
|
|
||||||
}
|
|
||||||
}); e != nil {
|
|
||||||
return e
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// CreateTUN creates a Device with the provided name and MTU.
|
|
||||||
func CreateTUN(name string, mtu int) (Device, error) {
|
|
||||||
nfd, err := unix.Open(cloneDevicePath, unix.O_RDWR|unix.O_CLOEXEC, 0)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("CreateTUN(%q) failed; %s does not exist", name, cloneDevicePath)
|
|
||||||
}
|
|
||||||
fd := os.NewFile(uintptr(nfd), cloneDevicePath)
|
|
||||||
tun, err := CreateTUNFromFile(fd, mtu)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if name != "tun" {
|
|
||||||
if err := tun.(*NativeTun).initFromFlags(name); err != nil {
|
|
||||||
tun.Close()
|
|
||||||
return nil, fmt.Errorf("CreateTUN(%q) failed to set flags: %w", name, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return tun, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// CreateTUNFromFile creates a Device from an os.File with the provided MTU.
|
|
||||||
func CreateTUNFromFile(file *os.File, mtu int) (Device, error) {
|
|
||||||
tun := &NativeTun{
|
|
||||||
tunFile: file,
|
|
||||||
errors: make(chan error, 5),
|
|
||||||
events: make(chan Event, 5),
|
|
||||||
}
|
|
||||||
|
|
||||||
name, err := tun.Name()
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to determine TUN name: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := tun.initFromFlags(name); err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to query TUN flags: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if tun.batchSize == 0 {
|
|
||||||
tun.batchSize = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
tun.index, err = getIFIndex(name)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to get TUN index: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err = tun.setMTU(mtu); err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to set MTU: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
tun.statusListenersShutdown = make(chan struct{})
|
|
||||||
go tun.routineNetlink()
|
|
||||||
|
|
||||||
if tun.batchSize == 0 {
|
|
||||||
tun.batchSize = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
tun.tcp4GROTable = newTCPGROTable()
|
|
||||||
tun.tcp6GROTable = newTCPGROTable()
|
|
||||||
|
|
||||||
return tun, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (tun *NativeTun) Name() (string, error) {
|
|
||||||
tun.nameOnce.Do(tun.initNameCache)
|
|
||||||
return tun.nameCache, tun.nameErr
|
|
||||||
}
|
|
||||||
|
|
||||||
func (tun *NativeTun) initNameCache() {
|
|
||||||
sysconn, err := tun.tunFile.SyscallConn()
|
|
||||||
if err != nil {
|
|
||||||
tun.nameErr = err
|
|
||||||
return
|
|
||||||
}
|
|
||||||
err = sysconn.Control(func(fd uintptr) {
|
|
||||||
var ifr [ifReqSize]byte
|
|
||||||
_, _, errno := unix.Syscall(
|
|
||||||
unix.SYS_IOCTL,
|
|
||||||
fd,
|
|
||||||
uintptr(unix.TUNGETIFF),
|
|
||||||
uintptr(unsafe.Pointer(&ifr[0])),
|
|
||||||
)
|
|
||||||
if errno != 0 {
|
|
||||||
tun.nameErr = errno
|
|
||||||
return
|
|
||||||
}
|
|
||||||
tun.nameCache = unix.ByteSliceToString(ifr[:])
|
|
||||||
})
|
|
||||||
if err != nil && tun.nameErr == nil {
|
|
||||||
tun.nameErr = err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (tun *NativeTun) MTU() (int, error) {
|
|
||||||
name, err := tun.Name()
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// open datagram socket
|
|
||||||
fd, err := unix.Socket(
|
|
||||||
unix.AF_INET,
|
|
||||||
unix.SOCK_DGRAM|unix.SOCK_CLOEXEC,
|
|
||||||
0,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
defer unix.Close(fd)
|
|
||||||
|
|
||||||
var ifr [ifReqSize]byte
|
|
||||||
copy(ifr[:], name)
|
|
||||||
|
|
||||||
_, _, errno := unix.Syscall(
|
|
||||||
unix.SYS_IOCTL,
|
|
||||||
uintptr(fd),
|
|
||||||
uintptr(unix.SIOCGIFMTU),
|
|
||||||
uintptr(unsafe.Pointer(&ifr[0])),
|
|
||||||
)
|
|
||||||
|
|
||||||
if errno != 0 {
|
|
||||||
return 0, errno
|
|
||||||
}
|
|
||||||
|
|
||||||
return int(*(*uint32)(unsafe.Pointer(&ifr[unix.IFNAMSIZ]))), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (tun *NativeTun) Events() <-chan Event {
|
|
||||||
return tun.events
|
|
||||||
}
|
|
||||||
|
|
||||||
func (tun *NativeTun) Write(bufs [][]byte, offset int) (int, error) {
|
|
||||||
tun.writeOpMu.Lock()
|
|
||||||
defer func() {
|
|
||||||
tun.tcp4GROTable.reset()
|
|
||||||
tun.tcp6GROTable.reset()
|
|
||||||
tun.writeOpMu.Unlock()
|
|
||||||
}()
|
|
||||||
var (
|
|
||||||
errs error
|
|
||||||
total int
|
|
||||||
)
|
|
||||||
tun.toWrite = tun.toWrite[:0]
|
|
||||||
if tun.vnetHdr {
|
|
||||||
err := handleGRO(bufs, offset, tun.tcp4GROTable, tun.tcp6GROTable, &tun.toWrite)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
offset -= virtioNetHdrLen
|
|
||||||
} else {
|
|
||||||
for i := range bufs {
|
|
||||||
tun.toWrite = append(tun.toWrite, i)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, bufsI := range tun.toWrite {
|
|
||||||
n, err := tun.tunFile.Write(bufs[bufsI][offset:])
|
|
||||||
if errors.Is(err, syscall.EBADFD) {
|
|
||||||
return total, os.ErrClosed
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
errs = errors.Join(errs, err)
|
|
||||||
} else {
|
|
||||||
total += n
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return total, errs
|
|
||||||
}
|
|
||||||
|
|
||||||
// handleVirtioRead splits in into bufs, leaving offset bytes at the front of
|
|
||||||
// each buffer. It mutates sizes to reflect the size of each element of bufs,
|
|
||||||
// and returns the number of packets read.
|
|
||||||
func handleVirtioRead(in []byte, bufs [][]byte, sizes []int, offset int) (int, error) {
|
|
||||||
var hdr virtioNetHdr
|
|
||||||
if err := hdr.decode(in); err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
in = in[virtioNetHdrLen:]
|
|
||||||
if hdr.gsoType == unix.VIRTIO_NET_HDR_GSO_NONE {
|
|
||||||
if hdr.flags&unix.VIRTIO_NET_HDR_F_NEEDS_CSUM != 0 {
|
|
||||||
if err := gsoNoneChecksum(in, hdr.csumStart, hdr.csumOffset); err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if len(in) > len(bufs[0][offset:]) {
|
|
||||||
return 0, fmt.Errorf("read len %d overflows bufs element len %d", len(in), len(bufs[0][offset:]))
|
|
||||||
}
|
|
||||||
n := copy(bufs[0][offset:], in)
|
|
||||||
sizes[0] = n
|
|
||||||
return 1, nil
|
|
||||||
}
|
|
||||||
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 && hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 {
|
|
||||||
return 0, fmt.Errorf("unsupported virtio GSO type: %d", hdr.gsoType)
|
|
||||||
}
|
|
||||||
|
|
||||||
ipVersion := in[0] >> 4
|
|
||||||
switch ipVersion {
|
|
||||||
case 4:
|
|
||||||
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV4 {
|
|
||||||
return 0, fmt.Errorf("ip header version: %d, GSO type: %d", ipVersion, hdr.gsoType)
|
|
||||||
}
|
|
||||||
case 6:
|
|
||||||
if hdr.gsoType != unix.VIRTIO_NET_HDR_GSO_TCPV6 {
|
|
||||||
return 0, fmt.Errorf("ip header version: %d, GSO type: %d", ipVersion, hdr.gsoType)
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
return 0, fmt.Errorf("invalid ip header version: %d", ipVersion)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(in) <= int(hdr.csumStart+12) {
|
|
||||||
return 0, errors.New("packet is too short")
|
|
||||||
}
|
|
||||||
tcpHLen := uint16(in[hdr.csumStart+12] >> 4 * 4)
|
|
||||||
if tcpHLen < 20 || tcpHLen > 60 {
|
|
||||||
return 0, fmt.Errorf("tcp header len is invalid: %d", tcpHLen)
|
|
||||||
}
|
|
||||||
hdr.hdrLen = hdr.csumStart + tcpHLen
|
|
||||||
if len(in) < int(hdr.hdrLen) {
|
|
||||||
return 0, fmt.Errorf("length of packet (%d) < virtioNetHdr.hdrLen (%d)", len(in), hdr.hdrLen)
|
|
||||||
}
|
|
||||||
if hdr.hdrLen < hdr.csumStart {
|
|
||||||
return 0, fmt.Errorf("virtioNetHdr.hdrLen (%d) < virtioNetHdr.csumStart (%d)", hdr.hdrLen, hdr.csumStart)
|
|
||||||
}
|
|
||||||
cSumAt := int(hdr.csumStart + hdr.csumOffset)
|
|
||||||
if cSumAt+1 >= len(in) {
|
|
||||||
return 0, fmt.Errorf("end of checksum offset (%d) exceeds packet length (%d)", cSumAt+1, len(in))
|
|
||||||
}
|
|
||||||
|
|
||||||
return tcpTSO(in, hdr, bufs, sizes, offset)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (tun *NativeTun) Read(bufs [][]byte, sizes []int, offset int) (int, error) {
|
|
||||||
tun.readOpMu.Lock()
|
|
||||||
defer tun.readOpMu.Unlock()
|
|
||||||
select {
|
|
||||||
case err := <-tun.errors:
|
|
||||||
return 0, err
|
|
||||||
default:
|
|
||||||
readInto := bufs[0][offset:]
|
|
||||||
if tun.vnetHdr {
|
|
||||||
readInto = tun.readBuff[:]
|
|
||||||
}
|
|
||||||
n, err := tun.tunFile.Read(readInto)
|
|
||||||
if errors.Is(err, syscall.EBADFD) {
|
|
||||||
err = os.ErrClosed
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
if tun.vnetHdr {
|
|
||||||
return handleVirtioRead(readInto[:n], bufs, sizes, offset)
|
|
||||||
}
|
|
||||||
sizes[0] = n
|
|
||||||
return 1, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user