The goal of this work is to send packets between two hosts using more than one
5-tuple. When running on networks like AWS, where the underlying network driver
and overlay fabric make routing, load balancing, and failover decisions based
on the flow hash, this enables more than one flow between pairs of hosts.
Multiport spreads outgoing UDP packets across multiple UDP send ports,
which allows Nebula to work around certain issues on the underlay network.
Some example issues this could work around:
- UDP rate limits on a per-flow basis.
- Partial underlay network failure in which some flows work and some don't.
Agreement is done during the handshake to decide whether multiport mode will
be used for a given tunnel (one side must have tx_enabled set, the other
side must have rx_enabled set).
NOTE: you cannot use multiport on a host if you are relying on UDP hole
punching to get through a NAT or firewall.
NOTE: Linux only (uses raw sockets to send). Also currently only works
with IPv4 underlay network remotes.
This is implemented by opening a raw socket and sending packets with
a source port that is based on a hash of the overlay source/destination
ports. For ICMP and Nebula metadata packets, we use a random source port.
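
The exact mapping from overlay flow to underlay source port is not spelled out
here beyond "a hash of the overlay source/destination ports". As a rough
illustration only, a hash-and-modulo selection could look like the sketch
below; the FNV-1a hash, the sendPort helper, and its parameters are all
hypothetical and are not Nebula's actual API.

package main

import (
    "encoding/binary"
    "fmt"
    "hash/fnv"
)

// sendPort maps an overlay source/destination port pair onto one of txPorts
// consecutive underlay source ports starting at listenPort, so a given
// overlay flow always uses the same underlay 5-tuple. (Illustrative only.)
func sendPort(listenPort, txPorts, overlaySrc, overlayDst uint16) uint16 {
    h := fnv.New32a()
    var b [4]byte
    binary.BigEndian.PutUint16(b[0:2], overlaySrc)
    binary.BigEndian.PutUint16(b[2:4], overlayDst)
    h.Write(b[:])
    return listenPort + uint16(h.Sum32()%uint32(txPorts))
}

func main() {
    // With listen.port 4242 and tx_ports 100, every overlay flow hashes to a
    // stable source port in the range 4242-4341.
    fmt.Println(sendPort(4242, 100, 51820, 443))
}
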
Example configuration:
multiport:
# This host support sending via multiple UDP ports.
tx_enabled: false
# This host supports receiving packets sent from multiple UDP ports.
rx_enabled: false
# How many UDP ports to use when sending. The lowest source port will be
# listen.port and go up to (but not including) listen.port + tx_ports.
tx_ports: 100
# NOTE: All of your hosts must be running a version of Nebula that supports
# multiport if you want to enable this feature. Older versions of Nebula
# will be confused by these multiport handshakes.
#
# If handshakes are not getting a response, attempt to transmit handshakes
# using random UDP source ports (to get around partial underlay network
# failures).
tx_handshake: false
# How many unresponded handshakes we should send before we attempt to
# send multiport handshakes.
tx_handshake_delay: 2
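
For example, with the defaults above and a listen.port of (say) 4242, enabling
tx_enabled on this host and rx_enabled on the peer would spread traffic across
source ports 4242 through 4341 (tx_ports: 100). If tx_handshake were also
enabled, a handshake that has gone unanswered twice (tx_handshake_delay: 2)
would be retried using random UDP source ports.
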
package nebula

import (
    "sync/atomic"

    "github.com/flynn/noise"
    "github.com/sirupsen/logrus"
    "github.com/slackhq/nebula/firewall"
    "github.com/slackhq/nebula/header"
    "github.com/slackhq/nebula/iputil"
    "github.com/slackhq/nebula/udp"
)

func (f *Interface) consumeInsidePacket(packet []byte, fwPacket *firewall.Packet, nb, out []byte, q int, localCache firewall.ConntrackCache) {
    err := newPacket(packet, false, fwPacket)
    if err != nil {
        if f.l.Level >= logrus.DebugLevel {
            f.l.WithField("packet", packet).Debugf("Error while validating outbound packet: %s", err)
        }
        return
    }

    // Ignore local broadcast packets
    if f.dropLocalBroadcast && fwPacket.RemoteIP == f.localBroadcast {
        return
    }

    if fwPacket.RemoteIP == f.myVpnIp {
        // Immediately forward packets from self to self.
        // This should only happen on Darwin-based hosts, which routes packets from
        // the Nebula IP to the Nebula IP through the Nebula TUN device.
        if immediatelyForwardToSelf {
            _, err := f.readers[q].Write(packet)
            if err != nil {
                f.l.WithError(err).Error("Failed to forward to tun")
            }
        }
        // Otherwise, drop. On linux, we should never see these packets - Linux
        // routes packets from the nebula IP to the nebula IP through the loopback device.
        return
    }

    // Ignore broadcast packets
    if f.dropMulticast && isMulticast(fwPacket.RemoteIP) {
        return
    }

    hostinfo := f.getOrHandshake(fwPacket.RemoteIP)
    if hostinfo == nil {
        if f.l.Level >= logrus.DebugLevel {
            f.l.WithField("vpnIp", fwPacket.RemoteIP).
                WithField("fwPacket", fwPacket).
                Debugln("dropping outbound packet, vpnIp not in our CIDR or in unsafe routes")
        }
        return
    }
    ci := hostinfo.ConnectionState

    if ci.ready == false {
        // Because we might be sending stored packets, lock here to stop new things going to
        // the packet queue.
        ci.queueLock.Lock()
        if !ci.ready {
            hostinfo.cachePacket(f.l, header.Message, 0, packet, f.sendMessageNow, f.cachedPacketMetrics)
            ci.queueLock.Unlock()
            return
        }
        ci.queueLock.Unlock()
    }

    dropReason := f.firewall.Drop(packet, *fwPacket, false, hostinfo, f.caPool, localCache)
    if dropReason == nil {
        f.sendNoMetrics(header.Message, 0, ci, hostinfo, nil, packet, nb, out, q, fwPacket)

    } else if f.l.Level >= logrus.DebugLevel {
        hostinfo.logger(f.l).
            WithField("fwPacket", fwPacket).
            WithField("reason", dropReason).
            Debugln("dropping outbound packet")
    }
}

func (f *Interface) Handshake(vpnIp iputil.VpnIp) {
    f.getOrHandshake(vpnIp)
}

// getOrHandshake returns nil if the vpnIp is not routable
func (f *Interface) getOrHandshake(vpnIp iputil.VpnIp) *HostInfo {
    if !ipMaskContains(f.lightHouse.myVpnIp, f.lightHouse.myVpnZeros, vpnIp) {
        vpnIp = f.inside.RouteFor(vpnIp)
        if vpnIp == 0 {
            return nil
        }
    }
    hostinfo, err := f.hostMap.PromoteBestQueryVpnIp(vpnIp, f)

    //if err != nil || hostinfo.ConnectionState == nil {
    if err != nil {
        hostinfo, err = f.handshakeManager.pendingHostMap.QueryVpnIp(vpnIp)
        if err != nil {
            hostinfo = f.handshakeManager.AddVpnIp(vpnIp, f.initHostInfo)
        }
    }
    ci := hostinfo.ConnectionState

    if ci != nil && ci.eKey != nil && ci.ready {
        return hostinfo
    }

    // Handshake is not ready, we need to grab the lock now before we start the handshake process
    hostinfo.Lock()
    defer hostinfo.Unlock()

    // Double check, now that we have the lock
    ci = hostinfo.ConnectionState
    if ci != nil && ci.eKey != nil && ci.ready {
        return hostinfo
    }

    // If we have already created the handshake packet, we don't want to call the function at all.
    if !hostinfo.HandshakeReady {
        ixHandshakeStage0(f, vpnIp, hostinfo)
        // FIXME: Maybe make XX selectable, but probably not since psk makes it nearly pointless for us.
        //xx_handshakeStage0(f, ip, hostinfo)

        // If this is a static host, we don't need to wait for the HostQueryReply
        // We can trigger the handshake right now
        if _, ok := f.lightHouse.GetStaticHostList()[vpnIp]; ok {
            select {
            case f.handshakeManager.trigger <- vpnIp:
            default:
            }
        }
    }

    return hostinfo
}

// initHostInfo is the init function to pass to (*HandshakeManager).AddVpnIP that
// will create the initial Noise ConnectionState
func (f *Interface) initHostInfo(hostinfo *HostInfo) {
    hostinfo.ConnectionState = f.newConnectionState(f.l, true, noise.HandshakeIX, []byte{}, 0)
}

func (f *Interface) sendMessageNow(t header.MessageType, st header.MessageSubType, hostInfo *HostInfo, p, nb, out []byte) {
    fp := &firewall.Packet{}
    err := newPacket(p, false, fp)
    if err != nil {
        f.l.Warnf("error while parsing outgoing packet for firewall check; %v", err)
        return
    }

    // check if packet is in outbound fw rules
    dropReason := f.firewall.Drop(p, *fp, false, hostInfo, f.caPool, nil)
    if dropReason != nil {
        if f.l.Level >= logrus.DebugLevel {
            f.l.WithField("fwPacket", fp).
                WithField("reason", dropReason).
                Debugln("dropping cached packet")
        }
        return
    }

    f.sendNoMetrics(header.Message, st, hostInfo.ConnectionState, hostInfo, nil, p, nb, out, 0, nil)
}

// SendMessageToVpnIp handles real ip:port lookup and sends to the current best known address for vpnIp
func (f *Interface) SendMessageToVpnIp(t header.MessageType, st header.MessageSubType, vpnIp iputil.VpnIp, p, nb, out []byte) {
    hostInfo := f.getOrHandshake(vpnIp)
    if hostInfo == nil {
        if f.l.Level >= logrus.DebugLevel {
            f.l.WithField("vpnIp", vpnIp).
                Debugln("dropping SendMessageToVpnIp, vpnIp not in our CIDR or in unsafe routes")
        }
        return
    }

    if !hostInfo.ConnectionState.ready {
        // Because we might be sending stored packets, lock here to stop new things going to
        // the packet queue.
        hostInfo.ConnectionState.queueLock.Lock()
        if !hostInfo.ConnectionState.ready {
            hostInfo.cachePacket(f.l, t, st, p, f.sendMessageToVpnIp, f.cachedPacketMetrics)
            hostInfo.ConnectionState.queueLock.Unlock()
            return
        }
        hostInfo.ConnectionState.queueLock.Unlock()
    }

    f.sendMessageToVpnIp(t, st, hostInfo, p, nb, out)
    return
}

func (f *Interface) sendMessageToVpnIp(t header.MessageType, st header.MessageSubType, hostInfo *HostInfo, p, nb, out []byte) {
    f.send(t, st, hostInfo.ConnectionState, hostInfo, p, nb, out)
}

func (f *Interface) send(t header.MessageType, st header.MessageSubType, ci *ConnectionState, hostinfo *HostInfo, p, nb, out []byte) {
    f.messageMetrics.Tx(t, st, 1)
    f.sendNoMetrics(t, st, ci, hostinfo, nil, p, nb, out, 0, nil)
}

func (f *Interface) sendTo(t header.MessageType, st header.MessageSubType, ci *ConnectionState, hostinfo *HostInfo, remote *udp.Addr, p, nb, out []byte) {
    f.messageMetrics.Tx(t, st, 1)
    f.sendNoMetrics(t, st, ci, hostinfo, remote, p, nb, out, 0, nil)
}

// sendVia sends a payload through a Relay tunnel. No authentication or encryption is done
// to the payload for the ultimate target host, making this a useful method for sending
// handshake messages to peers through relay tunnels.
// via is the HostInfo through which the message is relayed.
// ad is the plaintext data to authenticate, but not encrypt
// nb is a buffer used to store the nonce value, re-used for performance reasons.
// out is a buffer used to store the result of the Encrypt operation
// q indicates which writer to use to send the packet.
func (f *Interface) SendVia(viaIfc interface{},
    relayIfc interface{},
    ad,
    nb,
    out []byte,
    nocopy bool,
) {
    via := viaIfc.(*HostInfo)
    relay := relayIfc.(*Relay)
    c := atomic.AddUint64(&via.ConnectionState.atomicMessageCounter, 1)

    out = header.Encode(out, header.Version, header.Message, header.MessageRelay, relay.RemoteIndex, c)
    f.connectionManager.Out(via.vpnIp)

    // Authenticate the header and payload, but do not encrypt for this message type.
    // The payload consists of the inner, unencrypted Nebula header, as well as the end-to-end encrypted payload.
    if len(out)+len(ad)+via.ConnectionState.eKey.Overhead() > cap(out) {
        via.logger(f.l).
            WithField("outCap", cap(out)).
            WithField("payloadLen", len(ad)).
            WithField("headerLen", len(out)).
            WithField("cipherOverhead", via.ConnectionState.eKey.Overhead()).
            Error("SendVia out buffer not large enough for relay")
        return
    }

    // The header bytes are written to the 'out' slice; Grow the slice to hold the header and associated data payload.
    offset := len(out)
    out = out[:offset+len(ad)]

    // In one call path, the associated data _is_ already stored in out. In other call paths, the associated data must
    // be copied into 'out'.
    if !nocopy {
        copy(out[offset:], ad)
    }

    var err error
    out, err = via.ConnectionState.eKey.EncryptDanger(out, out, nil, c, nb)
    if err != nil {
        via.logger(f.l).WithError(err).Info("Failed to EncryptDanger in sendVia")
        return
    }
    err = f.writers[0].WriteTo(out, via.remote)
    if err != nil {
        via.logger(f.l).WithError(err).Info("Failed to WriteTo in sendVia")
    }
}

func (f *Interface) sendNoMetrics(t header.MessageType, st header.MessageSubType, ci *ConnectionState, hostinfo *HostInfo, remote *udp.Addr, p, nb, out []byte, q int, udpPortGetter udp.SendPortGetter) {
    if ci.eKey == nil {
        //TODO: log warning
        return
    }

    multiport := f.multiPort.Tx && hostinfo.multiportTx
    rawOut := out
    if multiport {
        if len(out) < udp.RawOverhead {
            // NOTE: This is because some spots in the code send us `out[:0]`, so
            // we need to expand the slice back out to get our 8 bytes back.
            out = out[:udp.RawOverhead]
        }
        // Preserve bytes needed for the raw socket
        out = out[udp.RawOverhead:]

        if udpPortGetter == nil {
            udpPortGetter = udp.RandomSendPort
        }
    }

    useRelay := remote == nil && hostinfo.remote == nil
    fullOut := out

    if useRelay {
        if len(out) < header.Len {
            // out always has a capacity of mtu, but not always a length greater than the header.Len.
            // Grow it to make sure the next operation works.
            out = out[:header.Len]
        }
        // Save a header's worth of data at the front of the 'out' buffer.
        out = out[header.Len:]
    }

    //TODO: enable if we do more than 1 tun queue
    //ci.writeLock.Lock()
    c := atomic.AddUint64(&ci.atomicMessageCounter, 1)

    //l.WithField("trace", string(debug.Stack())).Error("out Header ", &Header{Version, t, st, 0, hostinfo.remoteIndexId, c}, p)
    out = header.Encode(out, header.Version, t, st, hostinfo.remoteIndexId, c)
    f.connectionManager.Out(hostinfo.vpnIp)

    // Query our LH if we haven't since the last time we've been rebound, this will cause the remote to punch against
    // all our IPs and enable a faster roaming.
    if t != header.CloseTunnel && hostinfo.lastRebindCount != f.rebindCount {
        //NOTE: there is an update hole if a tunnel isn't used and exactly 256 rebinds occur before the tunnel is
        // finally used again. This tunnel would eventually be torn down and recreated if this action didn't help.
        f.lightHouse.QueryServer(hostinfo.vpnIp, f)
        hostinfo.lastRebindCount = f.rebindCount
        if f.l.Level >= logrus.DebugLevel {
            f.l.WithField("vpnIp", hostinfo.vpnIp).Debug("Lighthouse update triggered for punch due to rebind counter")
        }
    }

    var err error
    out, err = ci.eKey.EncryptDanger(out, out, p, c, nb)
    //TODO: see above note on lock
    //ci.writeLock.Unlock()
    if err != nil {
        hostinfo.logger(f.l).WithError(err).
            WithField("udpAddr", remote).WithField("counter", c).
            WithField("attemptedCounter", c).
            Error("Failed to encrypt outgoing packet")
        return
    }

    if remote != nil {
        if multiport {
            rawOut = rawOut[:len(out)+udp.RawOverhead]
            port := udpPortGetter.UDPSendPort(f.multiPort.TxPorts)
            err = f.udpRaw.WriteTo(rawOut, port, remote)
        } else {
            err = f.writers[q].WriteTo(out, remote)
        }
        if err != nil {
            hostinfo.logger(f.l).WithError(err).
                WithField("udpAddr", remote).Error("Failed to write outgoing packet")
        }
    } else if hostinfo.remote != nil {
        if multiport {
            rawOut = rawOut[:len(out)+udp.RawOverhead]
            port := udpPortGetter.UDPSendPort(f.multiPort.TxPorts)
            err = f.udpRaw.WriteTo(rawOut, port, hostinfo.remote)
        } else {
            err = f.writers[q].WriteTo(out, hostinfo.remote)
        }
        if err != nil {
            hostinfo.logger(f.l).WithError(err).
                WithField("udpAddr", remote).Error("Failed to write outgoing packet")
        }
    } else {
        // Try to send via a relay
        for _, relayIP := range hostinfo.relayState.CopyRelayIps() {
            relayHostInfo, err := f.hostMap.QueryVpnIp(relayIP)
            if err != nil {
                hostinfo.logger(f.l).WithField("relayIp", relayIP).WithError(err).Info("sendNoMetrics failed to find HostInfo")
                continue
            }
            relay, ok := relayHostInfo.relayState.QueryRelayForByIp(hostinfo.vpnIp)
            if !ok {
                hostinfo.logger(f.l).
                    WithField("relayIp", relayHostInfo.vpnIp).
                    WithField("relayTarget", hostinfo.vpnIp).
                    Info("sendNoMetrics relay missing object for target")
                continue
            }
            f.SendVia(relayHostInfo, relay, out, nb, fullOut[:header.Len+len(out)], true)
            break
        }
    }
    return
}

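// Note on the multiport path above (an inference from this function, not an
// upstream comment): when multiport is active, the first udp.RawOverhead (8)
// bytes of the caller's buffer are set aside before encryption
// (out = out[udp.RawOverhead:]), and rawOut is later re-sliced to cover those
// reserved bytes plus the ciphertext, leaving room for the raw-socket writer
// to place a UDP header with the chosen source port in front of the packet.
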
func isMulticast(ip iputil.VpnIp) bool {
    // Class D multicast
    if (((ip >> 24) & 0xff) & 0xf0) == 0xe0 {
        return true
    }

    return false
}