public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v4 15/16] flow, tcp: flow based NAT and port forwarding for TCP
Date: Fri,  3 May 2024 11:11:34 +1000	[thread overview]
Message-ID: <20240503011135.2924437-16-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20240503011135.2924437-1-david@gibson.dropbear.id.au>

Currently the code to translate host side addresses and ports to guest side
addresses and ports, and vice versa, is scattered across the TCP code.
This includes both port redirection as controlled by the -t and -T options,
and our special case NAT controlled by the --no-map-gw option.

Gather all this logic into a new fwd_nat_flow() function in fwd.c which
takes the protocol and the initiating flowside as input and fills in the
flowside (including its pif) to which to forward the flow, applying any NAT
or port translation.  It returns false if the flow can't be forwarded.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 fwd.c        | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fwd.h        |   5 ++
 tcp.c        |  92 +++++++++++-----------------------
 tcp_splice.c |  57 +--------------------
 tcp_splice.h |   3 +-
 5 files changed, 175 insertions(+), 121 deletions(-)

diff --git a/fwd.c b/fwd.c
index b3d5a37..d0ca492 100644
--- a/fwd.c
+++ b/fwd.c
@@ -25,6 +25,7 @@
 #include "fwd.h"
 #include "passt.h"
 #include "lineread.h"
+#include "flow_table.h"
 
 /* See enum in kernel's include/net/tcp_states.h */
 #define UDP_LISTEN	0x07
@@ -154,3 +155,141 @@ void fwd_scan_ports_init(struct ctx *c)
 				   &c->tcp.fwd_out, &c->tcp.fwd_in);
 	}
 }
+
+static bool fwd_from_tap(const struct ctx *c, uint8_t proto,
+			 const struct flowside *a, struct flowside *b)
+{
+	(void)proto;
+
+	b->pif = PIF_HOST;
+	b->eaddr = a->faddr;
+	b->eport = a->fport;
+
+	if (!c->no_map_gw) {
+		struct in_addr *v4 = inany_v4(&b->eaddr);
+
+		if (v4 && IN4_ARE_ADDR_EQUAL(v4, &c->ip4.gw))
+			*v4 = in4addr_loopback;
+		if (IN6_ARE_ADDR_EQUAL(&b->eaddr, &c->ip6.gw))
+			b->eaddr.a6 = in6addr_loopback;
+	}
+
+	return true;
+}
+
+static bool fwd_from_splice(const struct ctx *c, uint8_t proto,
+			 const struct flowside *a, struct flowside *b)
+{
+	const struct in_addr *ae4 = inany_v4(&a->eaddr);
+
+	if (!inany_is_loopback(&a->eaddr) ||
+	    (!inany_is_loopback(&a->faddr) && !inany_is_unspecified(&a->faddr))) {
+		char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN];
+
+		debug("Non loopback address on %s: [%s]:%hu -> [%s]:%hu",
+		      pif_name(a->pif),
+		      inany_ntop(&a->eaddr, estr, sizeof(estr)), a->eport,
+		      inany_ntop(&a->faddr, fstr, sizeof(fstr)), a->fport);
+		return false;
+	}
+
+	b->pif = PIF_HOST;
+
+	if (ae4)
+		inany_from_af(&b->eaddr, AF_INET, &in4addr_loopback);
+	else
+		inany_from_af(&b->eaddr, AF_INET6, &in6addr_loopback);
+
+	b->eport = a->fport;
+
+	if (proto == IPPROTO_TCP)
+		b->eport += c->tcp.fwd_out.delta[b->eport];
+
+	return true;
+}
+
+static bool fwd_from_host(const struct ctx *c, uint8_t proto,
+			  const struct flowside *a, struct flowside *b)
+{
+	struct in_addr *bf4;
+
+	if (c->mode == MODE_PASTA && inany_is_loopback(&a->eaddr) &&
+	    proto == IPPROTO_TCP) {
+		/* spliceable */
+		b->pif = PIF_SPLICE;
+		b->faddr = a->eaddr;
+
+		if (inany_v4(&a->eaddr))
+			inany_from_af(&b->eaddr, AF_INET, &in4addr_loopback);
+		else
+			inany_from_af(&b->eaddr, AF_INET6, &in6addr_loopback);
+		b->eport = a->fport;
+		if (proto == IPPROTO_TCP)
+			b->eport += c->tcp.fwd_in.delta[b->eport];
+
+		return true;
+	}
+
+	b->pif = PIF_TAP;
+	b->faddr = a->eaddr;
+	b->fport = a->eport;
+
+	bf4 = inany_v4(&b->faddr);
+
+	if (bf4) {
+		if (IN4_IS_ADDR_LOOPBACK(bf4) ||
+		    IN4_IS_ADDR_UNSPECIFIED(bf4) ||
+		    IN4_ARE_ADDR_EQUAL(bf4, &c->ip4.addr_seen))
+			*bf4 = c->ip4.gw;
+	} else {
+		struct in6_addr *bf6 = &b->faddr.a6;
+
+		if (IN6_IS_ADDR_LOOPBACK(bf6) ||
+		    IN6_ARE_ADDR_EQUAL(bf6, &c->ip6.addr_seen) ||
+		    IN6_ARE_ADDR_EQUAL(bf6, &c->ip6.addr)) {
+			if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw))
+				*bf6 = c->ip6.gw;
+			else
+				*bf6 = c->ip6.addr_ll;
+		}
+	}
+
+	if (bf4) {
+		inany_from_af(&b->eaddr, AF_INET, &c->ip4.addr_seen);
+	} else {
+		if (IN6_IS_ADDR_LINKLOCAL(&b->faddr.a6))
+			b->eaddr.a6 = c->ip6.addr_ll_seen;
+		else
+			b->eaddr.a6 = c->ip6.addr_seen;
+	}
+
+	b->eport = a->fport;
+	if (proto == IPPROTO_TCP)
+		b->eport += c->tcp.fwd_in.delta[b->eport];
+
+	return true;
+}
+
+bool fwd_nat_flow(const struct ctx *c, uint8_t proto,
+		  const struct flowside *a, struct flowside *b)
+{
+	char estr[INANY_ADDRSTRLEN], fstr[INANY_ADDRSTRLEN];
+
+	switch (a->pif) {
+	case PIF_TAP:
+		return fwd_from_tap(c, proto, a, b);
+
+	case PIF_SPLICE:
+		return fwd_from_splice(c, proto, a, b);
+
+	case PIF_HOST:
+		return fwd_from_host(c, proto, a, b);
+
+	default:
+		debug("No rules to forward from %s: [%s]:%hu -> [%s]:%hu",
+		      pif_name(a->pif),
+		      inany_ntop(&a->eaddr, estr, sizeof(estr)), a->eport,
+		      inany_ntop(&a->faddr, fstr, sizeof(fstr)), a->fport);
+		return false;
+	}
+}
diff --git a/fwd.h b/fwd.h
index 23281d9..e884f4e 100644
--- a/fwd.h
+++ b/fwd.h
@@ -7,6 +7,8 @@
 #ifndef FWD_H
 #define FWD_H
 
+struct flowside;
+
 /* Number of ports for both TCP and UDP */
 #define	NUM_PORTS	(1U << 16)
 
@@ -41,4 +43,7 @@ void fwd_scan_ports_udp(struct fwd_ports *fwd, const struct fwd_ports *rev,
 			const struct fwd_ports *tcp_rev);
 void fwd_scan_ports_init(struct ctx *c);
 
+bool fwd_nat_flow(const struct ctx *c, uint8_t proto,
+		  const struct flowside *a, struct flowside *b);
+
 #endif /* FWD_H */
diff --git a/tcp.c b/tcp.c
index 8b4d792..e131280 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1782,17 +1782,13 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
 		goto cancel;
 	}
 
-	sockside->pif = PIF_HOST;
-	sockside->eaddr = tapside->faddr;
-	sockside->eport = tapside->fport;
-
-	if (!c->no_map_gw) {
-		struct in_addr *v4 = inany_v4(&sockside->eaddr);
+	if (!fwd_nat_flow(c, IPPROTO_TCP, tapside, sockside))
+		goto cancel;
 
-		if (v4 && IN4_ARE_ADDR_EQUAL(v4, &c->ip4.gw))
-			*v4 = in4addr_loopback;
-		if (IN6_ARE_ADDR_EQUAL(&sockside->eaddr, &c->ip6.gw))
-			sockside->eaddr.a6 = in6addr_loopback;
+	if (sockside->pif != PIF_HOST) {
+		err("No support for forwarding TCP from %s to %s",
+		    pif_name(tapside->pif), pif_name(sockside->pif));
+		goto cancel;
 	}
 
 	if ((s = tcp_conn_sock(c, af)) < 0)
@@ -2479,67 +2475,19 @@ static void tcp_connect_finish(struct ctx *c, struct tcp_tap_conn *conn)
 	conn_flag(c, conn, ACK_FROM_TAP_DUE);
 }
 
-/**
- * tcp_snat_inbound() - Translate source address for inbound data if needed
- * @c:		Execution context
- * @addr:	Source address of inbound packet/connection
- */
-static void tcp_snat_inbound(const struct ctx *c, union inany_addr *addr)
-{
-	struct in_addr *addr4 = inany_v4(addr);
-
-	if (addr4) {
-		if (IN4_IS_ADDR_LOOPBACK(addr4) ||
-		    IN4_IS_ADDR_UNSPECIFIED(addr4) ||
-		    IN4_ARE_ADDR_EQUAL(addr4, &c->ip4.addr_seen))
-			*addr4 = c->ip4.gw;
-	} else {
-		struct in6_addr *addr6 = &addr->a6;
-
-		if (IN6_IS_ADDR_LOOPBACK(addr6) ||
-		    IN6_ARE_ADDR_EQUAL(addr6, &c->ip6.addr_seen) ||
-		    IN6_ARE_ADDR_EQUAL(addr6, &c->ip6.addr)) {
-			if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw))
-				*addr6 = c->ip6.gw;
-			else
-				*addr6 = c->ip6.addr_ll;
-		}
-	}
-}
-
 /**
  * tcp_tap_conn_from_sock() - Initialize state for non-spliced connection
  * @c:		Execution context
- * @dstport:	Destination port for connection (host side)
  * @flow:	flow to initialise
  * @s:		Accepted socket
  * @now:	Current timestamp
  */
-static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport,
-				   union flow *flow, int s,
+static void tcp_tap_conn_from_sock(struct ctx *c, union flow *flow, int s,
 				   const struct timespec *now)
 {
-	const struct flowside *sockside = &flow->f.side[SOCKSIDE];
-	struct flowside *tapside = &flow->f.side[TAPSIDE];
-	struct tcp_tap_conn *conn;
+	struct tcp_tap_conn *conn = FLOW_START(flow, FLOW_TCP, tcp, SOCKSIDE);
 	uint64_t hash;
 
-	tapside->pif = PIF_TAP;
-	tapside->faddr = sockside->eaddr;
-	tapside->fport = sockside->eport;
-	tcp_snat_inbound(c, &tapside->faddr);
-	if (CONN_V4(flow)) {
-		inany_from_af(&tapside->eaddr, AF_INET, &c->ip4.addr_seen);
-	} else {
-		if (IN6_IS_ADDR_LINKLOCAL(&tapside->faddr.a6))
-			tapside->eaddr.a6 = c->ip6.addr_ll_seen;
-		else
-			tapside->eaddr.a6 = c->ip6.addr_seen;
-	}
-	tapside->eport = dstport + c->tcp.fwd_in.delta[dstport];
-
-	conn = FLOW_START(flow, FLOW_TCP, tcp, SOCKSIDE);
-
 	conn->sock = s;
 	conn->timer = -1;
 	conn->ws_to_tap = conn->ws_from_tap = 0;
@@ -2567,9 +2515,9 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport,
 void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
 			const struct timespec *now)
 {
+	struct flowside *side0, *side1;
 	union sockaddr_inany sa;
 	socklen_t sl = sizeof(sa);
-	struct flowside *side0;
 	union flow *flow;
 	int s;
 
@@ -2577,6 +2525,7 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
 		return;
 
 	side0 = &flow->f.side[0];
+	side1 = &flow->f.side[1];
 
 	s = accept4(ref.fd, &sa.sa, &sl, SOCK_NONBLOCK);
 	if (s < 0)
@@ -2594,10 +2543,25 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
 		goto cancel;
 	}
 
-	if (tcp_splice_conn_from_sock(c, ref.tcp_listen.port, flow, s))
-		return;
+	if (!fwd_nat_flow(c, IPPROTO_TCP, side0, side1))
+		goto cancel;
+
+	switch (side1->pif) {
+	case PIF_SPLICE:
+	case PIF_HOST:
+		tcp_splice_conn_from_sock(c, flow, s);
+		break;
+
+	case PIF_TAP:
+		tcp_tap_conn_from_sock(c, flow, s, now);
+		break;
+
+	default:
+		err("No support for forwarding TCP from %s to %s",
+		    pif_name(side0->pif), pif_name(side1->pif));
+		goto cancel;
+	}
 
-	tcp_tap_conn_from_sock(c, ref.tcp_listen.port, flow, s, now);
 	return;
 
 cancel:
diff --git a/tcp_splice.c b/tcp_splice.c
index 00b88c5..c4fbbdd 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -394,67 +394,16 @@ static int tcp_conn_sock_ns(const struct ctx *c, sa_family_t af)
 /**
  * tcp_splice_conn_from_sock() - Attempt to init state for a spliced connection
  * @c:		Execution context
- * @dstport:	Side 0 destination port of connection
  * @flow:	flow to initialise
  * @s0:		Accepted (side 0) socket
  *
- * Return: true if able to create a spliced connection, false otherwise
  * #syscalls:pasta setsockopt
  */
-bool tcp_splice_conn_from_sock(const struct ctx *c, in_port_t dstport,
-			       union flow *flow, int s0)
+void tcp_splice_conn_from_sock(const struct ctx *c, union flow *flow, int s0)
 {
-	const struct flowside *side0 = &flow->f.side[0];
-	const union inany_addr *src = &side0->eaddr;
-	struct flowside *side1 = &flow->f.side[1];
 	struct tcp_splice_conn *conn;
-	sa_family_t af;
-	uint8_t pif1;
-
-	if (c->mode != MODE_PASTA)
-		return false;
-
-	af = inany_v4(src) ? AF_INET : AF_INET6;
-
-	switch (side0->pif) {
-	case PIF_SPLICE:
-		if (!inany_is_loopback(src)) {
-			char str[INANY_ADDRSTRLEN];
-
-			/* We can't use flow_err() etc. because we haven't set
-			 * the flow type yet
-			 */
-			warn("Bad source address %s for splice, closing",
-			     inany_ntop(src, str, sizeof(str)));
-
-			/* We *don't* want to fall back to tap */
-			flow_alloc_cancel(flow);
-			return true;
-		}
-
-		pif1 = PIF_HOST;
-		dstport += c->tcp.fwd_out.delta[dstport];
-		break;
-
-	case PIF_HOST:
-		if (!inany_is_loopback(src))
-			return false;
-
-		pif1 = PIF_SPLICE;
-		dstport += c->tcp.fwd_in.delta[dstport];
-		break;
-
-	default:
-		return false;
-	}
-
-	if (af == AF_INET)
-		flowside_from_af(side1, pif1, AF_INET, NULL, 0,
-				 &in4addr_loopback, dstport);
-	else
-		flowside_from_af(side1, pif1, AF_INET6, NULL, 0,
-				 &in6addr_loopback, dstport);
 
+	ASSERT(c->mode == MODE_PASTA);
 	conn = FLOW_START(flow, FLOW_TCP_SPLICE, tcp_splice, 0);
 
 	conn->s[0] = s0;
@@ -467,8 +416,6 @@ bool tcp_splice_conn_from_sock(const struct ctx *c, in_port_t dstport,
 
 	if (tcp_splice_connect(c, conn))
 		conn_flag(c, conn, CLOSING);
-
-	return true;
 }
 
 /**
diff --git a/tcp_splice.h b/tcp_splice.h
index e523c7e..a20f3e2 100644
--- a/tcp_splice.h
+++ b/tcp_splice.h
@@ -11,8 +11,7 @@ union sockaddr_inany;
 
 void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
 			     uint32_t events);
-bool tcp_splice_conn_from_sock(const struct ctx *c, in_port_t dstport,
-			       union flow *flow, int s0);
+void tcp_splice_conn_from_sock(const struct ctx *c, union flow *flow, int s0);
 void tcp_splice_init(struct ctx *c);
 
 #endif /* TCP_SPLICE_H */
-- 
2.44.0


  parent reply	other threads:[~2024-05-03  1:11 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-03  1:11 [PATCH v4 00/16] RFC: Unified flow table David Gibson
2024-05-03  1:11 ` [PATCH v4 01/16] flow: Common data structures for tracking flow addresses David Gibson
2024-05-13 18:07   ` Stefano Brivio
2024-05-14  0:11     ` David Gibson
2024-05-03  1:11 ` [PATCH v4 02/16] tcp: Maintain flowside information for "tap" connections David Gibson
2024-05-13 18:07   ` Stefano Brivio
2024-05-14  0:15     ` David Gibson
2024-05-03  1:11 ` [PATCH v4 03/16] tcp_splice: Maintain flowside information for spliced connections David Gibson
2024-05-03  1:11 ` [PATCH v4 04/16] tcp: Obtain guest address from flowside David Gibson
2024-05-13 18:07   ` Stefano Brivio
2024-05-14  0:18     ` David Gibson
2024-05-03  1:11 ` [PATCH v4 05/16] tcp: Simplify endpoint validation using flowside information David Gibson
2024-05-03  1:11 ` [PATCH v4 06/16] tcp, tcp_splice: Construct sockaddrs for connect() from flowside David Gibson
2024-05-03  1:11 ` [PATCH v4 07/16] tcp_splice: Eliminate SPLICE_V6 flag David Gibson
2024-05-03  1:11 ` [PATCH v4 08/16] tcp, flow: Replace TCP specific hash function with general flow hash David Gibson
2024-05-03  1:11 ` [PATCH v4 09/16] flow, tcp: Generalise TCP hash table to general flow hash table David Gibson
2024-05-03  1:11 ` [PATCH v4 10/16] tcp: Re-use flow hash for initial sequence number generation David Gibson
2024-05-03  1:11 ` [PATCH v4 11/16] icmp: Populate flowside information David Gibson
2024-05-03  1:11 ` [PATCH v4 12/16] icmp: Use flowsides as the source of truth wherever possible David Gibson
2024-05-03  1:11 ` [PATCH v4 13/16] icmp: Look up ping flows using flow hash David Gibson
2024-05-03  1:11 ` [PATCH v4 14/16] icmp: Eliminate icmp_id_map David Gibson
2024-05-03  1:11 ` David Gibson [this message]
2024-05-03  1:11 ` [PATCH v4 16/16] flow, icmp: Use general flow forwarding rules for ICMP David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240503011135.2924437-16-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).