public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: jmaloy@redhat.com, David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v8 05/27] tcp: Manage outbound address via flow table
Date: Thu, 18 Jul 2024 15:26:31 +1000	[thread overview]
Message-ID: <20240718052653.3241585-6-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20240718052653.3241585-1-david@gibson.dropbear.id.au>

For now, when we forward a connection to the host, we leave the host-side
forwarding address and port blank, since we don't necessarily know what
source address and port the kernel will use.  When the outbound address
option is active, though, we do know at least the address, so we can
record it in the flowside.

Having done that, use it as the primary source of truth, binding the
outgoing socket based on the information recorded there.  This opens up
the possibility of more complex rules for which outbound address and/or
port we use in the future.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp.c | 93 ++++++++++++++++++++++++++++++++---------------------------
 1 file changed, 50 insertions(+), 43 deletions(-)

diff --git a/tcp.c b/tcp.c
index bac72c02..f0bf76bc 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1581,46 +1581,48 @@ static uint16_t tcp_conn_tap_mss(const struct tcp_tap_conn *conn,
 /**
  * tcp_bind_outbound() - Bind socket to outbound address and interface if given
  * @c:		Execution context
+ * @conn:	Connection entry for socket to bind
  * @s:		Outbound TCP socket
- * @af:		Address family
  */
-static void tcp_bind_outbound(const struct ctx *c, int s, sa_family_t af)
+static void tcp_bind_outbound(const struct ctx *c,
+			      const struct tcp_tap_conn *conn, int s)
 {
-	if (af == AF_INET) {
-		if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr_out)) {
-			struct sockaddr_in addr4 = {
-				.sin_family = AF_INET,
-				.sin_port = 0,
-				.sin_addr = c->ip4.addr_out,
-			};
-
-			if (bind(s, (struct sockaddr *)&addr4, sizeof(addr4)))
-				debug_perror("IPv4 TCP socket address bind");
+	const struct flowside *tgt = &conn->f.side[TGTSIDE];
+	union sockaddr_inany bind_sa;
+	socklen_t sl;
+
+
+	pif_sockaddr(c, &bind_sa, &sl, PIF_HOST, &tgt->faddr, tgt->fport);
+	if (!inany_is_unspecified(&tgt->faddr) || tgt->fport) {
+		if (bind(s, &bind_sa.sa, sl)) {
+			char sstr[INANY_ADDRSTRLEN];
+
+			flow_dbg(conn,
+				 "Can't bind TCP outbound socket to %s:%hu: %s",
+				 inany_ntop(&tgt->faddr, sstr, sizeof(sstr)),
+				 tgt->fport, strerror(errno));
 		}
+	}
 
+	if (bind_sa.sa_family == AF_INET) {
 		if (*c->ip4.ifname_out) {
 			if (setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE,
 				       c->ip4.ifname_out,
-				       strlen(c->ip4.ifname_out)))
-				debug_perror("IPv4 TCP socket interface bind");
-		}
-	} else if (af == AF_INET6) {
-		if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr_out)) {
-			struct sockaddr_in6 addr6 = {
-				.sin6_family = AF_INET6,
-				.sin6_port = 0,
-				.sin6_addr = c->ip6.addr_out,
-			};
-
-			if (bind(s, (struct sockaddr *)&addr6, sizeof(addr6)))
-				debug_perror("IPv6 TCP socket address bind");
+				       strlen(c->ip4.ifname_out))) {
+				flow_dbg(conn, "Can't bind IPv4 TCP socket to"
+					 " interface %s: %s", c->ip4.ifname_out,
+					 strerror(errno));
+			}
 		}
-
+	} else if (bind_sa.sa_family == AF_INET6) {
 		if (*c->ip6.ifname_out) {
 			if (setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE,
 				       c->ip6.ifname_out,
-				       strlen(c->ip6.ifname_out)))
-				debug_perror("IPv6 TCP socket interface bind");
+				       strlen(c->ip6.ifname_out))) {
+				flow_dbg(conn, "Can't bind IPv6 TCP socket to"
+					 " interface %s: %s", c->ip6.ifname_out,
+					 strerror(errno));
+			}
 		}
 	}
 }
@@ -1643,9 +1645,9 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
 {
 	in_port_t srcport = ntohs(th->source);
 	in_port_t dstport = ntohs(th->dest);
+	union inany_addr srcaddr, dstaddr; /* FIXME: Avoid bulky temporaries */
 	const struct flowside *ini, *tgt;
 	struct tcp_tap_conn *conn;
-	union inany_addr dstaddr; /* FIXME: Avoid bulky temporary */
 	union sockaddr_inany sa;
 	union flow *flow;
 	int s = -1, mss;
@@ -1666,9 +1668,24 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
 
 	}
 
-	/* FIXME: Record outbound source address when known */
+	if (inany_is_linklocal6(&dstaddr)) {
+		srcaddr.a6 = c->ip6.addr_ll;
+	} else if (inany_is_loopback(&dstaddr)) {
+		srcaddr = dstaddr;
+	} else if (inany_v4(&dstaddr)) {
+		if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr_out))
+			srcaddr = inany_from_v4(c->ip4.addr_out);
+		else
+			srcaddr = inany_any4;
+	} else {
+		if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr_out))
+			srcaddr.a6 = c->ip6.addr_out;
+		else
+			srcaddr = inany_any6;
+	}
+
 	tgt = flow_target_af(flow, PIF_HOST, AF_INET6,
-			     NULL, 0, /* Kernel decides source address */
+			     &srcaddr, 0, /* Kernel decides source port */
 			     &dstaddr, dstport);
 	conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
 
@@ -1731,18 +1748,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
 			goto cancel;
 	}
 
-	if (inany_is_linklocal6(&tgt->eaddr)) {
-		struct sockaddr_in6 addr6_ll = {
-			.sin6_family = AF_INET6,
-			.sin6_addr = c->ip6.addr_ll,
-			.sin6_scope_id = c->ifi6,
-		};
-		if (bind(s, (struct sockaddr *)&addr6_ll, sizeof(addr6_ll)))
-			goto cancel;
-	} else if (!inany_is_loopback(&tgt->eaddr)) {
-		tcp_bind_outbound(c, s, af);
-	}
-
 	conn->sock = s;
 	conn->timer = -1;
 	conn_event(c, conn, TAP_SYN_RCVD);
@@ -1771,6 +1776,8 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
 
 	tcp_hash_insert(c, conn);
 
+	tcp_bind_outbound(c, conn, s);
+
 	if (connect(s, &sa.sa, sl)) {
 		if (errno != EINPROGRESS) {
 			tcp_rst(c, conn);
-- 
@@ -1581,46 +1581,48 @@ static uint16_t tcp_conn_tap_mss(const struct tcp_tap_conn *conn,
 /**
  * tcp_bind_outbound() - Bind socket to outbound address and interface if given
  * @c:		Execution context
+ * @conn:	Connection entry for socket to bind
  * @s:		Outbound TCP socket
- * @af:		Address family
  */
-static void tcp_bind_outbound(const struct ctx *c, int s, sa_family_t af)
+static void tcp_bind_outbound(const struct ctx *c,
+			      const struct tcp_tap_conn *conn, int s)
 {
-	if (af == AF_INET) {
-		if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr_out)) {
-			struct sockaddr_in addr4 = {
-				.sin_family = AF_INET,
-				.sin_port = 0,
-				.sin_addr = c->ip4.addr_out,
-			};
-
-			if (bind(s, (struct sockaddr *)&addr4, sizeof(addr4)))
-				debug_perror("IPv4 TCP socket address bind");
+	const struct flowside *tgt = &conn->f.side[TGTSIDE];
+	union sockaddr_inany bind_sa;
+	socklen_t sl;
+
+
+	pif_sockaddr(c, &bind_sa, &sl, PIF_HOST, &tgt->faddr, tgt->fport);
+	if (!inany_is_unspecified(&tgt->faddr) || tgt->fport) {
+		if (bind(s, &bind_sa.sa, sl)) {
+			char sstr[INANY_ADDRSTRLEN];
+
+			flow_dbg(conn,
+				 "Can't bind TCP outbound socket to %s:%hu: %s",
+				 inany_ntop(&tgt->faddr, sstr, sizeof(sstr)),
+				 tgt->fport, strerror(errno));
 		}
+	}
 
+	if (bind_sa.sa_family == AF_INET) {
 		if (*c->ip4.ifname_out) {
 			if (setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE,
 				       c->ip4.ifname_out,
-				       strlen(c->ip4.ifname_out)))
-				debug_perror("IPv4 TCP socket interface bind");
-		}
-	} else if (af == AF_INET6) {
-		if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr_out)) {
-			struct sockaddr_in6 addr6 = {
-				.sin6_family = AF_INET6,
-				.sin6_port = 0,
-				.sin6_addr = c->ip6.addr_out,
-			};
-
-			if (bind(s, (struct sockaddr *)&addr6, sizeof(addr6)))
-				debug_perror("IPv6 TCP socket address bind");
+				       strlen(c->ip4.ifname_out))) {
+				flow_dbg(conn, "Can't bind IPv4 TCP socket to"
+					 " interface %s: %s", c->ip4.ifname_out,
+					 strerror(errno));
+			}
 		}
-
+	} else if (bind_sa.sa_family == AF_INET6) {
 		if (*c->ip6.ifname_out) {
 			if (setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE,
 				       c->ip6.ifname_out,
-				       strlen(c->ip6.ifname_out)))
-				debug_perror("IPv6 TCP socket interface bind");
+				       strlen(c->ip6.ifname_out))) {
+				flow_dbg(conn, "Can't bind IPv6 TCP socket to"
+					 " interface %s: %s", c->ip6.ifname_out,
+					 strerror(errno));
+			}
 		}
 	}
 }
@@ -1643,9 +1645,9 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
 {
 	in_port_t srcport = ntohs(th->source);
 	in_port_t dstport = ntohs(th->dest);
+	union inany_addr srcaddr, dstaddr; /* FIXME: Avoid bulky temporaries */
 	const struct flowside *ini, *tgt;
 	struct tcp_tap_conn *conn;
-	union inany_addr dstaddr; /* FIXME: Avoid bulky temporary */
 	union sockaddr_inany sa;
 	union flow *flow;
 	int s = -1, mss;
@@ -1666,9 +1668,24 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
 
 	}
 
-	/* FIXME: Record outbound source address when known */
+	if (inany_is_linklocal6(&dstaddr)) {
+		srcaddr.a6 = c->ip6.addr_ll;
+	} else if (inany_is_loopback(&dstaddr)) {
+		srcaddr = dstaddr;
+	} else if (inany_v4(&dstaddr)) {
+		if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.addr_out))
+			srcaddr = inany_from_v4(c->ip4.addr_out);
+		else
+			srcaddr = inany_any4;
+	} else {
+		if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.addr_out))
+			srcaddr.a6 = c->ip6.addr_out;
+		else
+			srcaddr = inany_any6;
+	}
+
 	tgt = flow_target_af(flow, PIF_HOST, AF_INET6,
-			     NULL, 0, /* Kernel decides source address */
+			     &srcaddr, 0, /* Kernel decides source port */
 			     &dstaddr, dstport);
 	conn = FLOW_SET_TYPE(flow, FLOW_TCP, tcp);
 
@@ -1731,18 +1748,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
 			goto cancel;
 	}
 
-	if (inany_is_linklocal6(&tgt->eaddr)) {
-		struct sockaddr_in6 addr6_ll = {
-			.sin6_family = AF_INET6,
-			.sin6_addr = c->ip6.addr_ll,
-			.sin6_scope_id = c->ifi6,
-		};
-		if (bind(s, (struct sockaddr *)&addr6_ll, sizeof(addr6_ll)))
-			goto cancel;
-	} else if (!inany_is_loopback(&tgt->eaddr)) {
-		tcp_bind_outbound(c, s, af);
-	}
-
 	conn->sock = s;
 	conn->timer = -1;
 	conn_event(c, conn, TAP_SYN_RCVD);
@@ -1771,6 +1776,8 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
 
 	tcp_hash_insert(c, conn);
 
+	tcp_bind_outbound(c, conn, s);
+
 	if (connect(s, &sa.sa, sl)) {
 		if (errno != EINPROGRESS) {
 			tcp_rst(c, conn);
-- 
2.45.2


  parent reply	other threads:[~2024-07-18  5:27 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-18  5:26 [PATCH v8 00/27] Unified flow table David Gibson
2024-07-18  5:26 ` [PATCH v8 01/27] flow: Common address information for initiating side David Gibson
2024-07-18  5:26 ` [PATCH v8 02/27] flow: Common address information for target side David Gibson
2024-07-18  5:26 ` [PATCH v8 03/27] tcp, flow: Remove redundant information, repack connection structures David Gibson
2024-07-18  5:26 ` [PATCH v8 04/27] tcp: Obtain guest address from flowside David Gibson
2024-07-18  5:26 ` David Gibson [this message]
2024-07-18  5:26 ` [PATCH v8 06/27] tcp: Simplify endpoint validation using flowside information David Gibson
2024-07-18  5:26 ` [PATCH v8 07/27] tcp_splice: Eliminate SPLICE_V6 flag David Gibson
2024-07-18  5:26 ` [PATCH v8 08/27] tcp, flow: Replace TCP specific hash function with general flow hash David Gibson
2024-07-18  5:26 ` [PATCH v8 09/27] flow, tcp: Generalise TCP hash table to general flow hash table David Gibson
2024-07-18  5:26 ` [PATCH v8 10/27] tcp: Re-use flow hash for initial sequence number generation David Gibson
2024-07-18  5:26 ` [PATCH v8 11/27] icmp: Remove redundant id field from flow table entry David Gibson
2024-07-18  5:26 ` [PATCH v8 12/27] icmp: Obtain destination addresses from the flowsides David Gibson
2024-07-18  5:26 ` [PATCH v8 13/27] icmp: Look up ping flows using flow hash David Gibson
2024-07-18  5:26 ` [PATCH v8 14/27] icmp: Eliminate icmp_id_map David Gibson
2024-07-18  5:26 ` [PATCH v8 15/27] flow: Helper to create sockets based on flowside David Gibson
2024-07-18  5:26 ` [PATCH v8 16/27] icmp: Manage outbound socket address via flow table David Gibson
2024-07-18  5:26 ` [PATCH v8 17/27] flow, tcp: Flow based NAT and port forwarding for TCP David Gibson
2024-07-18  5:26 ` [PATCH v8 18/27] flow, icmp: Use general flow forwarding rules for ICMP David Gibson
2024-07-18  5:26 ` [PATCH v8 19/27] fwd: Update flow forwarding logic for UDP David Gibson
2024-07-18  5:26 ` [PATCH v8 20/27] udp: Create flows for datagrams from originating sockets David Gibson
2024-07-18  5:26 ` [PATCH v8 21/27] udp: Handle "spliced" datagrams with per-flow sockets David Gibson
2024-07-18  5:26 ` [PATCH v8 22/27] udp: Remove obsolete splice tracking David Gibson
2024-07-18  5:26 ` [PATCH v8 23/27] udp: Find or create flows for datagrams from tap interface David Gibson
2024-07-18  5:26 ` [PATCH v8 24/27] udp: Direct datagrams from host to guest via flow table David Gibson
2024-07-18  5:26 ` [PATCH v8 25/27] udp: Remove obsolete socket tracking David Gibson
2024-07-18  5:26 ` [PATCH v8 26/27] udp: Remove rdelta port forwarding maps David Gibson
2024-07-18  5:26 ` [PATCH v8 27/27] udp: Rename UDP listening sockets David Gibson
2024-07-19 19:20 ` [PATCH v8 00/27] Unified flow table Stefano Brivio
2024-07-20  3:37   ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240718052653.3241585-6-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=jmaloy@redhat.com \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).