public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: Stefano Brivio <sbrivio@redhat.com>
To: passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 2/3] tcp, tcp_splice: Fix port remapping for inbound, spliced connections
Date: Tue, 11 Oct 2022 01:33:49 +0200	[thread overview]
Message-ID: <20221010233350.1198630-3-sbrivio@redhat.com> (raw)
In-Reply-To: <20221010233350.1198630-1-sbrivio@redhat.com>

It's indeed convenient to use the final destination port in the epoll
reference data, so that we don't have to check the configured port
deltas before establishing connections. However, this doesn't work if
we need to access the original port information to know what to do
once we receive a connection.

The existing implementation resulted in inbound, spliced connections
with a remapped destination port (and only those) to be attempted on
the outbound side, as we would check the wrong position in port
bitmaps to establish whether to use inbound or outbound sockets.

For listening spliced sockets, set the port index in the epoll
reference to the original port. Once we get a connection, use the
port delta arrays to find the final destination port, and the
original destination port to decide what socket pool we should use.

This avoids the need for a reverse mapping table as implemented for
UDP.

Fixes: 33482d5bf293 ("passt: Add PASTA mode, major rework")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
 tcp.c        | 19 ++++++++-----------
 tcp_splice.c | 18 ++++++++++++++----
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/tcp.c b/tcp.c
index 63153b6..cc08353 100644
--- a/tcp.c
+++ b/tcp.c
@@ -3084,15 +3084,16 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
 		   const void *addr, const char *ifname, in_port_t port)
 {
+	union tcp_epoll_ref tref_spliced = { .tcp.listen = 1, .tcp.splice = 1 };
 	union tcp_epoll_ref tref = { .tcp.listen = 1 };
 	const void *bind_addr;
 	int s;
 
-	if (ns) {
+	if (ns)
 		tref.tcp.index = (in_port_t)(port + c->tcp.fwd_out.delta[port]);
-	} else {
+	else
 		tref.tcp.index = (in_port_t)(port + c->tcp.fwd_in.delta[port]);
-	}
+	tref_spliced.tcp.index = (in_port_t) port;
 
 	if (af == AF_INET || af == AF_UNSPEC) {
 		if (!addr && c->mode == MODE_PASTA)
@@ -3100,8 +3101,7 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
 		else
 			bind_addr = addr;
 
-		tref.tcp.v6 = 0;
-		tref.tcp.splice = 0;
+		tref.tcp.v6 = tref_spliced.tcp.v6 = 0;
 
 		if (!ns) {
 			s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, ifname,
@@ -3118,9 +3118,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
 		if (c->mode == MODE_PASTA) {
 			bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) };
 
-			tref.tcp.splice = 1;
 			s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, ifname,
-				    port, tref.u32);
+				    port, tref_spliced.u32);
 			if (s >= 0)
 				tcp_sock_set_bufsize(c, s);
 			else
@@ -3141,9 +3140,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
 		else
 			bind_addr = addr;
 
-		tref.tcp.v6 = 1;
+		tref.tcp.v6 = tref_spliced.tcp.v6 = 1;
 
-		tref.tcp.splice = 0;
 		if (!ns) {
 			s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, ifname,
 				    port, tref.u32);
@@ -3159,9 +3157,8 @@ void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
 		if (c->mode == MODE_PASTA) {
 			bind_addr = &in6addr_loopback;
 
-			tref.tcp.splice = 1;
 			s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, ifname,
-				    port, tref.u32);
+				    port, tref_spliced.u32);
 			if (s >= 0)
 				tcp_sock_set_bufsize(c, s);
 			else
diff --git a/tcp_splice.c b/tcp_splice.c
index 96c31c8..bf5f62c 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -35,6 +35,7 @@
 #include <fcntl.h>
 #include <limits.h>
 #include <stdint.h>
+#include <stdbool.h>
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
@@ -512,13 +513,18 @@ static int tcp_splice_connect_ns(void *arg)
 static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
 			  in_port_t port)
 {
-	struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
 	int *p, i, s = -1;
+	bool inbound;
 
-	if (bitmap_isset(c->tcp.fwd_in.map, port))
+	if (bitmap_isset(c->tcp.fwd_in.map, port)) {
 		p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
-	else
+		port += c->tcp.fwd_in.delta[port];
+		inbound = true;
+	} else {
 		p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
+		port += c->tcp.fwd_out.delta[port];
+		inbound = false;
+	}
 
 	for (i = 0; i < TCP_SOCK_POOL_SIZE; i++, p++) {
 		SWAP(s, *p);
@@ -526,11 +532,15 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
 			break;
 	}
 
-	if (s < 0 && bitmap_isset(c->tcp.fwd_in.map, port)) {
+	/* No socket available in namespace: create a new one for connect() */
+	if (s < 0 && inbound) {
+		struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
+
 		NS_CALL(tcp_splice_connect_ns, &ns_arg);
 		return ns_arg.ret;
 	}
 
+	/* Otherwise, the socket will connect on the side it was created on */
 	return tcp_splice_connect(c, conn, s, port);
 }
 
-- 
@@ -35,6 +35,7 @@
 #include <fcntl.h>
 #include <limits.h>
 #include <stdint.h>
+#include <stdbool.h>
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
@@ -512,13 +513,18 @@ static int tcp_splice_connect_ns(void *arg)
 static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
 			  in_port_t port)
 {
-	struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
 	int *p, i, s = -1;
+	bool inbound;
 
-	if (bitmap_isset(c->tcp.fwd_in.map, port))
+	if (bitmap_isset(c->tcp.fwd_in.map, port)) {
 		p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
-	else
+		port += c->tcp.fwd_in.delta[port];
+		inbound = true;
+	} else {
 		p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
+		port += c->tcp.fwd_out.delta[port];
+		inbound = false;
+	}
 
 	for (i = 0; i < TCP_SOCK_POOL_SIZE; i++, p++) {
 		SWAP(s, *p);
@@ -526,11 +532,15 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
 			break;
 	}
 
-	if (s < 0 && bitmap_isset(c->tcp.fwd_in.map, port)) {
+	/* No socket available in namespace: create a new one for connect() */
+	if (s < 0 && inbound) {
+		struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
+
 		NS_CALL(tcp_splice_connect_ns, &ns_arg);
 		return ns_arg.ret;
 	}
 
+	/* Otherwise, the socket will connect on the side it was created on */
 	return tcp_splice_connect(c, conn, s, port);
 }
 
-- 
2.35.1


  parent reply	other threads:[~2022-10-10 23:33 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-10 23:33 [PATCH 0/3] Fixes for spliced connections Stefano Brivio
2022-10-10 23:33 ` [PATCH 1/3] tcp, tcp_splice: Adjust comments to current meaning of inbound and outbound Stefano Brivio
2022-10-11  0:57   ` David Gibson
2022-10-10 23:33 ` Stefano Brivio [this message]
2022-10-11  1:19   ` [PATCH 2/3] tcp, tcp_splice: Fix port remapping for inbound, spliced connections David Gibson
2022-10-11  7:42     ` Stefano Brivio
2022-10-10 23:33 ` [PATCH 3/3] tcp: Don't create 'tap' socket for ports that are bound to loopback only Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221010233350.1198630-3-sbrivio@redhat.com \
    --to=sbrivio@redhat.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).