public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v2 09/12] fwd, tcp, udp: Set up listening sockets based on forward table
Date: Sat, 20 Dec 2025 01:19:01 +1100	[thread overview]
Message-ID: <20251219141904.1758072-10-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20251219141904.1758072-1-david@gibson.dropbear.id.au>

Previously we created inbound listening sockets as we parsed the forwarding
options (-t, -u) whereas outbound listening sockets were created during
{tcp,udp}_init().  Now that we have a data structure recording the full
details of the listening options we can move socket creation to
{tcp,udp}_init().  This means that errors for either direction are
detected and reported the same way.

Introduce fwd_listen_sync() which synchronizes the state of listening
sockets to the forward table data structure, both for fixed and automatic
forwards.

This does cause a change in semantics for "exclude only" port
specifications.  Previously an option like -t ~6000 wouldn't cause a fatal
error, as long as we could bind at least one port.  Now, it requires at
least one port bound in each of the contiguous blocks of ports the
specification resolves to.  With typical ephemeral port settings, that's
one port each in 1..5999, 6001..32767 and 61000..65535.

Preserving the exact behaviour for this case would require a considerably
more complex data structure, so I'm hoping this is a sufficiently niche
case for the change to be acceptable.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 conf.c |  27 ----------
 fwd.c  | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 fwd.h  |   3 ++
 ip.c   |   1 -
 tcp.c  | 119 +------------------------------------------
 tcp.h  |   1 -
 udp.c  |  96 ++---------------------------------
 udp.h  |   1 -
 8 files changed, 160 insertions(+), 245 deletions(-)

diff --git a/conf.c b/conf.c
index 3d41a0fb..c0672be5 100644
--- a/conf.c
+++ b/conf.c
@@ -148,9 +148,7 @@ static void conf_ports_range_except(const struct ctx *c, char optname,
 				    uint8_t flags)
 {
 	unsigned delta = to - first;
-	bool bound_one = false;
 	unsigned base, i;
-	int fd;
 
 	if (first == 0) {
 		die("Can't forward port 0 for option '-%c %s'",
@@ -173,28 +171,6 @@ static void conf_ports_range_except(const struct ctx *c, char optname,
 			if (!(flags & FWD_SCAN))
 				bitmap_set(fwd->map, i);
 			fwd->delta[i] = delta;
-
-			if (!(flags & FWD_SCAN) && optname == 't')
-				fd = tcp_listen(c, PIF_HOST, addr, ifname, i);
-			else if (!(flags & FWD_SCAN) && optname == 'u')
-				fd = udp_listen(c, PIF_HOST, addr, ifname, i);
-			else
-				/* No way to check in advance for -T and -U */
-				fd = 0; /* dummy */
-
-			if (fd == -ENFILE || fd == -EMFILE) {
-				die(
-"Can't open enough sockets for port specifier: %s",
-				    optarg);
-			}
-
-			if (fd >= 0) {
-				bound_one = true;
-			} else if (!(flags & FWD_WEAK)) {
-				die(
-"Failed to bind port %u (%s) for option '-%c %s'",
-				    i, strerror_(-fd), optname, optarg);
-			}
 		}
 
 		if ((optname == 'T' || optname == 'U') && c->no_bindtodevice) {
@@ -220,9 +196,6 @@ static void conf_ports_range_except(const struct ctx *c, char optname,
 		}
 		base = i - 1;
 	}
-
-	if (!bound_one)
-		die("Failed to bind any port for '-%c %s'", optname, optarg);
 }
 
 /**
diff --git a/fwd.c b/fwd.c
index 5215cee9..21e852af 100644
--- a/fwd.c
+++ b/fwd.c
@@ -22,6 +22,7 @@
 #include <stdio.h>
 
 #include "util.h"
+#include "epoll_ctl.h"
 #include "ip.h"
 #include "siphash.h"
 #include "inany.h"
@@ -399,6 +400,148 @@ void fwd_table_print(const struct fwd_ports *fwd)
 	}
 }
 
+/** fwd_sync_one() - Create or remove listening sockets for a forward entry
+ * @c:		Execution context
+ * @fe:		Forwarding entry
+ * @pif:	Interface to create listening sockets for
+ * @proto:	Protocol to listen for
+ * @scanmap:	Bitmap of ports to listen for on FWD_SCAN entries
+ */
+static void fwd_sync_one(const struct ctx *c, const struct fwd_entry *fe,
+			 uint8_t pif, uint8_t proto, const uint8_t *scanmap)
+{
+	const union inany_addr *addr = &fe->addr;
+	const char *ifname = fe->ifname;
+	bool bound_one = false;
+	unsigned port;
+
+	ASSERT(pif_is_socket(pif));
+
+	if (fe->flags & FWD_DUAL_STACK)
+		addr = NULL;
+	if (!*ifname)
+		ifname = NULL;
+
+	for (port = fe->first; port <= fe->last; port++) {
+		int fd = fe->socks[port - fe->first];
+
+		if ((fe->flags & FWD_SCAN) && !bitmap_isset(scanmap, port)) {
+			/* We don't want to listen on this port */
+			if (fd >= 0) {
+				/* We already are, so stop */
+				epoll_del(c->epollfd, fd);
+				close(fd);
+				fe->socks[port - fe->first] = -1;
+			}
+			continue;
+		}
+
+		if (fd >= 0) /* Already listening, nothing to do */ {
+			bound_one = true;
+			continue;
+		}
+
+		if (proto == IPPROTO_TCP)
+			fd = tcp_listen(c, pif, addr, ifname, port);
+		else if (proto == IPPROTO_UDP)
+			fd = udp_listen(c, pif, addr, ifname, port);
+		else
+			ASSERT(0);
+
+		if (fd < 0) {
+			char astr[INANY_ADDRSTRLEN] = "";
+
+			if (addr)
+				inany_ntop(addr, astr, sizeof(astr));
+
+			warn("Listen failed for %s %s port %s%s%s%s%u: %s",
+			     pif_name(pif), ipproto_name(proto),
+			     astr, ifname ? "%" : "", ifname ? ifname : "",
+			     addr || ifname ? "/" : "", port, strerror_(-fd));
+
+			if (!(fe->flags & FWD_WEAK))
+				goto die;
+
+			continue;
+		}
+
+		fe->socks[port - fe->first] = fd;
+		bound_one = true;
+	}
+
+	if (!bound_one && !(fe->flags & FWD_SCAN)) {
+		char astr[INANY_ADDRSTRLEN] = "";
+
+		if (addr)
+			inany_ntop(addr, astr, sizeof(astr));
+
+		err("All listens failed for %s %s %s%s%s%s%u-%u",
+		    pif_name(pif), ipproto_name(proto),
+		    astr, ifname ? "%" : "", ifname ? ifname : "",
+		    addr || ifname ? "/" : "", fe->first, fe->last);
+		goto die;
+	}
+
+	return;
+
+die:
+	die("Couldn't listen on requested %s ports", ipproto_name(proto));
+}
+
+/** struct fwd_listen_args - arguments for fwd_listen_sync_()
+ * @c:		Execution context
+ * @fwd:	Forwarding information
+ * @scanmap:	Bitmap of ports to auto-forward
+ * @pif:	Interface to create listening sockets for
+ * @proto:	Protocol
+ */
+struct fwd_listen_args {
+	const struct ctx *c;
+	const struct fwd_ports *fwd;
+	const uint8_t *scanmap;
+	uint8_t pif;
+	uint8_t proto;
+};
+
+/** fwd_listen_sync_() - Update listening sockets to match forwards
+ * @arg:	struct fwd_listen_args with arguments
+ *
+ * Return: 0
+ */
+static int fwd_listen_sync_(void *arg)
+{
+	const struct fwd_listen_args *a = arg;
+	unsigned i;
+
+	if (a->pif == PIF_SPLICE)
+		ns_enter(a->c);
+
+	for (i = 0; i < a->fwd->count; i++)
+		fwd_sync_one(a->c, &a->fwd->tab[i], a->pif, a->proto,
+			     a->fwd->map);
+
+	return 0;
+}
+
+/** fwd_listen_sync() - Update listening sockets to match forwards
+ * @c:		Execution context
+ * @fwd:	Forwarding information
+ * @pif:	Interface to create listening sockets for
+ * @proto:	Protocol
+ */
+void fwd_listen_sync(const struct ctx *c, const struct fwd_ports *fwd,
+		     uint8_t pif, uint8_t proto)
+{
+	struct fwd_listen_args a = {
+		.c = c, .fwd = fwd, .pif = pif, .proto = proto,
+	};
+
+	if (pif == PIF_SPLICE)
+		NS_CALL(fwd_listen_sync_, &a);
+	else
+		fwd_listen_sync_(&a);
+}
+
 /* See enum in kernel's include/net/tcp_states.h */
 #define UDP_LISTEN	0x07
 #define TCP_LISTEN	0x0a
@@ -506,7 +649,7 @@ static void fwd_scan_ports(struct ctx *c)
 }
 
 /**
- * fwd_scan_ports_init() - Initial setup for automatic port forwarding
+ * fwd_scan_ports_init() - Initial setup for port forwarding
  * @c:		Execution context
  */
 void fwd_scan_ports_init(struct ctx *c)
@@ -557,10 +700,14 @@ void fwd_scan_ports_timer(struct ctx *c, const struct timespec *now)
 
 	fwd_scan_ports(c);
 
-	if (!c->no_tcp)
-		tcp_port_rebind_all(c);
-	if (!c->no_udp)
-		udp_port_rebind_all(c);
+	if (!c->no_tcp) {
+		fwd_listen_sync(c, &c->tcp.fwd_in, PIF_HOST, IPPROTO_TCP);
+		fwd_listen_sync(c, &c->tcp.fwd_out, PIF_SPLICE, IPPROTO_TCP);
+	}
+	if (!c->no_udp) {
+		fwd_listen_sync(c, &c->udp.fwd_in, PIF_HOST, IPPROTO_UDP);
+		fwd_listen_sync(c, &c->udp.fwd_out, PIF_SPLICE, IPPROTO_UDP);
+	}
 }
 
 /**
diff --git a/fwd.h b/fwd.h
index 84c463e2..3f3b111c 100644
--- a/fwd.h
+++ b/fwd.h
@@ -91,6 +91,9 @@ void fwd_table_print(const struct fwd_ports *fwd);
 void fwd_scan_ports_init(struct ctx *c);
 void fwd_scan_ports_timer(struct ctx * c, const struct timespec *now);
 
+void fwd_listen_sync(const struct ctx *c, const struct fwd_ports *fwd,
+		     uint8_t pif, uint8_t proto);
+
 bool nat_inbound(const struct ctx *c, const union inany_addr *addr,
 		 union inany_addr *translated);
 uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto,
diff --git a/ip.c b/ip.c
index f1d224bd..fc26dab2 100644
--- a/ip.c
+++ b/ip.c
@@ -78,7 +78,6 @@ found:
  * /etc/protocols and might allocate, which isn't possible for us once
  * self-isolated.
  */
-/* cppcheck-suppress unusedFunction */
 const char *ipproto_name(uint8_t proto)
 {
 	switch (proto) {
diff --git a/tcp.c b/tcp.c
index e52f5420..06f58b10 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2734,50 +2734,6 @@ int tcp_listen(const struct ctx *c, uint8_t pif,
 	return s;
 }
 
-/**
- * tcp_ns_listen() - Init socket to listen for spliced outbound connections
- * @c:		Execution context
- * @port:	Port, host order
- */
-static void tcp_ns_listen(const struct ctx *c, in_port_t port)
-{
-	ASSERT(!c->no_tcp);
-
-	if (!c->no_bindtodevice) {
-		tcp_listen(c, PIF_SPLICE, NULL, "lo", port);
-		return;
-	}
-
-	if (c->ifi4)
-		tcp_listen(c, PIF_SPLICE, &inany_loopback4, NULL, port);
-	if (c->ifi6)
-		tcp_listen(c, PIF_SPLICE, &inany_loopback6, NULL, port);
-}
-
-/**
- * tcp_ns_socks_init() - Bind sockets in namespace for outbound connections
- * @arg:	Execution context
- *
- * Return: 0
- */
-/* cppcheck-suppress [constParameterCallback, unmatchedSuppression] */
-static int tcp_ns_socks_init(void *arg)
-{
-	const struct ctx *c = (const struct ctx *)arg;
-	unsigned port;
-
-	ns_enter(c);
-
-	for (port = 0; port < NUM_PORTS; port++) {
-		if (!bitmap_isset(c->tcp.fwd_out.map, port))
-			continue;
-
-		tcp_ns_listen(c, port);
-	}
-
-	return 0;
-}
-
 /**
  * tcp_sock_refill_pool() - Refill one pool of pre-opened sockets
  * @pool:	Pool of sockets to refill
@@ -2921,10 +2877,10 @@ int tcp_init(struct ctx *c)
 
 	tcp_sock_refill_init(c);
 
+	fwd_listen_sync(c, &c->tcp.fwd_in, PIF_HOST, IPPROTO_TCP);
 	if (c->mode == MODE_PASTA) {
 		tcp_splice_init(c);
-
-		NS_CALL(tcp_ns_socks_init, c);
+		fwd_listen_sync(c, &c->tcp.fwd_out, PIF_SPLICE, IPPROTO_TCP);
 	}
 
 	peek_offset_cap = (!c->ifi4 || tcp_probe_peek_offset_cap(AF_INET)) &&
@@ -2943,77 +2899,6 @@ int tcp_init(struct ctx *c)
 	return 0;
 }
 
-/**
- * tcp_port_rebind() - Rebind ports to match forward maps
- * @c:		Execution context
- * @outbound:	True to remap outbound forwards, otherwise inbound
- *
- * Must be called in namespace context if @outbound is true.
- */
-static void tcp_port_rebind(struct ctx *c, bool outbound)
-{
-	const uint8_t *fmap = outbound ? c->tcp.fwd_out.map : c->tcp.fwd_in.map;
-	int (*socks)[IP_VERSIONS] = outbound ? tcp_sock_ns : tcp_sock_init_ext;
-	unsigned port;
-
-	for (port = 0; port < NUM_PORTS; port++) {
-		if (!bitmap_isset(fmap, port)) {
-			if (socks[port][V4] >= 0) {
-				close(socks[port][V4]);
-				socks[port][V4] = -1;
-			}
-
-			if (socks[port][V6] >= 0) {
-				close(socks[port][V6]);
-				socks[port][V6] = -1;
-			}
-
-			continue;
-		}
-
-		if ((c->ifi4 && socks[port][V4] == -1) ||
-		    (c->ifi6 && socks[port][V6] == -1)) {
-			if (outbound)
-				tcp_ns_listen(c, port);
-			else
-				tcp_listen(c, PIF_HOST, NULL, NULL, port);
-		}
-	}
-}
-
-/**
- * tcp_port_rebind_outbound() - Rebind ports in namespace
- * @arg:	Execution context
- *
- * Called with NS_CALL()
- *
- * Return: 0
- */
-static int tcp_port_rebind_outbound(void *arg)
-{
-	struct ctx *c = (struct ctx *)arg;
-
-	ns_enter(c);
-	tcp_port_rebind(c, true);
-
-	return 0;
-}
-
-/**
- * tcp_port_rebind_all() - Rebind ports to match forward maps (in host & ns)
- * @c:		Execution context
- */
-void tcp_port_rebind_all(struct ctx *c)
-{
-	ASSERT(c->mode == MODE_PASTA && !c->no_tcp);
-
-	if (c->tcp.fwd_out.mode == FWD_AUTO)
-		NS_CALL(tcp_port_rebind_outbound, c);
-
-	if (c->tcp.fwd_in.mode == FWD_AUTO)
-		tcp_port_rebind(c, false);
-}
-
 /**
  * tcp_timer() - Periodic tasks: port detection, closed connections, pool refill
  * @c:		Execution context
diff --git a/tcp.h b/tcp.h
index 9dd88762..8b44e321 100644
--- a/tcp.h
+++ b/tcp.h
@@ -22,7 +22,6 @@ int tcp_listen(const struct ctx *c, uint8_t pif,
 	       const union inany_addr *addr, const char *ifname,
 	       in_port_t port);
 int tcp_init(struct ctx *c);
-void tcp_port_rebind_all(struct ctx *c);
 void tcp_timer(const struct ctx *c, const struct timespec *now);
 void tcp_defer_handler(struct ctx *c);
 
diff --git a/udp.c b/udp.c
index 6168c36c..adcd2d4a 100644
--- a/udp.c
+++ b/udp.c
@@ -1205,98 +1205,6 @@ static void udp_splice_iov_init(void)
 	}
 }
 
-/**
- * udp_ns_listen() - Init socket to listen for spliced outbound connections
- * @c:		Execution context
- * @port:	Port, host order
- */
-static void udp_ns_listen(const struct ctx *c, in_port_t port)
-{
-	ASSERT(!c->no_udp);
-
-	if (!c->no_bindtodevice) {
-		udp_listen(c, PIF_SPLICE, NULL, "lo", port);
-		return;
-	}
-
-	if (c->ifi4)
-		udp_listen(c, PIF_SPLICE, &inany_loopback4, NULL, port);
-	if (c->ifi6)
-		udp_listen(c, PIF_SPLICE, &inany_loopback6, NULL, port);
-}
-
-/**
- * udp_port_rebind() - Rebind ports to match forward maps
- * @c:		Execution context
- * @outbound:	True to remap outbound forwards, otherwise inbound
- *
- * Must be called in namespace context if @outbound is true.
- */
-static void udp_port_rebind(struct ctx *c, bool outbound)
-{
-	int (*socks)[NUM_PORTS] = outbound ? udp_splice_ns : udp_splice_init;
-	const uint8_t *fmap
-		= outbound ? c->udp.fwd_out.map : c->udp.fwd_in.map;
-	unsigned port;
-
-	for (port = 0; port < NUM_PORTS; port++) {
-		if (!bitmap_isset(fmap, port)) {
-			if (socks[V4][port] >= 0) {
-				close(socks[V4][port]);
-				socks[V4][port] = -1;
-			}
-
-			if (socks[V6][port] >= 0) {
-				close(socks[V6][port]);
-				socks[V6][port] = -1;
-			}
-
-			continue;
-		}
-
-		if ((c->ifi4 && socks[V4][port] == -1) ||
-		    (c->ifi6 && socks[V6][port] == -1)) {
-			if (outbound)
-				udp_ns_listen(c, port);
-			else
-				udp_listen(c, PIF_HOST, NULL, NULL, port);
-		}
-	}
-}
-
-/**
- * udp_port_rebind_outbound() - Rebind ports in namespace
- * @arg:	Execution context
- *
- * Called with NS_CALL()
- *
- * Return: 0
- */
-static int udp_port_rebind_outbound(void *arg)
-{
-	struct ctx *c = (struct ctx *)arg;
-
-	ns_enter(c);
-	udp_port_rebind(c, true);
-
-	return 0;
-}
-
-/**
- * udp_port_rebind_all() - Rebind ports to match forward maps (in host & ns)
- * @c:		Execution context
- */
-void udp_port_rebind_all(struct ctx *c)
-{
-	ASSERT(c->mode == MODE_PASTA && !c->no_udp);
-
-	if (c->udp.fwd_out.mode == FWD_AUTO)
-		NS_CALL(udp_port_rebind_outbound, c);
-
-	if (c->udp.fwd_in.mode == FWD_AUTO)
-		udp_port_rebind(c, false);
-}
-
 /**
  * udp_init() - Initialise per-socket data, and sockets in namespace
  * @c:		Execution context
@@ -1309,9 +1217,11 @@ int udp_init(struct ctx *c)
 
 	udp_iov_init(c);
 
+	fwd_listen_sync(c, &c->udp.fwd_in, PIF_HOST, IPPROTO_UDP);
+
 	if (c->mode == MODE_PASTA) {
 		udp_splice_iov_init();
-		NS_CALL(udp_port_rebind_outbound, c);
+		fwd_listen_sync(c, &c->udp.fwd_out, PIF_SPLICE, IPPROTO_UDP);
 	}
 
 	return 0;
diff --git a/udp.h b/udp.h
index 5407db3b..f1a7a026 100644
--- a/udp.h
+++ b/udp.h
@@ -19,7 +19,6 @@ int udp_listen(const struct ctx *c, uint8_t pif,
 	       const union inany_addr *addr, const char *ifname,
 	       in_port_t port);
 int udp_init(struct ctx *c);
-void udp_port_rebind_all(struct ctx *c);
 void udp_update_l2_buf(const unsigned char *eth_d);
 
 /**
-- 
2.52.0


  parent reply	other threads:[~2025-12-19 14:19 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-19 14:18 [PATCH v2 00/12] RFC: Improve forwarding data structure David Gibson
2025-12-19 14:18 ` [PATCH v2 01/12] tcp: Combine tcp_sock_init_one() and tcp_sock_init() into tcp_listen() David Gibson
2025-12-19 14:18 ` [PATCH v2 02/12] udp: Rename udp_sock_init() to udp_listen() with small cleanups David Gibson
2025-12-19 14:18 ` [PATCH v2 03/12] conf, fwd: Keep a table of our port forwarding configuration David Gibson
2025-12-19 14:18 ` [PATCH v2 04/12] conf: Accurately record ifname and address for outbound forwards David Gibson
2025-12-19 14:18 ` [PATCH v2 05/12] conf, fwd: Record "auto" port forwards in forwarding table David Gibson
2025-12-19 14:18 ` [PATCH v2 06/12] tcp, udp: Make {tcp,udp}_listen() return socket fds David Gibson
2025-12-19 14:18 ` [PATCH v2 07/12] fwd: Make space to store listening sockets in forward table David Gibson
2025-12-19 14:19 ` [PATCH v2 08/12] ip: Add ipproto_name() function David Gibson
2025-12-19 14:19 ` David Gibson [this message]
2025-12-19 14:19 ` [PATCH v2 10/12] tcp, udp: Remove old auto-forwarding socket arrays David Gibson
2025-12-19 14:19 ` [PATCH v2 11/12] fwd: Generate auto-forward exclusions from socket fd tables David Gibson
2025-12-19 14:19 ` [PATCH v2 12/12] tcp: Remove unused tcp_epoll_ref David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251219141904.1758072-10-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).