public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v3 07/14] fwd, tcp, udp: Set up listening sockets based on forward table
Date: Thu,  8 Jan 2026 13:29:41 +1100	[thread overview]
Message-ID: <20260108022948.2657573-8-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20260108022948.2657573-1-david@gibson.dropbear.id.au>

Previously we created inbound listening sockets as we parsed the forwarding
options (-t, -u) whereas outbound listening sockets were created during
{tcp,udp}_init().  Now that we have a data structure recording the full
details of the listening options we can move all listening socket creation
to {tcp,udp}_init().  This means that errors for either direction are
detected and reported the same way.

Introduce fwd_listen_sync() which synchronizes the state of listening
sockets to the forwarding rules table, both for fixed and automatic
forwards.

This does cause a change in semantics for "exclude only" port
specifications.  Previously an option like -t ~6000 wouldn't cause a
fatal error, as long as we could bind at least one port.  Now, we must
be able to bind at least one port for each generated rule, that is, for
each of the contiguous blocks of ports the specification resolves to.
With typical ephemeral port settings that means at least one port in
each of 1..5999, 6001..32767 and 61000..65535.

Preserving the exact behaviour for this case would require a considerably
more complex data structure, so I'm hoping this is a sufficiently niche
case for the change to be acceptable.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 conf.c |  27 ----------
 fwd.c  | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 fwd.h  |   3 ++
 ip.c   |   1 -
 tcp.c  | 122 ++---------------------------------------
 tcp.h  |   1 -
 udp.c  |  99 +++-------------------------------
 udp.h  |   1 -
 8 files changed, 177 insertions(+), 244 deletions(-)

diff --git a/conf.c b/conf.c
index 0bcf80d7..57693b3f 100644
--- a/conf.c
+++ b/conf.c
@@ -148,9 +148,7 @@ static void conf_ports_range_except(const struct ctx *c, char optname,
 				    uint8_t flags)
 {
 	unsigned delta = to - first;
-	bool bound_one = false;
 	unsigned base, i;
-	int fd;
 
 	if (first == 0) {
 		die("Can't forward port 0 for option '-%c %s'",
@@ -179,28 +177,6 @@ static void conf_ports_range_except(const struct ctx *c, char optname,
 				warn(
 "Altering mapping of already mapped port number: %s", optarg);
 			}
-
-			if (!(flags & FWD_SCAN) && optname == 't')
-				fd = tcp_listen(c, PIF_HOST, addr, ifname, i);
-			else if (!(flags & FWD_SCAN) && optname == 'u')
-				fd = udp_listen(c, PIF_HOST, addr, ifname, i);
-			else
-				/* No way to check in advance for -T and -U */
-				fd = 0;
-
-			if (fd == -ENFILE || fd == -EMFILE) {
-				die(
-"Can't open enough sockets for port specifier: %s",
-				    optarg);
-			}
-
-			if (fd >= 0) {
-				bound_one = true;
-			} else if (!(flags & FWD_WEAK)) {
-				die(
-"Failed to bind port %u (%s) for option '-%c %s'",
-				    i, strerror_(-fd), optname, optarg);
-			}
 		}
 
 		if ((optname == 'T' || optname == 'U') && c->no_bindtodevice) {
@@ -226,9 +202,6 @@ static void conf_ports_range_except(const struct ctx *c, char optname,
 		}
 		base = i - 1;
 	}
-
-	if (!bound_one)
-		die("Failed to bind any port for '-%c %s'", optname, optarg);
 }
 
 /**
diff --git a/fwd.c b/fwd.c
index f27a4220..70ef73a3 100644
--- a/fwd.c
+++ b/fwd.c
@@ -22,6 +22,7 @@
 #include <stdio.h>
 
 #include "util.h"
+#include "epoll_ctl.h"
 #include "ip.h"
 #include "siphash.h"
 #include "inany.h"
@@ -420,6 +421,160 @@ void fwd_rules_print(const struct fwd_ports *fwd)
 	}
 }
 
+/** fwd_sync_one() - Open or close listening sockets to match one forward rule
+ * @c:		Execution context
+ * @rule:	Forwarding rule, ->socks[] is updated to match the open sockets
+ * @pif:	Interface to create listening sockets for, must be a socket pif
+ * @proto:	Protocol to listen for (IPPROTO_TCP or IPPROTO_UDP)
+ * @scanmap:	Bitmap of ports to listen for, only consulted for FWD_SCAN rules
+ *
+ * Return: 0 on success, -1 on failure (each failed listen is also logged)
+ */
+static int fwd_sync_one(const struct ctx *c, const struct fwd_rule *rule,
+			uint8_t pif, uint8_t proto, const uint8_t *scanmap)
+{
+	const union inany_addr *addr = fwd_rule_addr(rule);
+	const char *ifname = rule->ifname;
+	bool bound_one = false;
+	unsigned port;
+
+	ASSERT(pif_is_socket(pif));
+
+	if (!*ifname)
+		ifname = NULL;
+
+	for (port = rule->first; port <= rule->last; port++) {
+		int fd = rule->socks[port - rule->first];
+
+		if ((rule->flags & FWD_SCAN) && !bitmap_isset(scanmap, port)) {
+			/* Scan rule, and @scanmap doesn't select this port */
+			if (fd >= 0) {
+				/* We're still listening: stop, forget socket */
+				epoll_del(c->epollfd, fd);
+				close(fd);
+				rule->socks[port - rule->first] = -1;
+			}
+			continue;
+		}
+
+		if (fd >= 0) { /* Already listening, nothing to do */
+			bound_one = true;
+			continue;
+		}
+
+		if (proto == IPPROTO_TCP)
+			fd = tcp_listen(c, pif, addr, ifname, port);
+		else if (proto == IPPROTO_UDP)
+			fd = udp_listen(c, pif, addr, ifname, port);
+		else
+			ASSERT(0);
+
+		if (fd < 0) {
+			char astr[INANY_ADDRSTRLEN] = "";
+
+			if (addr)
+				inany_ntop(addr, astr, sizeof(astr));
+
+			warn("Listen failed for %s %s port %s%s%s%s%u: %s",
+			     pif_name(pif), ipproto_name(proto),
+			     astr, ifname ? "%" : "", ifname ? ifname : "",
+			     addr || ifname ? "/" : "", port, strerror_(-fd));
+
+			if (!(rule->flags & FWD_WEAK)) /* Not weak: give up */
+				return -1;
+
+			continue;
+		}
+
+		rule->socks[port - rule->first] = fd;
+		bound_one = true;
+	}
+
+	if (!bound_one && !(rule->flags & FWD_SCAN)) { /* Fixed rule, no socket */
+		char astr[INANY_ADDRSTRLEN] = "";
+
+		if (addr)
+			inany_ntop(addr, astr, sizeof(astr));
+
+		warn("All listens failed for %s %s %s%s%s%s%u-%u",
+		     pif_name(pif), ipproto_name(proto),
+		     astr, ifname ? "%" : "", ifname ? ifname : "",
+		     addr || ifname ? "/" : "", rule->first, rule->last);
+		return -1;
+	}
+
+	return 0;
+}
+
+/** struct fwd_listen_args - arguments for fwd_listen_sync_()
+ * @c:		Execution context
+ * @fwd:	Forwarding information
+ * @scanmap:	Bitmap of ports to auto-forward (NOTE(review): looks unused)
+ * @pif:	Interface to create listening sockets for
+ * @proto:	Protocol
+ * @ret:	Result of the synchronisation: 0 on success, negative on failure
+ */
+struct fwd_listen_args {
+	const struct ctx *c;
+	const struct fwd_ports *fwd;
+	const uint8_t *scanmap;
+	uint8_t pif;
+	uint8_t proto;
+	int ret;
+};
+
+/** fwd_listen_sync_() - Update listening sockets to match forwards
+ * @arg:	struct fwd_listen_args with arguments, result is stored in ->ret
+ *
+ * Return: 0 always, success or failure is reported via ->ret of @arg
+ */
+static int fwd_listen_sync_(void *arg)
+{
+	struct fwd_listen_args *a = arg;
+	unsigned i;
+
+	if (a->pif == PIF_SPLICE)
+		ns_enter(a->c);
+
+	for (i = 0; i < a->fwd->count; i++) {
+		a->ret = fwd_sync_one(a->c, &a->fwd->rules[i],
+				      a->pif, a->proto, a->fwd->map);
+		if (a->ret < 0) /* Stop at the first rule that fails */
+			break;
+	}
+
+	return 0;
+}
+
+/** fwd_listen_sync() - Update listening sockets to match forwards
+ * @c:		Execution context
+ * @fwd:	Forwarding information, every rule in it is synchronised
+ * @pif:	Interface to create listening sockets for (PIF_SPLICE: by NS_CALL())
+ * @proto:	Protocol
+ *
+ * Return: 0 on success, -1 on failure, after logging an error
+ */
+int fwd_listen_sync(const struct ctx *c, const struct fwd_ports *fwd,
+		    uint8_t pif, uint8_t proto)
+{
+	struct fwd_listen_args a = {
+		.c = c, .fwd = fwd, .pif = pif, .proto = proto,
+	};
+
+	if (pif == PIF_SPLICE)
+		NS_CALL(fwd_listen_sync_, &a);
+	else
+		fwd_listen_sync_(&a);
+
+	if (a.ret < 0) {
+		err("Couldn't listen on requested %s ports",
+		    ipproto_name(proto));
+		return -1;
+	}
+
+	return 0;
+}
+
 /* See enum in kernel's include/net/tcp_states.h */
 #define UDP_LISTEN	0x07
 #define TCP_LISTEN	0x0a
@@ -578,10 +733,14 @@ void fwd_scan_ports_timer(struct ctx *c, const struct timespec *now)
 
 	fwd_scan_ports(c);
 
-	if (!c->no_tcp)
-		tcp_port_rebind_all(c);
-	if (!c->no_udp)
-		udp_port_rebind_all(c);
+	if (!c->no_tcp) {
+		fwd_listen_sync(c, &c->tcp.fwd_in, PIF_HOST, IPPROTO_TCP);
+		fwd_listen_sync(c, &c->tcp.fwd_out, PIF_SPLICE, IPPROTO_TCP);
+	}
+	if (!c->no_udp) {
+		fwd_listen_sync(c, &c->udp.fwd_in, PIF_HOST, IPPROTO_UDP);
+		fwd_listen_sync(c, &c->udp.fwd_out, PIF_SPLICE, IPPROTO_UDP);
+	}
 }
 
 /**
diff --git a/fwd.h b/fwd.h
index 3ddcb91d..f84e7c01 100644
--- a/fwd.h
+++ b/fwd.h
@@ -108,6 +108,9 @@ void fwd_rules_print(const struct fwd_ports *fwd);
 void fwd_scan_ports_init(struct ctx *c);
 void fwd_scan_ports_timer(struct ctx * c, const struct timespec *now);
 
+int fwd_listen_sync(const struct ctx *c, const struct fwd_ports *fwd,
+		    uint8_t pif, uint8_t proto);
+
 bool nat_inbound(const struct ctx *c, const union inany_addr *addr,
 		 union inany_addr *translated);
 uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto,
diff --git a/ip.c b/ip.c
index f1d224bd..fc26dab2 100644
--- a/ip.c
+++ b/ip.c
@@ -78,7 +78,6 @@ found:
  * /etc/protocols and might allocate, which isn't possible for us once
  * self-isolated.
  */
-/* cppcheck-suppress unusedFunction */
 const char *ipproto_name(uint8_t proto)
 {
 	switch (proto) {
diff --git a/tcp.c b/tcp.c
index 57faed4b..976f0ab7 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2732,50 +2732,6 @@ int tcp_listen(const struct ctx *c, uint8_t pif,
 	return s;
 }
 
-/**
- * tcp_ns_listen() - Init socket to listen for spliced outbound connections
- * @c:		Execution context
- * @port:	Port, host order
- */
-static void tcp_ns_listen(const struct ctx *c, in_port_t port)
-{
-	ASSERT(!c->no_tcp);
-
-	if (!c->no_bindtodevice) {
-		tcp_listen(c, PIF_SPLICE, NULL, "lo", port);
-		return;
-	}
-
-	if (c->ifi4)
-		tcp_listen(c, PIF_SPLICE, &inany_loopback4, NULL, port);
-	if (c->ifi6)
-		tcp_listen(c, PIF_SPLICE, &inany_loopback6, NULL, port);
-}
-
-/**
- * tcp_ns_socks_init() - Bind sockets in namespace for outbound connections
- * @arg:	Execution context
- *
- * Return: 0
- */
-/* cppcheck-suppress [constParameterCallback, unmatchedSuppression] */
-static int tcp_ns_socks_init(void *arg)
-{
-	const struct ctx *c = (const struct ctx *)arg;
-	unsigned port;
-
-	ns_enter(c);
-
-	for (port = 0; port < NUM_PORTS; port++) {
-		if (!bitmap_isset(c->tcp.fwd_out.map, port))
-			continue;
-
-		tcp_ns_listen(c, port);
-	}
-
-	return 0;
-}
-
 /**
  * tcp_sock_refill_pool() - Refill one pool of pre-opened sockets
  * @pool:	Pool of sockets to refill
@@ -2919,10 +2875,13 @@ int tcp_init(struct ctx *c)
 
 	tcp_sock_refill_init(c);
 
+	if (fwd_listen_sync(c, &c->tcp.fwd_in, PIF_HOST, IPPROTO_TCP) < 0)
+		return -1;
 	if (c->mode == MODE_PASTA) {
 		tcp_splice_init(c);
-
-		NS_CALL(tcp_ns_socks_init, c);
+		if (fwd_listen_sync(c, &c->tcp.fwd_out,
+				    PIF_SPLICE, IPPROTO_TCP) < 0)
+			return -1;
 	}
 
 	peek_offset_cap = (!c->ifi4 || tcp_probe_peek_offset_cap(AF_INET)) &&
@@ -2941,77 +2900,6 @@ int tcp_init(struct ctx *c)
 	return 0;
 }
 
-/**
- * tcp_port_rebind() - Rebind ports to match forward maps
- * @c:		Execution context
- * @outbound:	True to remap outbound forwards, otherwise inbound
- *
- * Must be called in namespace context if @outbound is true.
- */
-static void tcp_port_rebind(struct ctx *c, bool outbound)
-{
-	const uint8_t *fmap = outbound ? c->tcp.fwd_out.map : c->tcp.fwd_in.map;
-	int (*socks)[IP_VERSIONS] = outbound ? tcp_sock_ns : tcp_sock_init_ext;
-	unsigned port;
-
-	for (port = 0; port < NUM_PORTS; port++) {
-		if (!bitmap_isset(fmap, port)) {
-			if (socks[port][V4] >= 0) {
-				close(socks[port][V4]);
-				socks[port][V4] = -1;
-			}
-
-			if (socks[port][V6] >= 0) {
-				close(socks[port][V6]);
-				socks[port][V6] = -1;
-			}
-
-			continue;
-		}
-
-		if ((c->ifi4 && socks[port][V4] == -1) ||
-		    (c->ifi6 && socks[port][V6] == -1)) {
-			if (outbound)
-				tcp_ns_listen(c, port);
-			else
-				tcp_listen(c, PIF_HOST, NULL, NULL, port);
-		}
-	}
-}
-
-/**
- * tcp_port_rebind_outbound() - Rebind ports in namespace
- * @arg:	Execution context
- *
- * Called with NS_CALL()
- *
- * Return: 0
- */
-static int tcp_port_rebind_outbound(void *arg)
-{
-	struct ctx *c = (struct ctx *)arg;
-
-	ns_enter(c);
-	tcp_port_rebind(c, true);
-
-	return 0;
-}
-
-/**
- * tcp_port_rebind_all() - Rebind ports to match forward maps (in host & ns)
- * @c:		Execution context
- */
-void tcp_port_rebind_all(struct ctx *c)
-{
-	ASSERT(c->mode == MODE_PASTA && !c->no_tcp);
-
-	if (c->tcp.fwd_out.mode == FWD_AUTO)
-		NS_CALL(tcp_port_rebind_outbound, c);
-
-	if (c->tcp.fwd_in.mode == FWD_AUTO)
-		tcp_port_rebind(c, false);
-}
-
 /**
  * tcp_timer() - Periodic tasks: port detection, closed connections, pool refill
  * @c:		Execution context
diff --git a/tcp.h b/tcp.h
index ef1e3544..45f97d93 100644
--- a/tcp.h
+++ b/tcp.h
@@ -22,7 +22,6 @@ int tcp_listen(const struct ctx *c, uint8_t pif,
 	       const union inany_addr *addr, const char *ifname,
 	       in_port_t port);
 int tcp_init(struct ctx *c);
-void tcp_port_rebind_all(struct ctx *c);
 void tcp_timer(const struct ctx *c, const struct timespec *now);
 void tcp_defer_handler(struct ctx *c);
 
diff --git a/udp.c b/udp.c
index d7dcb1d2..7c5546df 100644
--- a/udp.c
+++ b/udp.c
@@ -1203,98 +1203,6 @@ static void udp_splice_iov_init(void)
 	}
 }
 
-/**
- * udp_ns_listen() - Init socket to listen for spliced outbound connections
- * @c:		Execution context
- * @port:	Port, host order
- */
-static void udp_ns_listen(const struct ctx *c, in_port_t port)
-{
-	ASSERT(!c->no_udp);
-
-	if (!c->no_bindtodevice) {
-		udp_listen(c, PIF_SPLICE, NULL, "lo", port);
-		return;
-	}
-
-	if (c->ifi4)
-		udp_listen(c, PIF_SPLICE, &inany_loopback4, NULL, port);
-	if (c->ifi6)
-		udp_listen(c, PIF_SPLICE, &inany_loopback6, NULL, port);
-}
-
-/**
- * udp_port_rebind() - Rebind ports to match forward maps
- * @c:		Execution context
- * @outbound:	True to remap outbound forwards, otherwise inbound
- *
- * Must be called in namespace context if @outbound is true.
- */
-static void udp_port_rebind(struct ctx *c, bool outbound)
-{
-	int (*socks)[NUM_PORTS] = outbound ? udp_splice_ns : udp_splice_init;
-	const uint8_t *fmap
-		= outbound ? c->udp.fwd_out.map : c->udp.fwd_in.map;
-	unsigned port;
-
-	for (port = 0; port < NUM_PORTS; port++) {
-		if (!bitmap_isset(fmap, port)) {
-			if (socks[V4][port] >= 0) {
-				close(socks[V4][port]);
-				socks[V4][port] = -1;
-			}
-
-			if (socks[V6][port] >= 0) {
-				close(socks[V6][port]);
-				socks[V6][port] = -1;
-			}
-
-			continue;
-		}
-
-		if ((c->ifi4 && socks[V4][port] == -1) ||
-		    (c->ifi6 && socks[V6][port] == -1)) {
-			if (outbound)
-				udp_ns_listen(c, port);
-			else
-				udp_listen(c, PIF_HOST, NULL, NULL, port);
-		}
-	}
-}
-
-/**
- * udp_port_rebind_outbound() - Rebind ports in namespace
- * @arg:	Execution context
- *
- * Called with NS_CALL()
- *
- * Return: 0
- */
-static int udp_port_rebind_outbound(void *arg)
-{
-	struct ctx *c = (struct ctx *)arg;
-
-	ns_enter(c);
-	udp_port_rebind(c, true);
-
-	return 0;
-}
-
-/**
- * udp_port_rebind_all() - Rebind ports to match forward maps (in host & ns)
- * @c:		Execution context
- */
-void udp_port_rebind_all(struct ctx *c)
-{
-	ASSERT(c->mode == MODE_PASTA && !c->no_udp);
-
-	if (c->udp.fwd_out.mode == FWD_AUTO)
-		NS_CALL(udp_port_rebind_outbound, c);
-
-	if (c->udp.fwd_in.mode == FWD_AUTO)
-		udp_port_rebind(c, false);
-}
-
 /**
  * udp_init() - Initialise per-socket data, and sockets in namespace
  * @c:		Execution context
@@ -1307,9 +1215,14 @@ int udp_init(struct ctx *c)
 
 	udp_iov_init(c);
 
+	if (fwd_listen_sync(c, &c->udp.fwd_in, PIF_HOST, IPPROTO_UDP) < 0)
+		return -1;
+
 	if (c->mode == MODE_PASTA) {
 		udp_splice_iov_init();
-		NS_CALL(udp_port_rebind_outbound, c);
+		if (fwd_listen_sync(c, &c->udp.fwd_out,
+				    PIF_SPLICE, IPPROTO_UDP) < 0)
+			return -1;
 	}
 
 	return 0;
diff --git a/udp.h b/udp.h
index 94c698e2..73efe036 100644
--- a/udp.h
+++ b/udp.h
@@ -19,7 +19,6 @@ int udp_listen(const struct ctx *c, uint8_t pif,
 	       const union inany_addr *addr, const char *ifname,
 	       in_port_t port);
 int udp_init(struct ctx *c);
-void udp_port_rebind_all(struct ctx *c);
 void udp_update_l2_buf(const unsigned char *eth_d);
 
 /**
-- 
2.52.0


  parent reply	other threads:[~2026-01-08  2:29 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-08  2:29 [PATCH v3 00/14] Introduce forwarding table David Gibson
2026-01-08  2:29 ` [PATCH v3 01/14] inany: Extend inany_ntop() to treat NULL as a fully unspecified address David Gibson
2026-01-08 13:16   ` Laurent Vivier
2026-01-08  2:29 ` [PATCH v3 02/14] conf, fwd: Keep a table of our port forwarding configuration David Gibson
2026-01-08  2:29 ` [PATCH v3 03/14] conf: Accurately record ifname and address for outbound forwards David Gibson
2026-01-08  2:29 ` [PATCH v3 04/14] conf, fwd: Record "auto" port forwards in forwarding table David Gibson
2026-01-08  2:29 ` [PATCH v3 05/14] fwd: Make space to store listening sockets in forward table David Gibson
2026-01-08  2:29 ` [PATCH v3 06/14] ip: Add ipproto_name() function David Gibson
2026-01-08 13:22   ` Laurent Vivier
2026-01-08 23:12     ` David Gibson
2026-01-08  2:29 ` David Gibson [this message]
2026-01-08  2:29 ` [PATCH v3 08/14] tcp, udp: Remove old auto-forwarding socket arrays David Gibson
2026-01-08  2:29 ` [PATCH v3 09/14] conf, fwd: Check forwarding table for conflicting rules David Gibson
2026-01-08  2:29 ` [PATCH v3 10/14] fwd: Generate auto-forward exclusions from socket fd tables David Gibson
2026-01-08  2:29 ` [PATCH v3 11/14] flow, fwd: Consult rules table when forwarding a new flow from socket David Gibson
2026-01-08  2:29 ` [PATCH v3 12/14] fwd: Remap ports based directly on forwarding rule David Gibson
2026-01-08  2:29 ` [PATCH v3 13/14] fwd, tcp, udp: Add forwarding rule to listening socket epoll references David Gibson
2026-01-08  2:29 ` [PATCH v3 14/14] flow, fwd: Optimise forwarding rule lookup using epoll ref when possible David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260108022948.2657573-8-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).