On Tue, Jan 13, 2026 at 12:26:36AM +0100, Stefano Brivio wrote: > On Thu, 8 Jan 2026 13:29:41 +1100 > David Gibson wrote: > > > Previously we created inbound listening sockets as we parsed the forwarding > > options (-t, -u) whereas outbound listening sockets were created during > > {tcp,udp}_init(). Now that we have a data structure recording the full > > details of the listening options we can move all listening socket creation > > to {tcp,udp}_init(). This means that errors for either direction are > > detected and reported the same way. > > > > Introduce fwd_listen_sync() which synchronizes the state of listening > > sockets to the forwarding rules table, both for fixed and automatic > > forwards. > > > > This does cause a change in semantics for "exclude only" port > > specifications. Previously an option like -t ~6000 wouldn't cause a > > fatal error, as long as we could bind at least one port. Now, it > > requires at least one port for each generated rule; that is, for each > > of the contiguous blocks of ports the specification resolves to. With > > typical ephemeral port settings that's one port each in 1..5999, > > 6001..32767 and 61000..65535. > > > > Preserving the exact behaviour for this case would require a considerably > > more complex data structure, so I'm hoping this is a sufficiently niche > > case for the change to be acceptable. > > I guess so too, I wouldn't really worry. > > Well, I'm not sure if it works, but one relatively simple idea could be > to have a "with_prev" bit in the rule struct representing the fact that > the current rule was derived from the same port specification as the > previous rule, which implies they would need to be deleted all together > (but we can happily enforce that). 
> > Then, in the fwd_listen_sync_() loop, before reporting failure, you > would check the next entry: if the "with_prev" bit is set, report > failure only if we fail (keeping a local boolean flag) for all the > entries up to the first one with "with_prev" unset. I'll keep that approach in mind if it seems like we need it. > I would be inclined to say it's worth it if it's that simple, but I > haven't tried, so I might very well be missing something. I also considered making WEAK mean we'd always continue on listen failures, even if all of them fail. Maybe that's a bit unexpected? But it would allow an option to "forward single port X, if you can" which seems like it might be useful. > > > > > Signed-off-by: David Gibson > > --- > > conf.c | 27 ---------- > > fwd.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > > fwd.h | 3 ++ > > ip.c | 1 - > > tcp.c | 122 ++--------------------------------------- > > tcp.h | 1 - > > udp.c | 99 +++------------------------------- > > udp.h | 1 - > > 8 files changed, 177 insertions(+), 244 deletions(-) > > > > diff --git a/conf.c b/conf.c > > index 0bcf80d7..57693b3f 100644 > > --- a/conf.c > > +++ b/conf.c > > @@ -148,9 +148,7 @@ static void conf_ports_range_except(const struct ctx *c, char optname, > > uint8_t flags) > > { > > unsigned delta = to - first; > > - bool bound_one = false; > > unsigned base, i; > > - int fd; > > > > if (first == 0) { > > die("Can't forward port 0 for option '-%c %s'", > > @@ -179,28 +177,6 @@ static void conf_ports_range_except(const struct ctx *c, char optname, > > warn( > > "Altering mapping of already mapped port number: %s", optarg); > > } > > - > > - if (!(flags & FWD_SCAN) && optname == 't') > > - fd = tcp_listen(c, PIF_HOST, addr, ifname, i); > > - else if (!(flags & FWD_SCAN) && optname == 'u') > > - fd = udp_listen(c, PIF_HOST, addr, ifname, i); > > - else > > - /* No way to check in advance for -T and -U */ > > - fd = 0; > > - > > - if (fd == -ENFILE || fd == -EMFILE) { > > - 
die( > > -"Can't open enough sockets for port specifier: %s", > > - optarg); > > - } > > - > > - if (fd >= 0) { > > - bound_one = true; > > - } else if (!(flags & FWD_WEAK)) { > > - die( > > -"Failed to bind port %u (%s) for option '-%c %s'", > > - i, strerror_(-fd), optname, optarg); > > - } > > } > > > > if ((optname == 'T' || optname == 'U') && c->no_bindtodevice) { > > @@ -226,9 +202,6 @@ static void conf_ports_range_except(const struct ctx *c, char optname, > > } > > base = i - 1; > > } > > - > > - if (!bound_one) > > - die("Failed to bind any port for '-%c %s'", optname, optarg); > > } > > > > /** > > diff --git a/fwd.c b/fwd.c > > index f27a4220..70ef73a3 100644 > > --- a/fwd.c > > +++ b/fwd.c > > @@ -22,6 +22,7 @@ > > #include > > > > #include "util.h" > > +#include "epoll_ctl.h" > > #include "ip.h" > > #include "siphash.h" > > #include "inany.h" > > @@ -420,6 +421,160 @@ void fwd_rules_print(const struct fwd_ports *fwd) > > } > > } > > > > +/** fwd_sync_one() - Create or remove listening sockets for a forward entry > > + * @c: Execution context > > + * @rule: Forwarding rule > > + * @pif: Interface to create listening sockets for > > + * @proto: Protocol to listen for > > + * @scanmap: Bitmap of ports to listen for on FWD_SCAN entries > > + * > > + * Return: 0 on success, -1 on failure > > + */ > > +static int fwd_sync_one(const struct ctx *c, const struct fwd_rule *rule, > > + uint8_t pif, uint8_t proto, const uint8_t *scanmap) > > +{ > > + const union inany_addr *addr = fwd_rule_addr(rule); > > + const char *ifname = rule->ifname; > > + bool bound_one = false; > > + unsigned port; > > + > > + ASSERT(pif_is_socket(pif)); > > + > > + if (!*ifname) > > + ifname = NULL; > > + > > + for (port = rule->first; port <= rule->last; port++) { > > + int fd = rule->socks[port - rule->first]; > > + > > + if ((rule->flags & FWD_SCAN) && !bitmap_isset(scanmap, port)) { > > + /* We don't want to listen on this port */ > > + if (fd >= 0) { > > + /* We already are, so stop 
*/ > > + epoll_del(c->epollfd, fd); > > + close(fd); > > + rule->socks[port - rule->first] = -1; > > + } > > + continue; > > + } > > + > > + if (fd >= 0) /* Already listening, nothing to do */ { > > + bound_one = true; > > + continue; > > + } > > + > > + if (proto == IPPROTO_TCP) > > + fd = tcp_listen(c, pif, addr, ifname, port); > > + else if (proto == IPPROTO_UDP) > > + fd = udp_listen(c, pif, addr, ifname, port); > > + else > > + ASSERT(0); > > + > > + if (fd < 0) { > > + char astr[INANY_ADDRSTRLEN] = ""; > > Should we perhaps make this "*" for consistency with fwd_rules_print()? Good idea, that simplifies things a bit too. This code predates my extension to inany_ntop(), and I forgot to rework it to take advantage. > > + > > + if (addr) > > + inany_ntop(addr, astr, sizeof(astr)); > > + > > + warn("Listen failed for %s %s port %s%s%s%s%u: %s", > > + pif_name(pif), ipproto_name(proto), > > + astr, ifname ? "%" : "", ifname ? ifname : "", > > + addr || ifname ? "/" : "", port, strerror_(-fd)); > > + > > + if (!(rule->flags & FWD_WEAK)) > > + return -1; > > + > > + continue; > > + } > > + > > + rule->socks[port - rule->first] = fd; > > + bound_one = true; > > + } > > + > > + if (!bound_one && !(rule->flags & FWD_SCAN)) { > > + char astr[INANY_ADDRSTRLEN] = ""; > > Same here. Done. > > + > > + if (addr) > > + inany_ntop(addr, astr, sizeof(astr)); > > + > > + warn("All listens failed for %s %s %s%s%s%s%u-%u", > > + pif_name(pif), ipproto_name(proto), > > + astr, ifname ? "%" : "", ifname ? ifname : "", > > + addr || ifname ? 
"/" : "", rule->first, rule->last); > > + return -1; > > + } > > + > > + return 0; > > +} > > + > > +/** struct fwd_listen_args - arguments for fwd_listen_init_() > > + * @c: Execution context > > + * @fwd: Forwarding information > > + * @scanmap: Bitmap of ports to auto-forward > > + * @pif: Interface to create listening sockets for > > + * @proto: Protocol > > + * @ret: Return code > > + */ > > +struct fwd_listen_args { > > + const struct ctx *c; > > + const struct fwd_ports *fwd; > > + const uint8_t *scanmap; > > + uint8_t pif; > > + uint8_t proto; > > + int ret; > > +}; > > + > > +/** fwd_listen_sync_() - Update listening sockets to match forwards > > + * @arg: struct fwd_listen_args with arguments > > + * > > + * Returns: zero > > + */ > > +static int fwd_listen_sync_(void *arg) > > +{ > > + struct fwd_listen_args *a = arg; > > + unsigned i; > > + > > + if (a->pif == PIF_SPLICE) > > + ns_enter(a->c); > > + > > + for (i = 0; i < a->fwd->count; i++) { > > + a->ret = fwd_sync_one(a->c, &a->fwd->rules[i], > > + a->pif, a->proto, a->fwd->map); > > + if (a->ret < 0) > > + break; > > + } > > + > > + return 0; > > +} > > + > > +/** fwd_listen_sync() - Update listening sockets to match forwards > > This has the same description as fwd_listen_sync_() and it might be > quite hard to understand the difference if one is not used to spot the > "void *arg" argument. What about: > > /** fwd_listen_sync() - Call fwd_listen_sync_() in the intended namespace > > ? Fair point, done. 
> > > + * @c: Execution context > > + * @fwd: Forwarding information > > + * @pif: Interface to create listening sockets for > > + * @proto: Protocol > > + * > > + * Return: 0 on success, -1 on failure > > + */ > > +int fwd_listen_sync(const struct ctx *c, const struct fwd_ports *fwd, > > + uint8_t pif, uint8_t proto) > > +{ > > + struct fwd_listen_args a = { > > + .c = c, .fwd = fwd, .pif = pif, .proto = proto, > > + }; > > + > > + if (pif == PIF_SPLICE) > > + NS_CALL(fwd_listen_sync_, &a); > > + else > > + fwd_listen_sync_(&a); > > + > > + if (a.ret < 0) { > > + err("Couldn't listen on requested %s ports", > > + ipproto_name(proto)); > > + return -1; > > + } > > + > > + return 0; > > +} > > + > > /* See enum in kernel's include/net/tcp_states.h */ > > #define UDP_LISTEN 0x07 > > #define TCP_LISTEN 0x0a > > @@ -578,10 +733,14 @@ void fwd_scan_ports_timer(struct ctx *c, const struct timespec *now) > > > > fwd_scan_ports(c); > > > > - if (!c->no_tcp) > > - tcp_port_rebind_all(c); > > - if (!c->no_udp) > > - udp_port_rebind_all(c); > > + if (!c->no_tcp) { > > + fwd_listen_sync(c, &c->tcp.fwd_in, PIF_HOST, IPPROTO_TCP); > > + fwd_listen_sync(c, &c->tcp.fwd_out, PIF_SPLICE, IPPROTO_TCP); > > + } > > + if (!c->no_udp) { > > + fwd_listen_sync(c, &c->udp.fwd_in, PIF_HOST, IPPROTO_UDP); > > + fwd_listen_sync(c, &c->udp.fwd_out, PIF_SPLICE, IPPROTO_UDP); > > + } > > } > > > > /** > > diff --git a/fwd.h b/fwd.h > > index 3ddcb91d..f84e7c01 100644 > > --- a/fwd.h > > +++ b/fwd.h > > @@ -108,6 +108,9 @@ void fwd_rules_print(const struct fwd_ports *fwd); > > void fwd_scan_ports_init(struct ctx *c); > > void fwd_scan_ports_timer(struct ctx * c, const struct timespec *now); > > > > +int fwd_listen_sync(const struct ctx *c, const struct fwd_ports *fwd, > > + uint8_t pif, uint8_t proto); > > + > > bool nat_inbound(const struct ctx *c, const union inany_addr *addr, > > union inany_addr *translated); > > uint8_t fwd_nat_from_tap(const struct ctx *c, uint8_t proto, > > diff --git 
a/ip.c b/ip.c > > index f1d224bd..fc26dab2 100644 > > --- a/ip.c > > +++ b/ip.c > > @@ -78,7 +78,6 @@ found: > > * /etc/protocols and might allocate, which isn't possible for us once > > * self-isolated. > > */ > > -/* cppcheck-suppress unusedFunction */ > > const char *ipproto_name(uint8_t proto) > > { > > switch (proto) { > > diff --git a/tcp.c b/tcp.c > > index 57faed4b..976f0ab7 100644 > > --- a/tcp.c > > +++ b/tcp.c > > @@ -2732,50 +2732,6 @@ int tcp_listen(const struct ctx *c, uint8_t pif, > > return s; > > } > > > > -/** > > - * tcp_ns_listen() - Init socket to listen for spliced outbound connections > > - * @c: Execution context > > - * @port: Port, host order > > - */ > > -static void tcp_ns_listen(const struct ctx *c, in_port_t port) > > -{ > > - ASSERT(!c->no_tcp); > > - > > - if (!c->no_bindtodevice) { > > - tcp_listen(c, PIF_SPLICE, NULL, "lo", port); > > - return; > > - } > > - > > - if (c->ifi4) > > - tcp_listen(c, PIF_SPLICE, &inany_loopback4, NULL, port); > > - if (c->ifi6) > > - tcp_listen(c, PIF_SPLICE, &inany_loopback6, NULL, port); > > -} > > - > > -/** > > - * tcp_ns_socks_init() - Bind sockets in namespace for outbound connections > > - * @arg: Execution context > > - * > > - * Return: 0 > > - */ > > -/* cppcheck-suppress [constParameterCallback, unmatchedSuppression] */ > > -static int tcp_ns_socks_init(void *arg) > > -{ > > - const struct ctx *c = (const struct ctx *)arg; > > - unsigned port; > > - > > - ns_enter(c); > > - > > - for (port = 0; port < NUM_PORTS; port++) { > > - if (!bitmap_isset(c->tcp.fwd_out.map, port)) > > - continue; > > - > > - tcp_ns_listen(c, port); > > - } > > - > > - return 0; > > -} > > - > > /** > > * tcp_sock_refill_pool() - Refill one pool of pre-opened sockets > > * @pool: Pool of sockets to refill > > @@ -2919,10 +2875,13 @@ int tcp_init(struct ctx *c) > > > > tcp_sock_refill_init(c); > > > > + if (fwd_listen_sync(c, &c->tcp.fwd_in, PIF_HOST, IPPROTO_TCP) < 0) > > + return -1; > > This needs an update to the 
function comment (which currently says > "Return: 0, doesn't return on failure"). > > * Return: 0, doesn't return on failure Fixed. > > if (c->mode == MODE_PASTA) { > > tcp_splice_init(c); > > - > > - NS_CALL(tcp_ns_socks_init, c); > > + if (fwd_listen_sync(c, &c->tcp.fwd_out, > > + PIF_SPLICE, IPPROTO_TCP) < 0) > > + return -1; > > } > > > > peek_offset_cap = (!c->ifi4 || tcp_probe_peek_offset_cap(AF_INET)) && > > @@ -2941,77 +2900,6 @@ int tcp_init(struct ctx *c) > > return 0; > > } > > > > -/** > > - * tcp_port_rebind() - Rebind ports to match forward maps > > - * @c: Execution context > > - * @outbound: True to remap outbound forwards, otherwise inbound > > - * > > - * Must be called in namespace context if @outbound is true. > > - */ > > -static void tcp_port_rebind(struct ctx *c, bool outbound) > > -{ > > - const uint8_t *fmap = outbound ? c->tcp.fwd_out.map : c->tcp.fwd_in.map; > > - int (*socks)[IP_VERSIONS] = outbound ? tcp_sock_ns : tcp_sock_init_ext; > > - unsigned port; > > - > > - for (port = 0; port < NUM_PORTS; port++) { > > - if (!bitmap_isset(fmap, port)) { > > - if (socks[port][V4] >= 0) { > > - close(socks[port][V4]); > > - socks[port][V4] = -1; > > - } > > - > > - if (socks[port][V6] >= 0) { > > - close(socks[port][V6]); > > - socks[port][V6] = -1; > > - } > > - > > - continue; > > - } > > - > > - if ((c->ifi4 && socks[port][V4] == -1) || > > - (c->ifi6 && socks[port][V6] == -1)) { > > - if (outbound) > > - tcp_ns_listen(c, port); > > - else > > - tcp_listen(c, PIF_HOST, NULL, NULL, port); > > - } > > - } > > -} > > - > > -/** > > - * tcp_port_rebind_outbound() - Rebind ports in namespace > > - * @arg: Execution context > > - * > > - * Called with NS_CALL() > > - * > > - * Return: 0 > > - */ > > -static int tcp_port_rebind_outbound(void *arg) > > -{ > > - struct ctx *c = (struct ctx *)arg; > > - > > - ns_enter(c); > > - tcp_port_rebind(c, true); > > - > > - return 0; > > -} > > - > > -/** > > - * tcp_port_rebind_all() - Rebind ports to match 
forward maps (in host & ns) > > - * @c: Execution context > > - */ > > -void tcp_port_rebind_all(struct ctx *c) > > -{ > > - ASSERT(c->mode == MODE_PASTA && !c->no_tcp); > > - > > - if (c->tcp.fwd_out.mode == FWD_AUTO) > > - NS_CALL(tcp_port_rebind_outbound, c); > > - > > - if (c->tcp.fwd_in.mode == FWD_AUTO) > > - tcp_port_rebind(c, false); > > -} > > - > > /** > > * tcp_timer() - Periodic tasks: port detection, closed connections, pool refill > > * @c: Execution context > > diff --git a/tcp.h b/tcp.h > > index ef1e3544..45f97d93 100644 > > --- a/tcp.h > > +++ b/tcp.h > > @@ -22,7 +22,6 @@ int tcp_listen(const struct ctx *c, uint8_t pif, > > const union inany_addr *addr, const char *ifname, > > in_port_t port); > > int tcp_init(struct ctx *c); > > -void tcp_port_rebind_all(struct ctx *c); > > void tcp_timer(const struct ctx *c, const struct timespec *now); > > void tcp_defer_handler(struct ctx *c); > > > > diff --git a/udp.c b/udp.c > > index d7dcb1d2..7c5546df 100644 > > --- a/udp.c > > +++ b/udp.c > > @@ -1203,98 +1203,6 @@ static void udp_splice_iov_init(void) > > } > > } > > > > -/** > > - * udp_ns_listen() - Init socket to listen for spliced outbound connections > > - * @c: Execution context > > - * @port: Port, host order > > - */ > > -static void udp_ns_listen(const struct ctx *c, in_port_t port) > > -{ > > - ASSERT(!c->no_udp); > > - > > - if (!c->no_bindtodevice) { > > - udp_listen(c, PIF_SPLICE, NULL, "lo", port); > > - return; > > - } > > - > > - if (c->ifi4) > > - udp_listen(c, PIF_SPLICE, &inany_loopback4, NULL, port); > > - if (c->ifi6) > > - udp_listen(c, PIF_SPLICE, &inany_loopback6, NULL, port); > > -} > > - > > -/** > > - * udp_port_rebind() - Rebind ports to match forward maps > > - * @c: Execution context > > - * @outbound: True to remap outbound forwards, otherwise inbound > > - * > > - * Must be called in namespace context if @outbound is true. 
> > - */ > > -static void udp_port_rebind(struct ctx *c, bool outbound) > > -{ > > - int (*socks)[NUM_PORTS] = outbound ? udp_splice_ns : udp_splice_init; > > - const uint8_t *fmap > > - = outbound ? c->udp.fwd_out.map : c->udp.fwd_in.map; > > - unsigned port; > > - > > - for (port = 0; port < NUM_PORTS; port++) { > > - if (!bitmap_isset(fmap, port)) { > > - if (socks[V4][port] >= 0) { > > - close(socks[V4][port]); > > - socks[V4][port] = -1; > > - } > > - > > - if (socks[V6][port] >= 0) { > > - close(socks[V6][port]); > > - socks[V6][port] = -1; > > - } > > - > > - continue; > > - } > > - > > - if ((c->ifi4 && socks[V4][port] == -1) || > > - (c->ifi6 && socks[V6][port] == -1)) { > > - if (outbound) > > - udp_ns_listen(c, port); > > - else > > - udp_listen(c, PIF_HOST, NULL, NULL, port); > > - } > > - } > > -} > > - > > -/** > > - * udp_port_rebind_outbound() - Rebind ports in namespace > > - * @arg: Execution context > > - * > > - * Called with NS_CALL() > > - * > > - * Return: 0 > > - */ > > -static int udp_port_rebind_outbound(void *arg) > > -{ > > - struct ctx *c = (struct ctx *)arg; > > - > > - ns_enter(c); > > - udp_port_rebind(c, true); > > - > > - return 0; > > -} > > - > > -/** > > - * udp_port_rebind_all() - Rebind ports to match forward maps (in host & ns) > > - * @c: Execution context > > - */ > > -void udp_port_rebind_all(struct ctx *c) > > -{ > > - ASSERT(c->mode == MODE_PASTA && !c->no_udp); > > - > > - if (c->udp.fwd_out.mode == FWD_AUTO) > > - NS_CALL(udp_port_rebind_outbound, c); > > - > > - if (c->udp.fwd_in.mode == FWD_AUTO) > > - udp_port_rebind(c, false); > > -} > > - > > /** > > * udp_init() - Initialise per-socket data, and sockets in namespace > > * @c: Execution context > > @@ -1307,9 +1215,14 @@ int udp_init(struct ctx *c) > > > > udp_iov_init(c); > > > > + if (fwd_listen_sync(c, &c->udp.fwd_in, PIF_HOST, IPPROTO_UDP) < 0) > > + return -1; > > Same here, update to the function comment needed. Fixed. 
> > > + > > if (c->mode == MODE_PASTA) { > > udp_splice_iov_init(); > > - NS_CALL(udp_port_rebind_outbound, c); > > + if (fwd_listen_sync(c, &c->udp.fwd_out, > > + PIF_SPLICE, IPPROTO_UDP) < 0) > > + return -1; > > } > > > > return 0; > > diff --git a/udp.h b/udp.h > > index 94c698e2..73efe036 100644 > > --- a/udp.h > > +++ b/udp.h > > @@ -19,7 +19,6 @@ int udp_listen(const struct ctx *c, uint8_t pif, > > const union inany_addr *addr, const char *ifname, > > in_port_t port); > > int udp_init(struct ctx *c); > > -void udp_port_rebind_all(struct ctx *c); > > void udp_update_l2_buf(const unsigned char *eth_d); > > > > /** > > -- > Stefano > -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson