On Tue, Jan 13, 2026 at 11:12:35PM +0100, Stefano Brivio wrote: > On Thu, 8 Jan 2026 13:29:47 +1100 > David Gibson wrote: > > > Now that we have a table of all our forwarding rules, every listening > > socket can be associated with a specific rule. Add an index allowing us to > > locate that rule from the socket's epoll reference. We don't use it yet, > > but we'll use it to optimise rule lookup when forwarding new flows. > > > > Signed-off-by: David Gibson > > --- > > fwd.c | 15 ++++++++++----- > > fwd.h | 5 +++++ > > tcp.c | 4 +++- > > tcp.h | 5 ++--- > > udp.c | 4 +++- > > udp.h | 5 ++--- > > 6 files changed, 25 insertions(+), 13 deletions(-) > > > > diff --git a/fwd.c b/fwd.c > > index 7c4575ff..6727d26f 100644 > > --- a/fwd.c > > +++ b/fwd.c > > @@ -474,6 +474,7 @@ void fwd_rules_print(const struct fwd_ports *fwd) > > > > /** fwd_sync_one() - Create or remove listening sockets for a forward entry > > * @c: Execution context > > + * @fwd: Forwarding table > > * @rule: Forwarding rule > > * @pif: Interface to create listening sockets for > > * @proto: Protocol to listen for > > @@ -481,19 +482,23 @@ void fwd_rules_print(const struct fwd_ports *fwd) > > * > > * Return: 0 on success, -1 on failure > > */ > > -static int fwd_sync_one(const struct ctx *c, const struct fwd_rule *rule, > > +static int fwd_sync_one(const struct ctx *c, > > + const struct fwd_ports *fwd, const struct fwd_rule *rule, > > uint8_t pif, uint8_t proto, const uint8_t *scanmap) > > { > > const union inany_addr *addr = fwd_rule_addr(rule); > > const char *ifname = rule->ifname; > > bool bound_one = false; > > - unsigned port; > > + unsigned port, idx; > > > > ASSERT(pif_is_socket(pif)); > > > > if (!*ifname) > > ifname = NULL; > > > > + idx = rule - fwd->rules; > > + ASSERT(idx < MAX_FWD_RULES); > > + > > for (port = rule->first; port <= rule->last; port++) { > > int fd = rule->socks[port - rule->first]; > > > > @@ -514,9 +519,9 @@ static int fwd_sync_one(const struct ctx *c, const struct 
fwd_rule *rule, > > } > > > > if (proto == IPPROTO_TCP) > > - fd = tcp_listen(c, pif, addr, ifname, port); > > + fd = tcp_listen(c, pif, idx, addr, ifname, port); > > else if (proto == IPPROTO_UDP) > > - fd = udp_listen(c, pif, addr, ifname, port); > > + fd = udp_listen(c, pif, idx, addr, ifname, port); > > else > > ASSERT(0); > > > > @@ -588,7 +593,7 @@ static int fwd_listen_sync_(void *arg) > > ns_enter(a->c); > > > > for (i = 0; i < a->fwd->count; i++) { > > - a->ret = fwd_sync_one(a->c, &a->fwd->rules[i], > > + a->ret = fwd_sync_one(a->c, a->fwd, &a->fwd->rules[i], > > a->pif, a->proto, a->fwd->map); > > if (a->ret < 0) > > break; > > diff --git a/fwd.h b/fwd.h > > index cfe9ed46..435f422a 100644 > > --- a/fwd.h > > +++ b/fwd.h > > @@ -48,14 +48,19 @@ struct fwd_rule { > > * union fwd_listen_ref - information about a single listening socket > > * @port: Bound port number of the socket > > * @pif: pif in which the socket is listening > > + * @rule: Index of forwarding rule > > */ > > union fwd_listen_ref { > > struct { > > in_port_t port; > > uint8_t pif; > > +#define FWD_RULE_BITS 8 > > + unsigned rule :FWD_RULE_BITS; > > }; > > uint32_t u32; > > }; > > +static_assert(sizeof(union fwd_listen_ref) == sizeof(uint32_t)); > > Why do we need this, specifically? It goes into the data field of the epoll_ref, so it has to be exactly 32 bits. With the bitfields, it may not be immediately obvious that the structure isn't larger than that. In particular, this relies on the compiler not inserting padding between @pif and @rule; since alignof(unsigned) is typically 4, I was concerned it might. Even if that is guaranteed by the C standard, I think it's nicer not to require the reader to know that. 
> > +static_assert(MAX_FWD_RULES <= (1U << FWD_RULE_BITS)); > > I start wondering if instead of having a 'rule' field supporting 256 > rules, with 128 as maximum number of rules, we could just have 256 as > maximum number of rules and use the usual MAX_FROM_BITS() macro to keep > things simpler. Good idea, done. Btw, as a later change, I'm considering merging the four forwarding tables into one. If that's done, we won't need @pif in the epoll_ref any more (it will be in the rule), and we'll have 16 bits of space if we need to expand the rule table. > After all, it's not really rules[] taking space: Certainly. > struct fwd_ports { > enum fwd_ports_mode mode; /* 0 4 */ > int scan4; /* 4 4 */ > int scan6; /* 8 4 */ > unsigned int count; /* 12 4 */ > struct fwd_rule rules[128]; /* 16 7168 */ > /* --- cacheline 112 boundary (7168 bytes) was 16 bytes ago --- */ > uint8_t map[8192]; /* 7184 8192 */ > /* --- cacheline 240 boundary (15360 bytes) was 16 bytes ago --- */ > unsigned int listen_sock_count; /* 15376 4 */ > int listen_socks[196608]; /* 15380 786432 */ > > /* size: 801816, cachelines: 12529, members: 8 */ > /* padding: 4 */ > /* last cacheline: 24 bytes */ > }; > > > enum fwd_ports_mode { > > FWD_UNSET = 0, > > diff --git a/tcp.c b/tcp.c > > index e9b440da..fc03e38f 100644 > > --- a/tcp.c > > +++ b/tcp.c > > @@ -2672,18 +2672,20 @@ void tcp_sock_handler(const struct ctx *c, union epoll_ref ref, > > * tcp_listen() - Create listening socket > > * @c: Execution context > > * @pif: Interface to open the socket for (PIF_HOST or PIF_SPLICE) > > + * @rule: Index of relevant forwarding rule > > * @addr: Pointer to address for binding, NULL for any > > * @ifname: Name of interface to bind to, NULL for any > > * @port: Port, host order > > * > > * Return: Socket fd on success, negative error code on failure > > */ > > -int tcp_listen(const struct ctx *c, uint8_t pif, > > +int tcp_listen(const struct ctx *c, uint8_t pif, unsigned rule, > > const union inany_addr *addr, const 
char *ifname, in_port_t port) > > { > > union fwd_listen_ref ref = { > > .port = port, > > .pif = pif, > > + .rule = rule, > > }; > > int s; > > > > diff --git a/tcp.h b/tcp.h > > index 45f97d93..24b90870 100644 > > --- a/tcp.h > > +++ b/tcp.h > > @@ -18,9 +18,8 @@ void tcp_sock_handler(const struct ctx *c, union epoll_ref ref, > > int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, > > const void *saddr, const void *daddr, uint32_t flow_lbl, > > const struct pool *p, int idx, const struct timespec *now); > > -int tcp_listen(const struct ctx *c, uint8_t pif, > > - const union inany_addr *addr, const char *ifname, > > - in_port_t port); > > +int tcp_listen(const struct ctx *c, uint8_t pif, unsigned rule, > > + const union inany_addr *addr, const char *ifname, in_port_t port); > > int tcp_init(struct ctx *c); > > void tcp_timer(const struct ctx *c, const struct timespec *now); > > void tcp_defer_handler(struct ctx *c); > > diff --git a/udp.c b/udp.c > > index 92a87198..761221f6 100644 > > --- a/udp.c > > +++ b/udp.c > > @@ -1115,18 +1115,20 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, > > * udp_listen() - Initialise listening socket for a given port > > * @c: Execution context > > * @pif: Interface to open the socket for (PIF_HOST or PIF_SPLICE) > > + * @rule: Index of relevant forwarding rule > > * @addr: Pointer to address for binding, NULL if not configured > > * @ifname: Name of interface to bind to, NULL if not configured > > * @port: Port, host order > > * > > * Return: Socket fd on success, negative error code on failure > > */ > > -int udp_listen(const struct ctx *c, uint8_t pif, > > +int udp_listen(const struct ctx *c, uint8_t pif, unsigned rule, > > const union inany_addr *addr, const char *ifname, in_port_t port) > > { > > union fwd_listen_ref ref = { > > .pif = pif, > > .port = port, > > + .rule = rule, > > }; > > int s; > > > > diff --git a/udp.h b/udp.h > > index 3c6f90a9..2b91d728 100644 > > --- a/udp.h > > +++ b/udp.h > > 
@@ -14,9 +14,8 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, > > sa_family_t af, const void *saddr, const void *daddr, > > uint8_t ttl, const struct pool *p, int idx, > > const struct timespec *now); > > -int udp_listen(const struct ctx *c, uint8_t pif, > > - const union inany_addr *addr, const char *ifname, > > - in_port_t port); > > +int udp_listen(const struct ctx *c, uint8_t pif, unsigned rule, > > + const union inany_addr *addr, const char *ifname, in_port_t port); > > int udp_init(struct ctx *c); > > void udp_update_l2_buf(const unsigned char *eth_d); > > > > -- > Stefano > -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson