public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v4 14/14] flow, fwd: Optimise forwarding rule lookup using epoll ref when possible
Date: Thu, 15 Jan 2026 19:50:45 +1100	[thread overview]
Message-ID: <20260115085045.3309818-15-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20260115085045.3309818-1-david@gibson.dropbear.id.au>

Now that listening sockets include a reference to the forwarding rule
which created them, we can, in many cases, avoid a linear search of the
forwarding table when we want to find the relevant rule.  Instead, we
can take the rule index from the socket's epoll reference and use
that to immediately find the correct rule.

This is conceptually simple, but requires a moderate amount of
plumbing to get the index from the reference through to the rule
lookup.  We still allow falling back to a linear search if we don't
have the index.  This may (rarely) be used in the udp_flush_flow()
case, where we could get packets for one flow on a different flow's
socket, rather than through a listening socket as usual.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 flow.c         |  7 ++++---
 flow_table.h   |  2 +-
 fwd.c          | 19 ++++++++++++++++++-
 fwd.h          |  4 +++-
 icmp.c         |  2 +-
 tcp.c          |  4 ++--
 udp.c          | 14 +++++++++-----
 udp_flow.c     | 14 ++++++++------
 udp_flow.h     |  2 +-
 udp_internal.h |  4 ++--
 10 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/flow.c b/flow.c
index 99e4f7f6..3d2c211b 100644
--- a/flow.c
+++ b/flow.c
@@ -519,12 +519,13 @@ struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif,
  * flow_target() - Determine where flow should forward to, and move to TGT
  * @c:		Execution context
  * @flow:	Flow to forward
+ * @rule_hint:	Index of relevant forwarding rule, or -1 if unknown
  * @proto:	Protocol
  *
  * Return: pointer to the target flowside information
  */
 struct flowside *flow_target(const struct ctx *c, union flow *flow,
-			     uint8_t proto)
+			     int rule_hint, uint8_t proto)
 {
 	char estr[INANY_ADDRSTRLEN], ostr[INANY_ADDRSTRLEN];
 	struct flow_common *f = &flow->f;
@@ -553,7 +554,7 @@ struct flowside *flow_target(const struct ctx *c, union flow *flow,
 		else
 			goto nofwd;
 
-		if (!(rule = fwd_rule_search(fwd, ini)))
+		if (!(rule = fwd_rule_search(fwd, ini, rule_hint)))
 			goto norule;
 
 		tgtpif = fwd_nat_from_splice(rule, proto, ini, tgt);
@@ -567,7 +568,7 @@ struct flowside *flow_target(const struct ctx *c, union flow *flow,
 		else
 			goto nofwd;
 
-		if (!(rule = fwd_rule_search(fwd, ini)))
+		if (!(rule = fwd_rule_search(fwd, ini, rule_hint)))
 			goto norule;
 
 		tgtpif = fwd_nat_from_host(c, rule, proto, ini, tgt);
diff --git a/flow_table.h b/flow_table.h
index 5ee13acc..73de13ba 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -207,7 +207,7 @@ const struct flowside *flow_target_af(union flow *flow, uint8_t pif,
 				      const void *saddr, in_port_t sport,
 				      const void *daddr, in_port_t dport);
 struct flowside *flow_target(const struct ctx *c, union flow *flow,
-			     uint8_t proto);
+			     int rule_hint, uint8_t proto);
 
 union flow *flow_set_type(union flow *flow, enum flow_type type);
 #define FLOW_SET_TYPE(flow_, t_, var_)	(&flow_set_type((flow_), (t_))->var_)
diff --git a/fwd.c b/fwd.c
index 7ca2d1f2..fca49ae1 100644
--- a/fwd.c
+++ b/fwd.c
@@ -430,14 +430,31 @@ static bool fwd_rule_match(const struct fwd_rule *rule,
  * fwd_rule_search() - Find a rule which matches a prospective flow
  * @fwd:	Forwarding table
  * @ini:	Initiating side flow information
+ * @hint:	Index of the rule in table, if known, otherwise FWD_NO_HINT
  *
  * Returns: first matching rule, or NULL if there is none
  */
 const struct fwd_rule *fwd_rule_search(const struct fwd_ports *fwd,
-				       const struct flowside *ini)
+				       const struct flowside *ini,
+				       int hint)
 {
 	unsigned i;
 
+	if (hint >= 0) {
+		char ostr[INANY_ADDRSTRLEN], rstr[INANY_ADDRSTRLEN];
+		const struct fwd_rule *rule = &fwd->rules[hint];
+
+		ASSERT((unsigned)hint < fwd->count);
+		if (fwd_rule_match(rule, ini))
+			return rule;
+
+		debug("Incorrect rule hint: %s:%hu does not match %s:%hu-%hu",
+		      inany_ntop(&ini->oaddr, ostr, sizeof(ostr)), ini->oport,
+		      inany_ntop(fwd_rule_addr(rule), rstr, sizeof(rstr)),
+		      rule->first, rule->last);
+		return NULL;
+	}
+
 	for (i = 0; i < fwd->count; i++) {
 		if (fwd_rule_match(&fwd->rules[i], ini))
 			return &fwd->rules[i];
diff --git a/fwd.h b/fwd.h
index 5d914ec4..a5dc89db 100644
--- a/fwd.h
+++ b/fwd.h
@@ -46,6 +46,7 @@ struct fwd_rule {
 
 #define FWD_RULE_BITS	8
 #define MAX_FWD_RULES	MAX_FROM_BITS(FWD_RULE_BITS)
+#define FWD_NO_HINT	(-1)
 
 /**
  * union fwd_listen_ref - information about a single listening socket
@@ -108,7 +109,8 @@ void fwd_rule_add(struct fwd_ports *fwd, uint8_t flags,
 		  const union inany_addr *addr, const char *ifname,
 		  in_port_t first, in_port_t last, in_port_t to);
 const struct fwd_rule *fwd_rule_search(const struct fwd_ports *fwd,
-				       const struct flowside *ini);
+				       const struct flowside *ini,
+				       int hint);
 void fwd_rules_print(const struct fwd_ports *fwd);
 
 void fwd_scan_ports_init(struct ctx *c);
diff --git a/icmp.c b/icmp.c
index eb7f11be..328fd6eb 100644
--- a/icmp.c
+++ b/icmp.c
@@ -182,7 +182,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
 		return NULL;
 
 	flow_initiate_af(flow, PIF_TAP, af, saddr, id, daddr, id);
-	if (!(tgt = flow_target(c, flow, proto)))
+	if (!(tgt = flow_target(c, flow, FWD_NO_HINT, proto)))
 		goto cancel;
 
 	if (flow->f.pif[TGTSIDE] != PIF_HOST) {
diff --git a/tcp.c b/tcp.c
index f49d953a..45dde5a0 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1656,7 +1656,7 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
 	ini = flow_initiate_af(flow, PIF_TAP,
 			       af, saddr, srcport, daddr, dstport);
 
-	if (!(tgt = flow_target(c, flow, IPPROTO_TCP)))
+	if (!(tgt = flow_target(c, flow, FWD_NO_HINT, IPPROTO_TCP)))
 		goto cancel;
 
 	if (flow->f.pif[TGTSIDE] != PIF_HOST) {
@@ -2495,7 +2495,7 @@ void tcp_listen_handler(const struct ctx *c, union epoll_ref ref,
 		goto cancel;
 	}
 
-	if (!flow_target(c, flow, IPPROTO_TCP))
+	if (!flow_target(c, flow, ref.listen.rule, IPPROTO_TCP))
 		goto cancel;
 
 	switch (flow->f.pif[TGTSIDE]) {
diff --git a/udp.c b/udp.c
index 7cb10a22..b2383e20 100644
--- a/udp.c
+++ b/udp.c
@@ -838,12 +838,13 @@ static void udp_buf_sock_to_tap(const struct ctx *c, int s, int n,
  * udp_sock_fwd() - Forward datagrams from a possibly unconnected socket
  * @c:		Execution context
  * @s:		Socket to forward from
+ * @rule_hint:	Forwarding rule to use, or -1 if unknown
  * @frompif:	Interface to which @s belongs
  * @port:	Our (local) port number of @s
  * @now:	Current timestamp
  */
-void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif,
-		  in_port_t port, const struct timespec *now)
+void udp_sock_fwd(const struct ctx *c, int s, int rule_hint,
+		  uint8_t frompif, in_port_t port, const struct timespec *now)
 {
 	union sockaddr_inany src;
 	union inany_addr dst;
@@ -868,7 +869,8 @@ void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif,
 			continue;
 		}
 
-		tosidx = udp_flow_from_sock(c, frompif, &dst, port, &src, now);
+		tosidx = udp_flow_from_sock(c, frompif, &dst, port, &src,
+					    rule_hint, now);
 		topif = pif_at_sidx(tosidx);
 
 		if (pif_is_socket(topif)) {
@@ -910,8 +912,10 @@ void udp_listen_sock_handler(const struct ctx *c,
 			     union epoll_ref ref, uint32_t events,
 			     const struct timespec *now)
 {
-	if (events & (EPOLLERR | EPOLLIN))
-		udp_sock_fwd(c, ref.fd, ref.listen.pif, ref.listen.port, now);
+	if (events & (EPOLLERR | EPOLLIN)) {
+		udp_sock_fwd(c, ref.fd, ref.listen.rule,
+			     ref.listen.pif, ref.listen.port, now);
+	}
 }
 
 /**
diff --git a/udp_flow.c b/udp_flow.c
index 80b15433..bcf31e3c 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -131,6 +131,7 @@ static int udp_flow_sock(const struct ctx *c,
  * udp_flow_new() - Common setup for a new UDP flow
  * @c:		Execution context
  * @flow:	Initiated flow
+ * @rule_hint:	Index of forwarding rule, or -1 if unknown
  * @now:	Timestamp
  *
  * Return: sidx for the target side of the new UDP flow, or FLOW_SIDX_NONE
@@ -139,13 +140,13 @@ static int udp_flow_sock(const struct ctx *c,
  * #syscalls getsockname
  */
 static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
-				const struct timespec *now)
+				int rule_hint, const struct timespec *now)
 {
 	struct udp_flow *uflow = NULL;
 	const struct flowside *tgt;
 	unsigned sidei;
 
-	if (!(tgt = flow_target(c, flow, IPPROTO_UDP)))
+	if (!(tgt = flow_target(c, flow, rule_hint, IPPROTO_UDP)))
 		goto cancel;
 
 	uflow = FLOW_SET_TYPE(flow, FLOW_UDP, udp);
@@ -208,6 +209,7 @@ cancel:
  * @dst:	Our (local) address to which the datagram is arriving
  * @port:	Our (local) port number to which the datagram is arriving
  * @s_in:	Source socket address, filled in by recvmmsg()
+ * @rule_hint:	Index of forwarding rule, or -1 if unknown
  * @now:	Timestamp
  *
  * #syscalls fcntl arm:fcntl64 ppc64:fcntl64|fcntl i686:fcntl64
@@ -218,7 +220,7 @@ cancel:
 flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif,
 			       const union inany_addr *dst, in_port_t port,
 			       const union sockaddr_inany *s_in,
-			       const struct timespec *now)
+			       int rule_hint, const struct timespec *now)
 {
 	const struct flowside *ini;
 	struct udp_flow *uflow;
@@ -252,7 +254,7 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif,
 		return FLOW_SIDX_NONE;
 	}
 
-	return udp_flow_new(c, flow, now);
+	return udp_flow_new(c, flow, rule_hint, now);
 }
 
 /**
@@ -308,7 +310,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
 		return FLOW_SIDX_NONE;
 	}
 
-	return udp_flow_new(c, flow, now);
+	return udp_flow_new(c, flow, FWD_NO_HINT, now);
 }
 
 /**
@@ -324,7 +326,7 @@ static void udp_flush_flow(const struct ctx *c,
 {
 	/* We don't know exactly where the datagrams will come from, but we know
 	 * they'll have an interface and oport matching this flow */
-	udp_sock_fwd(c, uflow->s[sidei], uflow->f.pif[sidei],
+	udp_sock_fwd(c, uflow->s[sidei], -1, uflow->f.pif[sidei],
 		     uflow->f.side[sidei].oport, now);
 }
 
diff --git a/udp_flow.h b/udp_flow.h
index 4c528e95..14e0f920 100644
--- a/udp_flow.h
+++ b/udp_flow.h
@@ -35,7 +35,7 @@ struct udp_flow *udp_at_sidx(flow_sidx_t sidx);
 flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif,
 			       const union inany_addr *dst, in_port_t port,
 			       const union sockaddr_inany *s_in,
-			       const struct timespec *now);
+			       int rule_hint, const struct timespec *now);
 flow_sidx_t udp_flow_from_tap(const struct ctx *c,
 			      uint8_t pif, sa_family_t af,
 			      const void *saddr, const void *daddr,
diff --git a/udp_internal.h b/udp_internal.h
index f7d09bb6..0a8fe490 100644
--- a/udp_internal.h
+++ b/udp_internal.h
@@ -28,7 +28,7 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
 size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
 		       const struct flowside *toside, size_t dlen,
 		       bool no_udp_csum);
-void udp_sock_fwd(const struct ctx *c, int s, uint8_t frompif,
-		  in_port_t port, const struct timespec *now);
+void udp_sock_fwd(const struct ctx *c, int s, int rule_hint,
+		  uint8_t frompif, in_port_t port, const struct timespec *now);
 
 #endif /* UDP_INTERNAL_H */
-- 
2.52.0


      parent reply	other threads:[~2026-01-15  8:50 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-15  8:50 [PATCH v4 00/14] Introduce forwarding table David Gibson
2026-01-15  8:50 ` [PATCH v4 01/14] inany: Extend inany_ntop() to treat NULL as a fully unspecified address David Gibson
2026-01-15  8:50 ` [PATCH v4 02/14] conf, fwd: Keep a table of our port forwarding configuration David Gibson
2026-01-15 23:01   ` Stefano Brivio
2026-01-16  0:20     ` David Gibson
2026-01-16  0:25       ` Stefano Brivio
2026-01-16  0:48         ` David Gibson
2026-01-15  8:50 ` [PATCH v4 03/14] conf: Accurately record ifname and address for outbound forwards David Gibson
2026-01-15  8:50 ` [PATCH v4 04/14] conf, fwd: Record "auto" port forwards in forwarding table David Gibson
2026-01-15  8:50 ` [PATCH v4 05/14] fwd: Make space to store listening sockets in forward table David Gibson
2026-01-15  8:50 ` [PATCH v4 06/14] ip: Add ipproto_name() function David Gibson
2026-01-15  8:50 ` [PATCH v4 07/14] fwd, tcp, udp: Set up listening sockets based on forward table David Gibson
2026-01-15  8:50 ` [PATCH v4 08/14] tcp, udp: Remove old auto-forwarding socket arrays David Gibson
2026-01-15  8:50 ` [PATCH v4 09/14] conf, fwd: Check forwarding table for conflicting rules David Gibson
2026-01-15  8:50 ` [PATCH v4 10/14] fwd: Generate auto-forward exclusions from socket fd tables David Gibson
2026-01-15  8:50 ` [PATCH v4 11/14] flow, fwd: Consult rules table when forwarding a new flow from socket David Gibson
2026-01-15  8:50 ` [PATCH v4 12/14] fwd: Remap ports based directly on forwarding rule David Gibson
2026-01-15  8:50 ` [PATCH v4 13/14] fwd, tcp, udp: Add forwarding rule to listening socket epoll references David Gibson
2026-01-15  8:50 ` David Gibson [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260115085045.3309818-15-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

Code repositories for project(s) associated with this public inbox:

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).