public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: Jon Maloy <jmaloy@redhat.com>
To: passt-dev@passt.top, sbrivio@redhat.com, lvivier@redhat.com,
	dgibson@redhat.com, jmaloy@redhat.com
Subject: [PATCH v2 1/2] Add ttl to outgoing flow structure lookups
Date: Thu, 27 Mar 2025 12:51:54 -0400	[thread overview]
Message-ID: <20250327165155.3354811-2-jmaloy@redhat.com> (raw)
In-Reply-To: <20250327165155.3354811-1-jmaloy@redhat.com>

As preparation for future enhancements, we add the TTL/hop limit as a
creation and lookup criterion for outgoing flows. This comes in addition
to the regular 4-tuple (addresses and ports) which is currently used.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 flow.c       | 17 ++++++++++++-----
 flow.h       |  8 ++++++--
 flow_table.h |  3 ++-
 icmp.c       | 15 ++++++++++-----
 icmp.h       |  2 +-
 packet.h     |  2 ++
 tap.c        | 25 ++++++++++++++++++-------
 tcp.c        |  6 +++---
 udp.c        |  8 ++++++--
 udp.h        |  3 ++-
 udp_flow.c   |  7 ++++---
 udp_flow.h   |  2 +-
 12 files changed, 67 insertions(+), 31 deletions(-)

diff --git a/flow.c b/flow.c
index 8622242..6796f73 100644
--- a/flow.c
+++ b/flow.c
@@ -137,10 +137,12 @@ static struct timespec flow_timer_run;
  * @eport:	Endpoint port
  * @oaddr:	Our address (pointer to in_addr or in6_addr)
  * @oport:	Our port
+ * @ttl:	TTL/hop limit for packets in flow
  */
 static void flowside_from_af(struct flowside *side, sa_family_t af,
 			     const void *eaddr, in_port_t eport,
-			     const void *oaddr, in_port_t oport)
+			     const void *oaddr, in_port_t oport,
+			     uint8_t ttl)
 {
 	if (oaddr)
 		inany_from_af(&side->oaddr, af, oaddr);
@@ -153,6 +155,8 @@ static void flowside_from_af(struct flowside *side, sa_family_t af,
 	else
 		side->eaddr = inany_any6;
 	side->eport = eport;
+
+	side->ttl = ttl;
 }
 
 /**
@@ -376,17 +380,19 @@ static void flow_initiate_(union flow *flow, uint8_t pif)
  * @sport:	Endpoint port
  * @daddr:	Destination address (pointer to in_addr or in6_addr)
  * @dport:	Destination port
+ * @ttl:	TTL/hop limit for packets in flow
  *
  * Return: pointer to the initiating flowside information
  */
 const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif,
 					sa_family_t af,
 					const void *saddr, in_port_t sport,
-					const void *daddr, in_port_t dport)
+					const void *daddr, in_port_t dport,
+					uint8_t ttl)
 {
 	struct flowside *ini = &flow->f.side[INISIDE];
 
-	flowside_from_af(ini, af, saddr, sport, daddr, dport);
+	flowside_from_af(ini, af, saddr, sport, daddr, dport, ttl);
 	flow_initiate_(flow, pif);
 	return ini;
 }
@@ -731,17 +737,18 @@ static flow_sidx_t flowside_lookup(const struct ctx *c, uint8_t proto,
  * @oaddr:	Our guest side address (guest remote address)
  * @eport:	Guest side endpoint port (guest local port)
  * @oport:	Our guest side port (guest remote port)
+ * @ttl:	TTL/hop limit of flow we are looking for
  *
  * Return: sidx of the matching flow & side, FLOW_SIDX_NONE if not found
  */
 flow_sidx_t flow_lookup_af(const struct ctx *c,
 			   uint8_t proto, uint8_t pif, sa_family_t af,
 			   const void *eaddr, const void *oaddr,
-			   in_port_t eport, in_port_t oport)
+			   in_port_t eport, in_port_t oport, uint8_t ttl)
 {
 	struct flowside side;
 
-	flowside_from_af(&side, af, eaddr, eport, oaddr, oport);
+	flowside_from_af(&side, af, eaddr, eport, oaddr, oport, ttl);
 	return flowside_lookup(c, proto, pif, &side);
 }
 
diff --git a/flow.h b/flow.h
index dcf7645..2ba4a94 100644
--- a/flow.h
+++ b/flow.h
@@ -143,12 +143,14 @@ extern const uint8_t flow_proto[];
  * @oaddr:	Our address (local address from passt's PoV)
  * @eport:	Endpoint port
  * @oport:	Our port
+ * @ttl:	TTL/hop limit for this flow
  */
 struct flowside {
 	union inany_addr	oaddr;
 	union inany_addr	eaddr;
 	in_port_t		oport;
 	in_port_t		eport;
+	uint8_t			ttl;
 };
 
 /**
@@ -163,7 +165,8 @@ static inline bool flowside_eq(const struct flowside *left,
 	return inany_equals(&left->eaddr, &right->eaddr) &&
 	       left->eport == right->eport &&
 	       inany_equals(&left->oaddr, &right->oaddr) &&
-	       left->oport == right->oport;
+	       left->oport == right->oport &&
+	       left->ttl == right->ttl;
 }
 
 int flowside_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
@@ -241,7 +244,8 @@ void flow_hash_remove(const struct ctx *c, flow_sidx_t sidx);
 flow_sidx_t flow_lookup_af(const struct ctx *c,
 			   uint8_t proto, uint8_t pif, sa_family_t af,
 			   const void *eaddr, const void *oaddr,
-			   in_port_t eport, in_port_t oport);
+			   in_port_t eport, in_port_t oport,
+			   uint8_t ttl);
 flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif,
 			   const void *esa, in_port_t oport);
 
diff --git a/flow_table.h b/flow_table.h
index fd2c57b..0b5b431 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -196,7 +196,8 @@ void flow_alloc_cancel(union flow *flow);
 const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif,
 					sa_family_t af,
 					const void *saddr, in_port_t sport,
-					const void *daddr, in_port_t dport);
+					const void *daddr, in_port_t dport,
+					uint8_t ttl);
 struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif,
 				  const union sockaddr_inany *ssa,
 				  in_port_t dport);
diff --git a/icmp.c b/icmp.c
index 7e2b342..cbaa000 100644
--- a/icmp.c
+++ b/icmp.c
@@ -162,12 +162,14 @@ static void icmp_ping_close(const struct ctx *c,
  * @id:		ICMP id for the new socket
  * @saddr:	Source address
  * @daddr:	Destination address
+ * @ttl:	TTL/hop limit
  *
  * Return: Newly opened ping flow, or NULL on failure
  */
 static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
 					    sa_family_t af, uint16_t id,
-					    const void *saddr, const void *daddr)
+					    const void *saddr, const void *daddr,
+					    uint8_t ttl)
 {
 	uint8_t proto = af == AF_INET ? IPPROTO_ICMP : IPPROTO_ICMPV6;
 	uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6;
@@ -179,7 +181,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
 	if (!flow)
 		return NULL;
 
-	flow_initiate_af(flow, PIF_TAP, af, saddr, id, daddr, id);
+	flow_initiate_af(flow, PIF_TAP, af, saddr, id, daddr, id, ttl);
 	if (!(tgt = flow_target(c, flow, proto)))
 		goto cancel;
 
@@ -235,7 +237,7 @@ cancel:
  * Return: count of consumed packets (always 1, even if malformed)
  */
 int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
-		     const void *saddr, const void *daddr,
+		     const void *saddr, const void *daddr, uint8_t ttl,
 		     const struct pool *p, const struct timespec *now)
 {
 	struct icmp_ping_flow *pingf;
@@ -286,11 +288,14 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 	}
 
 	flow = flow_at_sidx(flow_lookup_af(c, proto, PIF_TAP,
-					   af, saddr, daddr, id, id));
+					   af, saddr, daddr, id, id, ttl));
 
 	if (flow)
 		pingf = &flow->ping;
-	else if (!(pingf = icmp_ping_new(c, af, id, saddr, daddr)))
+	else
+		pingf = icmp_ping_new(c, af, id, saddr, daddr, ttl);
+
+	if (!pingf)
 		return 1;
 
 	tgt = &pingf->f.side[TGTSIDE];
diff --git a/icmp.h b/icmp.h
index 5ce22b5..18168ab 100644
--- a/icmp.h
+++ b/icmp.h
@@ -13,7 +13,7 @@ struct icmp_ping_flow;
 
 void icmp_sock_handler(const struct ctx *c, union epoll_ref ref);
 int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
-		     const void *saddr, const void *daddr,
+		     const void *saddr, const void *daddr, uint8_t ttl,
 		     const struct pool *p, const struct timespec *now);
 void icmp_init(void);
 
diff --git a/packet.h b/packet.h
index c94780a..e84e123 100644
--- a/packet.h
+++ b/packet.h
@@ -11,6 +11,8 @@
 /* Maximum size of a single packet stored in pool, including headers */
 #define PACKET_MAX_LEN	((size_t)UINT16_MAX)
 
+#define DEFAULT_TTL 64
+
 /**
  * struct pool - Generic pool of packets stored in a buffer
  * @buf:	Buffer storing packet descriptors,
diff --git a/tap.c b/tap.c
index 3a6fcbe..c7d82ca 100644
--- a/tap.c
+++ b/tap.c
@@ -563,6 +563,7 @@ PACKET_POOL_DECL(pool_l4, UIO_MAXIOV, pkt_buf);
  * @dest:	Destination port
  * @saddr:	Source address
  * @daddr:	Destination address
+ * @ttl:	Time to live
  * @msg:	Array of messages that can be handled in a single call
  */
 static struct tap4_l4_t {
@@ -574,6 +575,8 @@ static struct tap4_l4_t {
 	struct in_addr saddr;
 	struct in_addr daddr;
 
+	uint8_t ttl;
+
 	struct pool_l4_t p;
 } tap4_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */];
 
@@ -586,6 +589,7 @@ static struct tap4_l4_t {
  * @dest:	Destination port
  * @saddr:	Source address
  * @daddr:	Destination address
+ * @hop_limit:	Hop limit
  * @msg:	Array of messages that can be handled in a single call
  */
 static struct tap6_l4_t {
@@ -598,6 +602,8 @@ static struct tap6_l4_t {
 	struct in6_addr saddr;
 	struct in6_addr daddr;
 
+	uint8_t hop_limit;
+
 	struct pool_l4_t p;
 } tap6_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */];
 
@@ -761,7 +767,7 @@ resume:
 			packet_add(pkt, l4len, l4h);
 			icmp_tap_handler(c, PIF_TAP, AF_INET,
 					 &iph->saddr, &iph->daddr,
-					 pkt, now);
+					 iph->ttl, pkt, now);
 			continue;
 		}
 
@@ -786,7 +792,8 @@ resume:
 #define L4_MATCH(iph, uh, seq)							\
 	((seq)->protocol == (iph)->protocol &&					\
 	 (seq)->source   == (uh)->source    && (seq)->dest  == (uh)->dest &&	\
-	 (seq)->saddr.s_addr == (iph)->saddr && (seq)->daddr.s_addr == (iph)->daddr)
+	 (seq)->saddr.s_addr == (iph)->saddr &&				\
+	 (seq)->daddr.s_addr == (iph)->daddr && (seq)->ttl == (iph)->ttl)
 
 #define L4_SET(iph, uh, seq)						\
 	do {								\
@@ -795,6 +802,7 @@ resume:
 		(seq)->dest		= (uh)->dest;			\
 		(seq)->saddr.s_addr	= (iph)->saddr;			\
 		(seq)->daddr.s_addr	= (iph)->daddr;			\
+		(seq)->ttl		= (iph)->ttl;				\
 	} while (0)
 
 		if (seq && L4_MATCH(iph, uh, seq) && seq->p.count < UIO_MAXIOV)
@@ -843,7 +851,7 @@ append:
 			for (k = 0; k < p->count; )
 				k += udp_tap_handler(c, PIF_TAP, AF_INET,
 						     &seq->saddr, &seq->daddr,
-						     p, k, now);
+						     seq->ttl, p, k, now);
 		}
 	}
 
@@ -878,6 +886,7 @@ resume:
 		const struct ethhdr *eh;
 		const struct udphdr *uh;
 		struct ipv6hdr *ip6h;
+		uint8_t hop_limit;
 		uint8_t proto;
 		char *l4h;
 
@@ -891,7 +900,7 @@ resume:
 
 		saddr = &ip6h->saddr;
 		daddr = &ip6h->daddr;
-
+		hop_limit = ip6h->hop_limit;
 		plen = ntohs(ip6h->payload_len);
 		if (plen != check)
 			continue;
@@ -938,7 +947,7 @@ resume:
 			tap_packet_debug(NULL, ip6h, NULL, proto, NULL, 1);
 
 			icmp_tap_handler(c, PIF_TAP, AF_INET6,
-					 saddr, daddr, pkt, now);
+					 saddr, daddr, hop_limit, pkt, now);
 			continue;
 		}
 
@@ -966,7 +975,8 @@ resume:
 		 (seq)->dest == (uh)->dest                 &&		\
 		 (seq)->flow_lbl == ip6_get_flow_lbl(ip6h) &&		\
 		 IN6_ARE_ADDR_EQUAL(&(seq)->saddr, saddr)  &&		\
-		 IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr))
+		 IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr)  &&		\
+		 (seq)->hop_limit == ip6h->hop_limit)
 
 #define L4_SET(ip6h, proto, uh, seq)					\
 	do {								\
@@ -976,6 +986,7 @@ resume:
 		(seq)->flow_lbl	= ip6_get_flow_lbl(ip6h);		\
 		(seq)->saddr	= *saddr;				\
 		(seq)->daddr	= *daddr;				\
+		(seq)->hop_limit = ip6h->hop_limit;			\
 	} while (0)
 
 		if (seq && L4_MATCH(ip6h, proto, uh, seq) &&
@@ -1026,7 +1037,7 @@ append:
 			for (k = 0; k < p->count; )
 				k += udp_tap_handler(c, PIF_TAP, AF_INET6,
 						     &seq->saddr, &seq->daddr,
-						     p, k, now);
+						     seq->hop_limit, p, k, now);
 		}
 	}
 
diff --git a/tcp.c b/tcp.c
index fa1d885..5751d21 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1446,8 +1446,8 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
 	if (!(flow = flow_alloc()))
 		return;
 
-	ini = flow_initiate_af(flow, PIF_TAP,
-			       af, saddr, srcport, daddr, dstport);
+	ini = flow_initiate_af(flow, PIF_TAP, af, saddr, srcport,
+			       daddr, dstport, DEFAULT_TTL);
 
 	if (!(tgt = flow_target(c, flow, IPPROTO_TCP)))
 		goto cancel;
@@ -1977,7 +1977,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
 	opts = packet_get(p, idx, sizeof(*th), optlen, NULL);
 
 	sidx = flow_lookup_af(c, IPPROTO_TCP, PIF_TAP, af, saddr, daddr,
-			      ntohs(th->source), ntohs(th->dest));
+			      ntohs(th->source), ntohs(th->dest), DEFAULT_TTL);
 	flow = flow_at_sidx(sidx);
 
 	/* New connection from tap */
diff --git a/udp.c b/udp.c
index 0c223b4..8a2c593 100644
--- a/udp.c
+++ b/udp.c
@@ -847,6 +847,7 @@ fail:
  * @af:		Address family, AF_INET or AF_INET6
  * @saddr:	Source address
  * @daddr:	Destination address
+ * @ttl:	TTL for packets to be sent in this call
  * @p:		Pool of UDP packets, with UDP headers
  * @idx:	Index of first packet to process
  * @now:	Current timestamp
@@ -857,7 +858,8 @@ fail:
  */
 int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		    sa_family_t af, const void *saddr, const void *daddr,
-		    const struct pool *p, int idx, const struct timespec *now)
+		    uint8_t ttl, const struct pool *p, int idx,
+		    const struct timespec *now)
 {
 	const struct flowside *toside;
 	struct mmsghdr mm[UIO_MAXIOV];
@@ -883,7 +885,9 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 	src = ntohs(uh->source);
 	dst = ntohs(uh->dest);
 
-	tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr, src, dst, now);
+	tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr,
+				   src, dst, ttl, now);
+
 	if (!(uflow = udp_at_sidx(tosidx))) {
 		char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN];
 
diff --git a/udp.h b/udp.h
index de2df6d..041fad4 100644
--- a/udp.h
+++ b/udp.h
@@ -15,7 +15,8 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
 			    uint32_t events, const struct timespec *now);
 int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		    sa_family_t af, const void *saddr, const void *daddr,
-		    const struct pool *p, int idx, const struct timespec *now);
+		    uint8_t  ttl, const struct pool *p, int idx,
+		    const struct timespec *now);
 int udp_sock_init(const struct ctx *c, int ns, const union inany_addr *addr,
 		  const char *ifname, in_port_t port);
 int udp_init(struct ctx *c);
diff --git a/udp_flow.c b/udp_flow.c
index bf4b896..db5f709 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -236,6 +236,7 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
  * @daddr:	Destination address guest side
  * @srcport:	Source port on guest side
  * @dstport:	Destination port on guest side
+ * @ttl:	TTL for this flow
  *
  * Return: sidx for the destination side of the flow for this packet, or
  *         FLOW_SIDX_NONE if we couldn't find or create a flow.
@@ -244,7 +245,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
 			      uint8_t pif, sa_family_t af,
 			      const void *saddr, const void *daddr,
 			      in_port_t srcport, in_port_t dstport,
-			      const struct timespec *now)
+			      uint8_t ttl, const struct timespec *now)
 {
 	const struct flowside *ini;
 	struct udp_flow *uflow;
@@ -254,7 +255,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
 	ASSERT(pif == PIF_TAP);
 
 	sidx = flow_lookup_af(c, IPPROTO_UDP, pif, af, saddr, daddr,
-			      srcport, dstport);
+			      srcport, dstport, ttl);
 	if ((uflow = udp_at_sidx(sidx))) {
 		uflow->ts = now->tv_sec;
 		return flow_sidx_opposite(sidx);
@@ -271,7 +272,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
 	}
 
 	ini = flow_initiate_af(flow, PIF_TAP, af, saddr, srcport,
-			       daddr, dstport);
+			       daddr, dstport, ttl);
 
 	if (inany_is_unspecified(&ini->eaddr) || ini->eport == 0 ||
 	    inany_is_unspecified(&ini->oaddr) || ini->oport == 0) {
diff --git a/udp_flow.h b/udp_flow.h
index 9a1b059..7b40594 100644
--- a/udp_flow.h
+++ b/udp_flow.h
@@ -31,7 +31,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
 			      uint8_t pif, sa_family_t af,
 			      const void *saddr, const void *daddr,
 			      in_port_t srcport, in_port_t dstport,
-			      const struct timespec *now);
+			      uint8_t ttl, const struct timespec *now);
 void udp_flow_close(const struct ctx *c, struct udp_flow *uflow);
 bool udp_flow_defer(const struct udp_flow *uflow);
 bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
-- 
@@ -31,7 +31,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
 			      uint8_t pif, sa_family_t af,
 			      const void *saddr, const void *daddr,
 			      in_port_t srcport, in_port_t dstport,
-			      const struct timespec *now);
+			      uint8_t ttl, const struct timespec *now);
 void udp_flow_close(const struct ctx *c, struct udp_flow *uflow);
 bool udp_flow_defer(const struct udp_flow *uflow);
 bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
-- 
2.48.1


  reply	other threads:[~2025-03-27 16:52 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-27 16:51 [PATCH v2 0/2] udp: add guest-to-remote traceroute Jon Maloy
2025-03-27 16:51 ` Jon Maloy [this message]
2025-03-28  1:50   ` [PATCH v2 1/2] Add ttl to outgoing flow structure lookups David Gibson
2025-03-27 16:51 ` [PATCH v2 2/2] udp: support traceroute with udp Jon Maloy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250327165155.3354811-2-jmaloy@redhat.com \
    --to=jmaloy@redhat.com \
    --cc=dgibson@redhat.com \
    --cc=lvivier@redhat.com \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).