public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH v7] udp: support traceroute in direction tap-socket
@ 2025-04-05 19:21 Jon Maloy
  2025-04-07  4:55 ` David Gibson
  0 siblings, 1 reply; 2+ messages in thread
From: Jon Maloy @ 2025-04-05 19:21 UTC (permalink / raw)
  To: sbrivio, dgibson, jmaloy, passt-dev

Now that ICMP pass-through from socket-to-tap is in place, it is
easy to support UDP based traceroute functionality in direction
tap-to-socket.

We add that support in this commit.

Link: https://bugs.passt.top/show_bug.cgi?id=64
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
v2: - Using ancillary data instead of setsockopt to transfer outgoing
      TTL.
    - Support IPv6
v3: - Storing ttl per packet instead of per flow. This may not be
      elegant, but much less intrusive than changing the flow
      criteria. This eliminates the need for the extra, flow-changing
      patch we introduced in v2.
v4: - Going back to something similar to the original solution, but
      storing current ttl in struct udp_flow, plus ensuring that all
      packets in a struct tap4_l4_t/tap6_l4_t instance have the same
      ttl. After input from David Gibson.
v5: - Some minor fixes after feedback from Stefano Brivio.
v6: - Ensured that socket TTL is initialized at flow creation.
    - Ensured that setting of hop limit on IPv6 socket works.
v7: - Setting of IPv6 hop_limit fails sometimes unless it is 4 bytes.
---
 tap.c      | 17 +++++++++++++----
 udp.c      | 22 +++++++++++++++++++++-
 udp.h      |  3 ++-
 udp_flow.c |  1 +
 udp_flow.h |  4 +++-
 5 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/tap.c b/tap.c
index 3a6fcbe..d630f6d 100644
--- a/tap.c
+++ b/tap.c
@@ -559,6 +559,7 @@ PACKET_POOL_DECL(pool_l4, UIO_MAXIOV, pkt_buf);
  * struct l4_seq4_t - Message sequence for one protocol handler call, IPv4
  * @msgs:	Count of messages in sequence
  * @protocol:	Protocol number
+ * @ttl:	Time to live
  * @source:	Source port
  * @dest:	Destination port
  * @saddr:	Source address
@@ -567,6 +568,7 @@ PACKET_POOL_DECL(pool_l4, UIO_MAXIOV, pkt_buf);
  */
 static struct tap4_l4_t {
 	uint8_t protocol;
+	uint8_t ttl;
 
 	uint16_t source;
 	uint16_t dest;
@@ -586,6 +588,7 @@ static struct tap4_l4_t {
  * @dest:	Destination port
  * @saddr:	Source address
  * @daddr:	Destination address
+ * @hop_limit:	Hop limit
  * @msg:	Array of messages that can be handled in a single call
  */
 static struct tap6_l4_t {
@@ -598,6 +601,8 @@ static struct tap6_l4_t {
 	struct in6_addr saddr;
 	struct in6_addr daddr;
 
+	uint8_t hop_limit;
+
 	struct pool_l4_t p;
 } tap6_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */];
 
@@ -786,7 +791,8 @@ resume:
 #define L4_MATCH(iph, uh, seq)							\
 	((seq)->protocol == (iph)->protocol &&					\
 	 (seq)->source   == (uh)->source    && (seq)->dest  == (uh)->dest &&	\
-	 (seq)->saddr.s_addr == (iph)->saddr && (seq)->daddr.s_addr == (iph)->daddr)
+	 (seq)->saddr.s_addr == (iph)->saddr &&				\
+	 (seq)->daddr.s_addr == (iph)->daddr && (seq)->ttl == (iph)->ttl)
 
 #define L4_SET(iph, uh, seq)						\
 	do {								\
@@ -795,6 +801,7 @@ resume:
 		(seq)->dest		= (uh)->dest;			\
 		(seq)->saddr.s_addr	= (iph)->saddr;			\
 		(seq)->daddr.s_addr	= (iph)->daddr;			\
+		(seq)->ttl		= (iph)->ttl;			\
 	} while (0)
 
 		if (seq && L4_MATCH(iph, uh, seq) && seq->p.count < UIO_MAXIOV)
@@ -843,7 +850,7 @@ append:
 			for (k = 0; k < p->count; )
 				k += udp_tap_handler(c, PIF_TAP, AF_INET,
 						     &seq->saddr, &seq->daddr,
-						     p, k, now);
+						     seq->ttl, p, k, now);
 		}
 	}
 
@@ -966,7 +973,8 @@ resume:
 		 (seq)->dest == (uh)->dest                 &&		\
 		 (seq)->flow_lbl == ip6_get_flow_lbl(ip6h) &&		\
 		 IN6_ARE_ADDR_EQUAL(&(seq)->saddr, saddr)  &&		\
-		 IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr))
+		 IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr)  &&		\
+		 (seq)->hop_limit == (ip6h)->hop_limit)
 
 #define L4_SET(ip6h, proto, uh, seq)					\
 	do {								\
@@ -976,6 +984,7 @@ resume:
 		(seq)->flow_lbl	= ip6_get_flow_lbl(ip6h);		\
 		(seq)->saddr	= *saddr;				\
 		(seq)->daddr	= *daddr;				\
+		(seq)->hop_limit = (ip6h)->hop_limit;			\
 	} while (0)
 
 		if (seq && L4_MATCH(ip6h, proto, uh, seq) &&
@@ -1026,7 +1035,7 @@ append:
 			for (k = 0; k < p->count; )
 				k += udp_tap_handler(c, PIF_TAP, AF_INET6,
 						     &seq->saddr, &seq->daddr,
-						     p, k, now);
+						     seq->hop_limit, p, k, now);
 		}
 	}
 
diff --git a/udp.c b/udp.c
index b275db3..3600336 100644
--- a/udp.c
+++ b/udp.c
@@ -818,6 +818,7 @@ fail:
  * @af:		Address family, AF_INET or AF_INET6
  * @saddr:	Source address
  * @daddr:	Destination address
+ * @ttl:	TTL or hop limit for packets to be sent in this call
  * @p:		Pool of UDP packets, with UDP headers
  * @idx:	Index of first packet to process
  * @now:	Current timestamp
@@ -828,7 +829,8 @@ fail:
  */
 int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		    sa_family_t af, const void *saddr, const void *daddr,
-		    const struct pool *p, int idx, const struct timespec *now)
+		    uint8_t ttl, const struct pool *p, int idx,
+		    const struct timespec *now)
 {
 	const struct flowside *toside;
 	struct mmsghdr mm[UIO_MAXIOV];
@@ -907,6 +909,24 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		mm[i].msg_hdr.msg_controllen = 0;
 		mm[i].msg_hdr.msg_flags = 0;
 
+		if (ttl != uflow->ttl[tosidx.sidei]) {
+			uflow->ttl[tosidx.sidei] = ttl;
+			if (af == AF_INET) {
+				if (setsockopt(s, IPPROTO_IP, IP_TTL,
+					       &ttl, sizeof(ttl)) < 0)
+					flow_perror(uflow,
+						    "setsockopt IP_TTL");
+			} else {
+				/* IPv6 hop_limit cannot be only 1 byte */
+				int hop_limit = ttl;
+
+				if (setsockopt(s, SOL_IPV6, IPV6_UNICAST_HOPS,
+					       &hop_limit, sizeof(hop_limit)) < 0)
+					flow_perror(uflow,
+						    "setsockopt IPV6_UNICAST_HOPS");
+			}
+		}
+
 		count++;
 	}
 
diff --git a/udp.h b/udp.h
index 8fc4283..8f8531a 100644
--- a/udp.h
+++ b/udp.h
@@ -15,7 +15,8 @@ void udp_sock_handler(const struct ctx *c, union epoll_ref ref,
 		      uint32_t events, const struct timespec *now);
 int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		    sa_family_t af, const void *saddr, const void *daddr,
-		    const struct pool *p, int idx, const struct timespec *now);
+		    uint8_t ttl, const struct pool *p, int idx,
+		    const struct timespec *now);
 int udp_sock_init(const struct ctx *c, int ns, const union inany_addr *addr,
 		  const char *ifname, in_port_t port);
 int udp_init(struct ctx *c);
diff --git a/udp_flow.c b/udp_flow.c
index af15d7f..d84d170 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -136,6 +136,7 @@ static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
 	uflow = FLOW_SET_TYPE(flow, FLOW_UDP, udp);
 	uflow->ts = now->tv_sec;
 	uflow->s[INISIDE] = uflow->s[TGTSIDE] = -1;
+	uflow->ttl[INISIDE] = uflow->ttl[TGTSIDE] = 0;
 
 	flow_foreach_sidei(sidei) {
 		if (pif_is_socket(uflow->f.pif[sidei]))
diff --git a/udp_flow.h b/udp_flow.h
index d518737..90d3b29 100644
--- a/udp_flow.h
+++ b/udp_flow.h
@@ -8,13 +8,14 @@
 #define UDP_FLOW_H
 
 /**
- * struct udp - Descriptor for a flow of UDP packets
+ * struct udp_flow - Descriptor for a flow of UDP packets
  * @f:		Generic flow information
  * @closed:	Flow is already closed
  * @flush0:	@s[0] may have datagrams queued for other flows
  * @flush1:	@s[1] may have datagrams queued for other flows
  * @ts:		Activity timestamp
  * @s:		Socket fd (or -1) for each side of the flow
+ * @ttl:	TTL or hop_limit for both sides
  */
 struct udp_flow {
 	/* Must be first element */
@@ -24,6 +25,7 @@ struct udp_flow {
 	bool flush0, flush1 :1;
 	time_t ts;
 	int s[SIDES];
+	uint8_t ttl[SIDES];
 };
 
 struct udp_flow *udp_at_sidx(flow_sidx_t sidx);
-- 
@@ -8,13 +8,14 @@
 #define UDP_FLOW_H
 
 /**
- * struct udp - Descriptor for a flow of UDP packets
+ * struct udp_flow - Descriptor for a flow of UDP packets
  * @f:		Generic flow information
  * @closed:	Flow is already closed
  * @flush0:	@s[0] may have datagrams queued for other flows
  * @flush1:	@s[1] may have datagrams queued for other flows
  * @ts:		Activity timestamp
  * @s:		Socket fd (or -1) for each side of the flow
+ * @ttl:	TTL or hop_limit for both sides
  */
 struct udp_flow {
 	/* Must be first element */
@@ -24,6 +25,7 @@ struct udp_flow {
 	bool flush0, flush1 :1;
 	time_t ts;
 	int s[SIDES];
+	uint8_t ttl[SIDES];
 };
 
 struct udp_flow *udp_at_sidx(flow_sidx_t sidx);
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v7] udp: support traceroute in direction tap-socket
  2025-04-05 19:21 [PATCH v7] udp: support traceroute in direction tap-socket Jon Maloy
@ 2025-04-07  4:55 ` David Gibson
  0 siblings, 0 replies; 2+ messages in thread
From: David Gibson @ 2025-04-07  4:55 UTC (permalink / raw)
  To: Jon Maloy; +Cc: sbrivio, dgibson, passt-dev

[-- Attachment #1: Type: text/plain, Size: 8420 bytes --]

On Sat, Apr 05, 2025 at 03:21:26PM -0400, Jon Maloy wrote:
> Now that ICMP pass-through from socket-to-tap is in place, it is
> easy to support UDP based traceroute functionality in direction
> tap-to-socket.
> 
> We add that support in this commit.
> 
> Link: https://bugs.passt.top/show_bug.cgi?id=64
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
> v2: - Using ancillary data instead of setsockopt to transfer outgoing
>       TTL.
>     - Support IPv6
> v3: - Storing ttl per packet instead of per flow. This may not be
>       elegant, but much less intrusive than changing the flow
>       criteria. This eliminates the need for the extra, flow-changing
>       patch we introduced in v2.
> v4: - Going back to something similar to the original solution, but
>       storing current ttl in struct udp_flow, plus ensuring that all
>       packets in a struct tap4_l4_t/tap6_l4_t instance have the same
>       ttl. After input from David Gibson.
> v5: - Some minor fixes after feedback from Stefano Brivio.
> v6: - Ensured that socket TTL is initialized at flow creation.
>     - Ensured that setting of hop limit on IPv6 socket works.
> v7: - Setting of IPv6 hop_limit fails sometimes unless it is 4 bytes.
> ---
>  tap.c      | 17 +++++++++++++----
>  udp.c      | 22 +++++++++++++++++++++-
>  udp.h      |  3 ++-
>  udp_flow.c |  1 +
>  udp_flow.h |  4 +++-
>  5 files changed, 40 insertions(+), 7 deletions(-)
> 
> diff --git a/tap.c b/tap.c
> index 3a6fcbe..d630f6d 100644
> --- a/tap.c
> +++ b/tap.c
> @@ -559,6 +559,7 @@ PACKET_POOL_DECL(pool_l4, UIO_MAXIOV, pkt_buf);
>   * struct l4_seq4_t - Message sequence for one protocol handler call, IPv4
>   * @msgs:	Count of messages in sequence
>   * @protocol:	Protocol number
> + * @ttl:	Time to live
>   * @source:	Source port
>   * @dest:	Destination port
>   * @saddr:	Source address
> @@ -567,6 +568,7 @@ PACKET_POOL_DECL(pool_l4, UIO_MAXIOV, pkt_buf);
>   */
>  static struct tap4_l4_t {
>  	uint8_t protocol;
> +	uint8_t ttl;
>  
>  	uint16_t source;
>  	uint16_t dest;
> @@ -586,6 +588,7 @@ static struct tap4_l4_t {
>   * @dest:	Destination port
>   * @saddr:	Source address
>   * @daddr:	Destination address
> + * @hop_limit:	Hop limit
>   * @msg:	Array of messages that can be handled in a single call
>   */
>  static struct tap6_l4_t {
> @@ -598,6 +601,8 @@ static struct tap6_l4_t {
>  	struct in6_addr saddr;
>  	struct in6_addr daddr;
>  
> +	uint8_t hop_limit;
> +
>  	struct pool_l4_t p;
>  } tap6_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */];
>  
> @@ -786,7 +791,8 @@ resume:
>  #define L4_MATCH(iph, uh, seq)							\
>  	((seq)->protocol == (iph)->protocol &&					\
>  	 (seq)->source   == (uh)->source    && (seq)->dest  == (uh)->dest &&	\
> -	 (seq)->saddr.s_addr == (iph)->saddr && (seq)->daddr.s_addr == (iph)->daddr)
> +	 (seq)->saddr.s_addr == (iph)->saddr &&				\
> +	 (seq)->daddr.s_addr == (iph)->daddr && (seq)->ttl == (iph)->ttl)
>  
>  #define L4_SET(iph, uh, seq)						\
>  	do {								\
> @@ -795,6 +801,7 @@ resume:
>  		(seq)->dest		= (uh)->dest;			\
>  		(seq)->saddr.s_addr	= (iph)->saddr;			\
>  		(seq)->daddr.s_addr	= (iph)->daddr;			\
> +		(seq)->ttl		= (iph)->ttl;			\
>  	} while (0)
>  
>  		if (seq && L4_MATCH(iph, uh, seq) && seq->p.count < UIO_MAXIOV)
> @@ -843,7 +850,7 @@ append:
>  			for (k = 0; k < p->count; )
>  				k += udp_tap_handler(c, PIF_TAP, AF_INET,
>  						     &seq->saddr, &seq->daddr,
> -						     p, k, now);
> +						     seq->ttl, p, k, now);
>  		}
>  	}
>  
> @@ -966,7 +973,8 @@ resume:
>  		 (seq)->dest == (uh)->dest                 &&		\
>  		 (seq)->flow_lbl == ip6_get_flow_lbl(ip6h) &&		\
>  		 IN6_ARE_ADDR_EQUAL(&(seq)->saddr, saddr)  &&		\
> -		 IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr))
> +		 IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr)  &&		\
> +		 (seq)->hop_limit == (ip6h)->hop_limit)
>  
>  #define L4_SET(ip6h, proto, uh, seq)					\
>  	do {								\
> @@ -976,6 +984,7 @@ resume:
>  		(seq)->flow_lbl	= ip6_get_flow_lbl(ip6h);		\
>  		(seq)->saddr	= *saddr;				\
>  		(seq)->daddr	= *daddr;				\
> +		(seq)->hop_limit = (ip6h)->hop_limit;			\
>  	} while (0)
>  
>  		if (seq && L4_MATCH(ip6h, proto, uh, seq) &&
> @@ -1026,7 +1035,7 @@ append:
>  			for (k = 0; k < p->count; )
>  				k += udp_tap_handler(c, PIF_TAP, AF_INET6,
>  						     &seq->saddr, &seq->daddr,
> -						     p, k, now);
> +						     seq->hop_limit, p, k, now);
>  		}
>  	}
>  
> diff --git a/udp.c b/udp.c
> index b275db3..3600336 100644
> --- a/udp.c
> +++ b/udp.c
> @@ -818,6 +818,7 @@ fail:
>   * @af:		Address family, AF_INET or AF_INET6
>   * @saddr:	Source address
>   * @daddr:	Destination address
> + * @ttl:	TTL or hop limit for packets to be sent in this call
>   * @p:		Pool of UDP packets, with UDP headers
>   * @idx:	Index of first packet to process
>   * @now:	Current timestamp
> @@ -828,7 +829,8 @@ fail:
>   */
>  int udp_tap_handler(const struct ctx *c, uint8_t pif,
>  		    sa_family_t af, const void *saddr, const void *daddr,
> -		    const struct pool *p, int idx, const struct timespec *now)
> +		    uint8_t ttl, const struct pool *p, int idx,
> +		    const struct timespec *now)
>  {
>  	const struct flowside *toside;
>  	struct mmsghdr mm[UIO_MAXIOV];
> @@ -907,6 +909,24 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
>  		mm[i].msg_hdr.msg_controllen = 0;
>  		mm[i].msg_hdr.msg_flags = 0;
>  
> +		if (ttl != uflow->ttl[tosidx.sidei]) {
> +			uflow->ttl[tosidx.sidei] = ttl;
> +			if (af == AF_INET) {
> +				if (setsockopt(s, IPPROTO_IP, IP_TTL,
> +					       &ttl, sizeof(ttl)) < 0)
> +					flow_perror(uflow,
> +						    "setsockopt IP_TTL");
> +			} else {
> +				/* IPv6 hop_limit cannot be only 1 byte */
> +				int hop_limit = ttl;
> +
> +				if (setsockopt(s, SOL_IPV6, IPV6_UNICAST_HOPS,
> +					       &hop_limit, sizeof(hop_limit)) < 0)
> +					flow_perror(uflow,
> +						    "setsockopt IPV6_UNICAST_HOPS");
> +			}
> +		}
> +
>  		count++;
>  	}
>  
> diff --git a/udp.h b/udp.h
> index 8fc4283..8f8531a 100644
> --- a/udp.h
> +++ b/udp.h
> @@ -15,7 +15,8 @@ void udp_sock_handler(const struct ctx *c, union epoll_ref ref,
>  		      uint32_t events, const struct timespec *now);
>  int udp_tap_handler(const struct ctx *c, uint8_t pif,
>  		    sa_family_t af, const void *saddr, const void *daddr,
> -		    const struct pool *p, int idx, const struct timespec *now);
> +		    uint8_t ttl, const struct pool *p, int idx,
> +		    const struct timespec *now);
>  int udp_sock_init(const struct ctx *c, int ns, const union inany_addr *addr,
>  		  const char *ifname, in_port_t port);
>  int udp_init(struct ctx *c);
> diff --git a/udp_flow.c b/udp_flow.c
> index af15d7f..d84d170 100644
> --- a/udp_flow.c
> +++ b/udp_flow.c
> @@ -136,6 +136,7 @@ static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
>  	uflow = FLOW_SET_TYPE(flow, FLOW_UDP, udp);
>  	uflow->ts = now->tv_sec;
>  	uflow->s[INISIDE] = uflow->s[TGTSIDE] = -1;
> +	uflow->ttl[INISIDE] = uflow->ttl[TGTSIDE] = 0;
>  
>  	flow_foreach_sidei(sidei) {
>  		if (pif_is_socket(uflow->f.pif[sidei]))
> diff --git a/udp_flow.h b/udp_flow.h
> index d518737..90d3b29 100644
> --- a/udp_flow.h
> +++ b/udp_flow.h
> @@ -8,13 +8,14 @@
>  #define UDP_FLOW_H
>  
>  /**
> - * struct udp - Descriptor for a flow of UDP packets
> + * struct udp_flow - Descriptor for a flow of UDP packets
>   * @f:		Generic flow information
>   * @closed:	Flow is already closed
>   * @flush0:	@s[0] may have datagrams queued for other flows
>   * @flush1:	@s[1] may have datagrams queued for other flows
>   * @ts:		Activity timestamp
>   * @s:		Socket fd (or -1) for each side of the flow
> + * @ttl:	TTL or hop_limit for both sides
>   */
>  struct udp_flow {
>  	/* Must be first element */
> @@ -24,6 +25,7 @@ struct udp_flow {
>  	bool flush0, flush1 :1;
>  	time_t ts;
>  	int s[SIDES];
> +	uint8_t ttl[SIDES];
>  };
>  
>  struct udp_flow *udp_at_sidx(flow_sidx_t sidx);

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-04-07  4:55 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-04-05 19:21 [PATCH v7] udp: support traceroute in direction tap-socket Jon Maloy
2025-04-07  4:55 ` David Gibson

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).