public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH 0/2] udp: add guest-to-remote traceroute for IPv4
@ 2025-03-15 15:32 Jon Maloy
  2025-03-15 15:32 ` [PATCH 1/2] udp: correct source address for ICMP messages Jon Maloy
  2025-03-15 15:32 ` [PATCH 2/2] udp: support traceroute for IPv4 Jon Maloy
  0 siblings, 2 replies; 3+ messages in thread
From: Jon Maloy @ 2025-03-15 15:32 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

This series adds support for UDP-based IPv4 traceroute in the
tap-to-socket (guest-to-remote) direction. Further traceroute support
will follow once this part is settled.

Jon Maloy (2):
  udp: correct source address for ICMP messages
  udp: support traceroute for IPv4

 packet.h |  2 ++
 tap.c    |  8 ++++++--
 udp.c    | 36 ++++++++++++++++++++++++------------
 udp.h    |  3 ++-
 4 files changed, 34 insertions(+), 15 deletions(-)

-- 
2.48.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] udp: correct source address for ICMP messages
  2025-03-15 15:32 [PATCH 0/2] udp: add guest-to-remote traceroute for IPv4 Jon Maloy
@ 2025-03-15 15:32 ` Jon Maloy
  2025-03-15 15:32 ` [PATCH 2/2] udp: support traceroute for IPv4 Jon Maloy
  1 sibling, 0 replies; 3+ messages in thread
From: Jon Maloy @ 2025-03-15 15:32 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

While developing tap-to-socket traceroute forwarding, we found that
msg_name in the struct msghdr received for ICMP errors in the opposite
direction always contains the destination address of the original UDP
message and not, as one might expect, the address of the host that
generated the error message.

A study of the kernel code reveals that this address is instead appended
as extra data after the struct sock_extended_err area in the received
control message.

Change the ICMP receive code to take the source address from that extra
data rather than from msg_name.

Fixes: 55431f0077b6 ("udp: create and send ICMPv4 to local peer when applicable")
Fixes: 68b04182e07d ("udp: create and send ICMPv6 to local peer when applicable")
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 udp.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/udp.c b/udp.c
index 80520cb..271e570 100644
--- a/udp.c
+++ b/udp.c
@@ -510,10 +510,13 @@ static void udp_send_conn_fail_icmp6(const struct ctx *c,
  */
 static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
 {
-	const struct sock_extended_err *ee;
+	struct errhdr {
+		struct sock_extended_err ee;
+		union sockaddr_inany saddr;
+	};
+	const struct errhdr *eh;
 	const struct cmsghdr *hdr;
-	union sockaddr_inany saddr;
-	char buf[CMSG_SPACE(sizeof(*ee))];
+	char buf[CMSG_SPACE(sizeof(struct errhdr))];
 	char data[ICMP6_MAX_DLEN];
 	int s = ref.fd;
 	struct iovec iov = {
@@ -521,11 +524,11 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
 		.iov_len = sizeof(data)
 	};
 	struct msghdr mh = {
-		.msg_name = &saddr,
-		.msg_namelen = sizeof(saddr),
+		.msg_name = 0,
+		.msg_namelen = 0,
 		.msg_iov = &iov,
 		.msg_iovlen = 1,
-		.msg_control = buf,
+		.msg_control = buf,//(void *)&errhdr,
 		.msg_controllen = sizeof(buf),
 	};
 	ssize_t rc;
@@ -553,7 +556,7 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
 		return -1;
 	}
 
-	ee = (const struct sock_extended_err *)CMSG_DATA(hdr);
+	eh = (const struct errhdr *)CMSG_DATA(hdr);
 	if (ref.type == EPOLL_TYPE_UDP_REPLY) {
 		flow_sidx_t sidx = flow_sidx_opposite(ref.flowside);
 		const struct flowside *toside = flowside_at_sidx(sidx);
@@ -561,18 +564,19 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
 
 		if (hdr->cmsg_level == IPPROTO_IP) {
 			dlen = MIN(dlen, ICMP4_MAX_DLEN);
-			udp_send_conn_fail_icmp4(c, ee, toside, saddr.sa4.sin_addr,
+			udp_send_conn_fail_icmp4(c, &eh->ee, toside,
+						 eh->saddr.sa4.sin_addr,
 						 data, dlen);
 		} else if (hdr->cmsg_level == IPPROTO_IPV6) {
-			udp_send_conn_fail_icmp6(c, ee, toside,
-						 &saddr.sa6.sin6_addr,
+			udp_send_conn_fail_icmp6(c, &eh->ee, toside,
+						 &eh->saddr.sa6.sin6_addr,
 						 data, dlen, sidx.flowi);
 		}
 	} else {
 		trace("Ignoring received IP_RECVERR cmsg on listener socket");
 	}
 	debug("%s error on UDP socket %i: %s",
-	      str_ee_origin(ee), s, strerror_(ee->ee_errno));
+	      str_ee_origin(&eh->ee), s, strerror_(eh->ee.ee_errno));
 
 	return 1;
 }
-- 
@@ -510,10 +510,13 @@ static void udp_send_conn_fail_icmp6(const struct ctx *c,
  */
 static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
 {
-	const struct sock_extended_err *ee;
+	struct errhdr {
+		struct sock_extended_err ee;
+		union sockaddr_inany saddr;
+	};
+	const struct errhdr *eh;
 	const struct cmsghdr *hdr;
-	union sockaddr_inany saddr;
-	char buf[CMSG_SPACE(sizeof(*ee))];
+	char buf[CMSG_SPACE(sizeof(struct errhdr))];
 	char data[ICMP6_MAX_DLEN];
 	int s = ref.fd;
 	struct iovec iov = {
@@ -521,11 +524,11 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
 		.iov_len = sizeof(data)
 	};
 	struct msghdr mh = {
-		.msg_name = &saddr,
-		.msg_namelen = sizeof(saddr),
+		.msg_name = 0,
+		.msg_namelen = 0,
 		.msg_iov = &iov,
 		.msg_iovlen = 1,
-		.msg_control = buf,
+		.msg_control = buf,//(void *)&errhdr,
 		.msg_controllen = sizeof(buf),
 	};
 	ssize_t rc;
@@ -553,7 +556,7 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
 		return -1;
 	}
 
-	ee = (const struct sock_extended_err *)CMSG_DATA(hdr);
+	eh = (const struct errhdr *)CMSG_DATA(hdr);
 	if (ref.type == EPOLL_TYPE_UDP_REPLY) {
 		flow_sidx_t sidx = flow_sidx_opposite(ref.flowside);
 		const struct flowside *toside = flowside_at_sidx(sidx);
@@ -561,18 +564,19 @@ static int udp_sock_recverr(const struct ctx *c, union epoll_ref ref)
 
 		if (hdr->cmsg_level == IPPROTO_IP) {
 			dlen = MIN(dlen, ICMP4_MAX_DLEN);
-			udp_send_conn_fail_icmp4(c, ee, toside, saddr.sa4.sin_addr,
+			udp_send_conn_fail_icmp4(c, &eh->ee, toside,
+						 eh->saddr.sa4.sin_addr,
 						 data, dlen);
 		} else if (hdr->cmsg_level == IPPROTO_IPV6) {
-			udp_send_conn_fail_icmp6(c, ee, toside,
-						 &saddr.sa6.sin6_addr,
+			udp_send_conn_fail_icmp6(c, &eh->ee, toside,
+						 &eh->saddr.sa6.sin6_addr,
 						 data, dlen, sidx.flowi);
 		}
 	} else {
 		trace("Ignoring received IP_RECVERR cmsg on listener socket");
 	}
 	debug("%s error on UDP socket %i: %s",
-	      str_ee_origin(ee), s, strerror_(ee->ee_errno));
+	      str_ee_origin(&eh->ee), s, strerror_(eh->ee.ee_errno));
 
 	return 1;
 }
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] udp: support traceroute for IPv4
  2025-03-15 15:32 [PATCH 0/2] udp: add guest-to-remote traceroute for IPv4 Jon Maloy
  2025-03-15 15:32 ` [PATCH 1/2] udp: correct source address for ICMP messages Jon Maloy
@ 2025-03-15 15:32 ` Jon Maloy
  1 sibling, 0 replies; 3+ messages in thread
From: Jon Maloy @ 2025-03-15 15:32 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

Now that ICMP pass-through in the socket-to-tap direction is in place,
it is easy to support UDP-based traceroute functionality in the
tap-to-socket direction.

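This commit adds that support for IPv4: the TTL of packets coming from
the tap is passed to udp_tap_handler(), which applies it to the outbound
socket with the IP_TTL socket option when it is 30 or less.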

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 packet.h |  2 ++
 tap.c    |  8 ++++++--
 udp.c    | 10 +++++++++-
 udp.h    |  3 ++-
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/packet.h b/packet.h
index d099f02..e406c6d 100644
--- a/packet.h
+++ b/packet.h
@@ -9,6 +9,8 @@
 /* Maximum size of a single packet stored in pool, including headers */
 #define PACKET_MAX_LEN	UINT16_MAX
 
+#define DEFAULT_TTL 64
+
 /**
  * struct pool - Generic pool of packets stored in a buffer
  * @buf:	Buffer storing packet descriptors,
diff --git a/tap.c b/tap.c
index 182a115..7d0970d 100644
--- a/tap.c
+++ b/tap.c
@@ -544,6 +544,7 @@ PACKET_POOL_DECL(pool_l4, UIO_MAXIOV, pkt_buf);
  * @dest:	Destination port
  * @saddr:	Source address
  * @daddr:	Destination address
+ * @ttl:	Time to live
  * @msg:	Array of messages that can be handled in a single call
  */
 static struct tap4_l4_t {
@@ -555,6 +556,8 @@ static struct tap4_l4_t {
 	struct in_addr saddr;
 	struct in_addr daddr;
 
+	uint8_t ttl;
+
 	struct pool_l4_t p;
 } tap4_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */];
 
@@ -776,6 +779,7 @@ resume:
 		(seq)->dest		= (uh)->dest;			\
 		(seq)->saddr.s_addr	= (iph)->saddr;			\
 		(seq)->daddr.s_addr	= (iph)->daddr;			\
+		(seq)->ttl		= (iph)->ttl;				\
 	} while (0)
 
 		if (seq && L4_MATCH(iph, uh, seq) && seq->p.count < UIO_MAXIOV)
@@ -824,7 +828,7 @@ append:
 			for (k = 0; k < p->count; )
 				k += udp_tap_handler(c, PIF_TAP, AF_INET,
 						     &seq->saddr, &seq->daddr,
-						     p, k, now);
+						     seq->ttl, p, k, now);
 		}
 	}
 
@@ -1007,7 +1011,7 @@ append:
 			for (k = 0; k < p->count; )
 				k += udp_tap_handler(c, PIF_TAP, AF_INET6,
 						     &seq->saddr, &seq->daddr,
-						     p, k, now);
+						     DEFAULT_TTL, p, k, now);
 		}
 	}
 
diff --git a/udp.c b/udp.c
index 271e570..1ee289c 100644
--- a/udp.c
+++ b/udp.c
@@ -844,6 +844,7 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
  * @af:		Address family, AF_INET or AF_INET6
  * @saddr:	Source address
  * @daddr:	Destination address
+ * @ttl:	TTL for packets to be sent in this call
  * @p:		Pool of UDP packets, with UDP headers
  * @idx:	Index of first packet to process
  * @now:	Current timestamp
@@ -854,7 +855,8 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
  */
 int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		    sa_family_t af, const void *saddr, const void *daddr,
-		    const struct pool *p, int idx, const struct timespec *now)
+		    uint8_t ttl, const struct pool *p, int idx,
+		    const struct timespec *now)
 {
 	const struct flowside *toside;
 	struct mmsghdr mm[UIO_MAXIOV];
@@ -933,6 +935,12 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		mm[i].msg_hdr.msg_controllen = 0;
 		mm[i].msg_hdr.msg_flags = 0;
 
+		if (ttl <= 30) {
+			if (setsockopt(s, IPPROTO_IP, IP_TTL,
+				       &ttl, sizeof(ttl)) < 0)
+				perror("setsockopt (IP_TTL)");
+		}
+
 		count++;
 	}
 
diff --git a/udp.h b/udp.h
index de2df6d..041fad4 100644
--- a/udp.h
+++ b/udp.h
@@ -15,7 +15,8 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
 			    uint32_t events, const struct timespec *now);
 int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		    sa_family_t af, const void *saddr, const void *daddr,
-		    const struct pool *p, int idx, const struct timespec *now);
+		    uint8_t  ttl, const struct pool *p, int idx,
+		    const struct timespec *now);
 int udp_sock_init(const struct ctx *c, int ns, const union inany_addr *addr,
 		  const char *ifname, in_port_t port);
 int udp_init(struct ctx *c);
-- 
@@ -15,7 +15,8 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
 			    uint32_t events, const struct timespec *now);
 int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		    sa_family_t af, const void *saddr, const void *daddr,
-		    const struct pool *p, int idx, const struct timespec *now);
+		    uint8_t  ttl, const struct pool *p, int idx,
+		    const struct timespec *now);
 int udp_sock_init(const struct ctx *c, int ns, const union inany_addr *addr,
 		  const char *ifname, in_port_t port);
 int udp_init(struct ctx *c);
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-03-15 15:32 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-15 15:32 [PATCH 0/2] udp: add guest-to-remote traceroute for IPv4 Jon Maloy
2025-03-15 15:32 ` [PATCH 1/2] udp: correct source address for ICMP messages Jon Maloy
2025-03-15 15:32 ` [PATCH 2/2] udp: support traceroute for IPv4 Jon Maloy

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).