public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH v3] udp: support traceroute
@ 2025-03-30 21:06 Jon Maloy
  2025-03-31  5:23 ` David Gibson
  0 siblings, 1 reply; 2+ messages in thread
From: Jon Maloy @ 2025-03-30 21:06 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

Now that ICMP pass-through in the socket-to-tap direction is in place,
it is easy to support UDP-based traceroute functionality in the
tap-to-socket direction.

We add that support in this commit.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
v2: - Use ancillary data instead of setsockopt() to pass the outgoing
      TTL.
    - Support IPv6.
v3: - Store the TTL per packet instead of per flow. This may not be
      elegant, but it is much less intrusive than changing the flow
      criteria. It also eliminates the need for the extra,
      flow-changing patch we introduced in v2.
---
 packet.c | 28 +++++++++++++++++-----------
 packet.h | 30 ++++++++++++++++++++++--------
 tap.c    |  3 ++-
 udp.c    | 28 ++++++++++++++++++++++++----
 udp.h    |  3 ++-
 5 files changed, 67 insertions(+), 25 deletions(-)

diff --git a/packet.c b/packet.c
index 72c6158..36a32fe 100644
--- a/packet.c
+++ b/packet.c
@@ -89,11 +89,12 @@ bool pool_full(const struct pool *p)
  * @p:		Existing pool
  * @len:	Length of new descriptor
  * @start:	Start of data
+ * @ttl:	TTL/hop_limit for this packet
  * @func:	For tracing: name of calling function
  * @line:	For tracing: caller line of function call
  */
 void packet_add_do(struct pool *p, size_t len, const char *start,
-		   const char *func, int line)
+		   const uint8_t ttl, const char *func, int line)
 {
 	size_t idx = p->count;
 
@@ -106,8 +107,9 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
 	if (packet_check_range(p, start, len, func, line))
 		return;
 
-	p->pkt[idx].iov_base = (void *)start;
-	p->pkt[idx].iov_len = len;
+	p->pkt[idx].iov.iov_base = (void *)start;
+	p->pkt[idx].iov.iov_len = len;
+	p->pkt[idx].ttl = ttl;
 
 	p->count++;
 }
@@ -125,7 +127,8 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
  * Return: pointer to start of data range, NULL on invalid range or descriptor
  */
 void *packet_get_try_do(const struct pool *p, size_t idx, size_t offset,
-			size_t len, size_t *left, const char *func, int line)
+			size_t len, size_t *left, uint8_t *ttl,
+			const char *func, int line)
 {
 	char *ptr;
 
@@ -139,18 +142,21 @@ void *packet_get_try_do(const struct pool *p, size_t idx, size_t offset,
 		return NULL;
 	}
 
-	if (offset > p->pkt[idx].iov_len ||
-	    len > (p->pkt[idx].iov_len - offset))
+	if (offset > p->pkt[idx].iov.iov_len ||
+	    len > (p->pkt[idx].iov.iov_len - offset))
 		return NULL;
 
-	ptr = (char *)p->pkt[idx].iov_base + offset;
+	ptr = (char *)p->pkt[idx].iov.iov_base + offset;
 
 	ASSERT_WITH_MSG(!packet_check_range(p, ptr, len, func, line),
 			"Corrupt packet pool, %s:%i", func, line);
 
 	if (left)
-		*left = p->pkt[idx].iov_len - offset - len;
+		*left = p->pkt[idx].iov.iov_len - offset - len;
 
+	if (ttl)
+		*ttl =  p->pkt[idx].ttl;
+;
 	return ptr;
 }
 
@@ -168,14 +174,14 @@ void *packet_get_try_do(const struct pool *p, size_t idx, size_t offset,
  */
 void *packet_get_do(const struct pool *p, const size_t idx,
 		    size_t offset, size_t len, size_t *left,
-		    const char *func, int line)
+		    uint8_t *ttl, const char *func, int line)
 {
-	void *r = packet_get_try_do(p, idx, offset, len, left, func, line);
+	void *r = packet_get_try_do(p, idx, offset, len, left, ttl, func, line);
 
 	if (!r) {
 		trace("missing packet data length %zu, offset %zu from "
 		      "length %zu, %s:%i",
-		      len, offset, p->pkt[idx].iov_len, func, line);
+		      len, offset, p->pkt[idx].iov.iov_len, func, line);
 	}
 
 	return r;
diff --git a/packet.h b/packet.h
index c94780a..1f5142c 100644
--- a/packet.h
+++ b/packet.h
@@ -11,6 +11,8 @@
 /* Maximum size of a single packet stored in pool, including headers */
 #define PACKET_MAX_LEN	((size_t)UINT16_MAX)
 
+#define DEFAULT_TTL 64
+
 /**
  * struct pool - Generic pool of packets stored in a buffer
  * @buf:	Buffer storing packet descriptors,
@@ -26,28 +28,36 @@ struct pool {
 	size_t buf_size;
 	size_t size;
 	size_t count;
-	struct iovec pkt[];
+	struct {
+		struct iovec iov;
+		uint8_t ttl;
+		uint8_t pad[3];
+	} pkt[];
 };
 
 int vu_packet_check_range(void *buf, const char *ptr, size_t len);
 void packet_add_do(struct pool *p, size_t len, const char *start,
-		   const char *func, int line);
+		   const uint8_t ttl, const char *func, int line);
 void *packet_get_try_do(const struct pool *p, const size_t idx,
 			size_t offset, size_t len, size_t *left,
-			const char *func, int line);
+			uint8_t *ttl, const char *func, int line);
 void *packet_get_do(const struct pool *p, const size_t idx,
 		    size_t offset, size_t len, size_t *left,
-		    const char *func, int line);
+		    uint8_t *ttl, const char *func, int line);
 bool pool_full(const struct pool *p);
 void pool_flush(struct pool *p);
 
 #define packet_add(p, len, start)					\
-	packet_add_do(p, len, start, __func__, __LINE__)
+	packet_add_do(p, len, start, DEFAULT_TTL, __func__, __LINE__)
+#define packet_add_ttl(p, len, start, ttl)					\
+	packet_add_do(p, len, start, ttl, __func__, __LINE__)
 
 #define packet_get_try(p, idx, offset, len, left)			\
-	packet_get_try_do(p, idx, offset, len, left, __func__, __LINE__)
+	packet_get_try_do(p, idx, offset, len, left, NULL, __func__, __LINE__)
 #define packet_get(p, idx, offset, len, left)				\
-	packet_get_do(p, idx, offset, len, left, __func__, __LINE__)
+	packet_get_do(p, idx, offset, len, left, NULL, __func__, __LINE__)
+#define packet_get_ttl(p, idx, offset, len, left, ttl)				\
+	packet_get_do(p, idx, offset, len, left, ttl, __func__, __LINE__)
 
 #define PACKET_POOL_DECL(_name, _size, _buf)				\
 struct _name ## _t {							\
@@ -55,7 +65,11 @@ struct _name ## _t {							\
 	size_t buf_size;						\
 	size_t size;							\
 	size_t count;							\
-	struct iovec pkt[_size];					\
+	struct {							\
+	struct iovec iov;						\
+		uint8_t ttl;						\
+		uint8_t pad[3];						\
+	} pkt[_size];							\
 }
 
 #define PACKET_POOL_INIT_NOCAST(_size, _buf, _buf_size)			\
diff --git a/tap.c b/tap.c
index 3a6fcbe..ac9b3df 100644
--- a/tap.c
+++ b/tap.c
@@ -563,6 +563,7 @@ PACKET_POOL_DECL(pool_l4, UIO_MAXIOV, pkt_buf);
  * @dest:	Destination port
  * @saddr:	Source address
  * @daddr:	Destination address
+ * @ttl:	TTL/hop_limit for packet
  * @msg:	Array of messages that can be handled in a single call
  */
 static struct tap4_l4_t {
@@ -821,7 +822,7 @@ resume:
 #undef L4_SET
 
 append:
-		packet_add((struct pool *)&seq->p, l4len, l4h);
+		packet_add_ttl((struct pool *)&seq->p, l4len, l4h, iph->ttl);
 	}
 
 	for (j = 0, seq = tap4_l4; j < seq_count; j++, seq++) {
diff --git a/udp.c b/udp.c
index 39431d7..5fbba49 100644
--- a/udp.c
+++ b/udp.c
@@ -859,8 +859,10 @@ fail:
  */
 int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		    sa_family_t af, const void *saddr, const void *daddr,
-		    const struct pool *p, int idx, const struct timespec *now)
+		    const struct pool *p, int idx,
+		    const struct timespec *now)
 {
+	char ancillary[CMSG_SPACE(sizeof(int))];
 	const struct flowside *toside;
 	struct mmsghdr mm[UIO_MAXIOV];
 	union sockaddr_inany to_sa;
@@ -885,7 +887,9 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 	src = ntohs(uh->source);
 	dst = ntohs(uh->dest);
 
-	tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr, src, dst, now);
+	tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr,
+				   src, dst, now);
+
 	if (!(uflow = udp_at_sidx(tosidx))) {
 		char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN];
 
@@ -915,8 +919,9 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 	for (i = 0; i < (int)p->count - idx; i++) {
 		struct udphdr *uh_send;
 		size_t len;
+		uint8_t ttl;
 
-		uh_send = packet_get(p, idx + i, 0, sizeof(*uh), &len);
+		uh_send = packet_get_ttl(p, idx + i, 0, sizeof(*uh), &len, &ttl);
 		if (!uh_send)
 			return p->count - idx;
 
@@ -926,7 +931,6 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		if (len) {
 			m[i].iov_base = (char *)(uh_send + 1);
 			m[i].iov_len = len;
-
 			mm[i].msg_hdr.msg_iov = m + i;
 			mm[i].msg_hdr.msg_iovlen = 1;
 		} else {
@@ -938,6 +942,22 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		mm[i].msg_hdr.msg_controllen = 0;
 		mm[i].msg_hdr.msg_flags = 0;
 
+		if (ttl != DEFAULT_TTL) {
+			struct cmsghdr *cmsg = (void *) ancillary;
+
+			if (af == AF_INET) {
+				cmsg->cmsg_level = IPPROTO_IP;
+				cmsg->cmsg_type = IP_TTL;
+			} else {
+				cmsg->cmsg_level = IPPROTO_IPV6;
+				cmsg->cmsg_type = IPV6_HOPLIMIT;
+			}
+			cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+			*((int *) CMSG_DATA(cmsg)) = ttl;
+			mm[i].msg_hdr.msg_control = ancillary;
+			mm[i].msg_hdr.msg_controllen = sizeof(ancillary);
+		}
+
 		count++;
 	}
 
diff --git a/udp.h b/udp.h
index de2df6d..6adbfcd 100644
--- a/udp.h
+++ b/udp.h
@@ -15,7 +15,8 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
 			    uint32_t events, const struct timespec *now);
 int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		    sa_family_t af, const void *saddr, const void *daddr,
-		    const struct pool *p, int idx, const struct timespec *now);
+		    const struct pool *p, int idx,
+		    const struct timespec *now);
 int udp_sock_init(const struct ctx *c, int ns, const union inany_addr *addr,
 		  const char *ifname, in_port_t port);
 int udp_init(struct ctx *c);
-- 
@@ -15,7 +15,8 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
 			    uint32_t events, const struct timespec *now);
 int udp_tap_handler(const struct ctx *c, uint8_t pif,
 		    sa_family_t af, const void *saddr, const void *daddr,
-		    const struct pool *p, int idx, const struct timespec *now);
+		    const struct pool *p, int idx,
+		    const struct timespec *now);
 int udp_sock_init(const struct ctx *c, int ns, const union inany_addr *addr,
 		  const char *ifname, in_port_t port);
 int udp_init(struct ctx *c);
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-03-31  5:25 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-30 21:06 [PATCH v3] udp: support traceroute Jon Maloy
2025-03-31  5:23 ` David Gibson

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).