From: Jon Maloy <jmaloy@redhat.com>
To: passt-dev@passt.top, sbrivio@redhat.com, lvivier@redhat.com,
dgibson@redhat.com, jmaloy@redhat.com
Subject: [PATCH v3] udp: support traceroute
Date: Sun, 30 Mar 2025 17:06:28 -0400 [thread overview]
Message-ID: <20250330210628.47752-1-jmaloy@redhat.com> (raw)
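Now that ICMP pass-through from socket-to-tap is in place, it is
easy to support UDP-based traceroute functionality in the
tap-to-socket direction.
We add that support in this commit: the TTL/hop limit of each packet
received from the tap is stored alongside it in the packet pool and is
passed to the outgoing socket as ancillary data (IP_TTL or
IPV6_HOPLIMIT) whenever it differs from the default.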
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
v2: - Using ancillary data instead of setsockopt to transfer outgoing
TTL.
- Support IPv6
v3: - Storing the TTL per packet instead of per flow. This may not be
elegant, but it is much less intrusive than changing the flow
criteria. This eliminates the need for the extra, flow-changing
patch we introduced in v2.
---
packet.c | 28 +++++++++++++++++-----------
packet.h | 30 ++++++++++++++++++++++--------
tap.c | 3 ++-
udp.c | 28 ++++++++++++++++++++++++----
udp.h | 3 ++-
5 files changed, 67 insertions(+), 25 deletions(-)
diff --git a/packet.c b/packet.c
index 72c6158..36a32fe 100644
--- a/packet.c
+++ b/packet.c
@@ -89,11 +89,12 @@ bool pool_full(const struct pool *p)
* @p: Existing pool
* @len: Length of new descriptor
* @start: Start of data
+ * @ttl: TTL/hop_limit for this packet
* @func: For tracing: name of calling function
* @line: For tracing: caller line of function call
*/
void packet_add_do(struct pool *p, size_t len, const char *start,
- const char *func, int line)
+ const uint8_t ttl, const char *func, int line)
{
size_t idx = p->count;
@@ -106,8 +107,9 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
if (packet_check_range(p, start, len, func, line))
return;
- p->pkt[idx].iov_base = (void *)start;
- p->pkt[idx].iov_len = len;
+ p->pkt[idx].iov.iov_base = (void *)start;
+ p->pkt[idx].iov.iov_len = len;
+ p->pkt[idx].ttl = ttl;
p->count++;
}
@@ -125,7 +127,8 @@ void packet_add_do(struct pool *p, size_t len, const char *start,
* Return: pointer to start of data range, NULL on invalid range or descriptor
*/
void *packet_get_try_do(const struct pool *p, size_t idx, size_t offset,
- size_t len, size_t *left, const char *func, int line)
+ size_t len, size_t *left, uint8_t *ttl,
+ const char *func, int line)
{
char *ptr;
@@ -139,18 +142,21 @@ void *packet_get_try_do(const struct pool *p, size_t idx, size_t offset,
return NULL;
}
- if (offset > p->pkt[idx].iov_len ||
- len > (p->pkt[idx].iov_len - offset))
+ if (offset > p->pkt[idx].iov.iov_len ||
+ len > (p->pkt[idx].iov.iov_len - offset))
return NULL;
- ptr = (char *)p->pkt[idx].iov_base + offset;
+ ptr = (char *)p->pkt[idx].iov.iov_base + offset;
ASSERT_WITH_MSG(!packet_check_range(p, ptr, len, func, line),
"Corrupt packet pool, %s:%i", func, line);
if (left)
- *left = p->pkt[idx].iov_len - offset - len;
+ *left = p->pkt[idx].iov.iov_len - offset - len;
+ if (ttl)
+ *ttl = p->pkt[idx].ttl;
+;
return ptr;
}
@@ -168,14 +174,14 @@ void *packet_get_try_do(const struct pool *p, size_t idx, size_t offset,
*/
void *packet_get_do(const struct pool *p, const size_t idx,
size_t offset, size_t len, size_t *left,
- const char *func, int line)
+ uint8_t *ttl, const char *func, int line)
{
- void *r = packet_get_try_do(p, idx, offset, len, left, func, line);
+ void *r = packet_get_try_do(p, idx, offset, len, left, ttl, func, line);
if (!r) {
trace("missing packet data length %zu, offset %zu from "
"length %zu, %s:%i",
- len, offset, p->pkt[idx].iov_len, func, line);
+ len, offset, p->pkt[idx].iov.iov_len, func, line);
}
return r;
diff --git a/packet.h b/packet.h
index c94780a..1f5142c 100644
--- a/packet.h
+++ b/packet.h
@@ -11,6 +11,8 @@
/* Maximum size of a single packet stored in pool, including headers */
#define PACKET_MAX_LEN ((size_t)UINT16_MAX)
+#define DEFAULT_TTL 64
+
/**
* struct pool - Generic pool of packets stored in a buffer
* @buf: Buffer storing packet descriptors,
@@ -26,28 +28,36 @@ struct pool {
size_t buf_size;
size_t size;
size_t count;
- struct iovec pkt[];
+ struct {
+ struct iovec iov;
+ uint8_t ttl;
+ uint8_t pad[3];
+ } pkt[];
};
int vu_packet_check_range(void *buf, const char *ptr, size_t len);
void packet_add_do(struct pool *p, size_t len, const char *start,
- const char *func, int line);
+ const uint8_t ttl, const char *func, int line);
void *packet_get_try_do(const struct pool *p, const size_t idx,
size_t offset, size_t len, size_t *left,
- const char *func, int line);
+ uint8_t *ttl, const char *func, int line);
void *packet_get_do(const struct pool *p, const size_t idx,
size_t offset, size_t len, size_t *left,
- const char *func, int line);
+ uint8_t *ttl, const char *func, int line);
bool pool_full(const struct pool *p);
void pool_flush(struct pool *p);
#define packet_add(p, len, start) \
- packet_add_do(p, len, start, __func__, __LINE__)
+ packet_add_do(p, len, start, DEFAULT_TTL, __func__, __LINE__)
+#define packet_add_ttl(p, len, start, ttl) \
+ packet_add_do(p, len, start, ttl, __func__, __LINE__)
#define packet_get_try(p, idx, offset, len, left) \
- packet_get_try_do(p, idx, offset, len, left, __func__, __LINE__)
+ packet_get_try_do(p, idx, offset, len, left, NULL, __func__, __LINE__)
#define packet_get(p, idx, offset, len, left) \
- packet_get_do(p, idx, offset, len, left, __func__, __LINE__)
+ packet_get_do(p, idx, offset, len, left, NULL, __func__, __LINE__)
+#define packet_get_ttl(p, idx, offset, len, left, ttl) \
+ packet_get_do(p, idx, offset, len, left, ttl, __func__, __LINE__)
#define PACKET_POOL_DECL(_name, _size, _buf) \
struct _name ## _t { \
@@ -55,7 +65,11 @@ struct _name ## _t { \
size_t buf_size; \
size_t size; \
size_t count; \
- struct iovec pkt[_size]; \
+ struct { \
+ struct iovec iov; \
+ uint8_t ttl; \
+ uint8_t pad[3]; \
+ } pkt[_size]; \
}
#define PACKET_POOL_INIT_NOCAST(_size, _buf, _buf_size) \
diff --git a/tap.c b/tap.c
index 3a6fcbe..ac9b3df 100644
--- a/tap.c
+++ b/tap.c
@@ -563,6 +563,7 @@ PACKET_POOL_DECL(pool_l4, UIO_MAXIOV, pkt_buf);
* @dest: Destination port
* @saddr: Source address
* @daddr: Destination address
+ * @ttl: TTL/hop_limit for packet
* @msg: Array of messages that can be handled in a single call
*/
static struct tap4_l4_t {
@@ -821,7 +822,7 @@ resume:
#undef L4_SET
append:
- packet_add((struct pool *)&seq->p, l4len, l4h);
+ packet_add_ttl((struct pool *)&seq->p, l4len, l4h, iph->ttl);
}
for (j = 0, seq = tap4_l4; j < seq_count; j++, seq++) {
diff --git a/udp.c b/udp.c
index 39431d7..5fbba49 100644
--- a/udp.c
+++ b/udp.c
@@ -859,8 +859,10 @@ fail:
*/
int udp_tap_handler(const struct ctx *c, uint8_t pif,
sa_family_t af, const void *saddr, const void *daddr,
- const struct pool *p, int idx, const struct timespec *now)
+ const struct pool *p, int idx,
+ const struct timespec *now)
{
+ char ancillary[CMSG_SPACE(sizeof(int))];
const struct flowside *toside;
struct mmsghdr mm[UIO_MAXIOV];
union sockaddr_inany to_sa;
@@ -885,7 +887,9 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
src = ntohs(uh->source);
dst = ntohs(uh->dest);
- tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr, src, dst, now);
+ tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr,
+ src, dst, now);
+
if (!(uflow = udp_at_sidx(tosidx))) {
char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN];
@@ -915,8 +919,9 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
for (i = 0; i < (int)p->count - idx; i++) {
struct udphdr *uh_send;
size_t len;
+ uint8_t ttl;
- uh_send = packet_get(p, idx + i, 0, sizeof(*uh), &len);
+ uh_send = packet_get_ttl(p, idx + i, 0, sizeof(*uh), &len, &ttl);
if (!uh_send)
return p->count - idx;
@@ -926,7 +931,6 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
if (len) {
m[i].iov_base = (char *)(uh_send + 1);
m[i].iov_len = len;
-
mm[i].msg_hdr.msg_iov = m + i;
mm[i].msg_hdr.msg_iovlen = 1;
} else {
@@ -938,6 +942,22 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
mm[i].msg_hdr.msg_controllen = 0;
mm[i].msg_hdr.msg_flags = 0;
+ if (ttl != DEFAULT_TTL) {
+ struct cmsghdr *cmsg = (void *) ancillary;
+
+ if (af == AF_INET) {
+ cmsg->cmsg_level = IPPROTO_IP;
+ cmsg->cmsg_type = IP_TTL;
+ } else {
+ cmsg->cmsg_level = IPPROTO_IPV6;
+ cmsg->cmsg_type = IPV6_HOPLIMIT;
+ }
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ *((int *) CMSG_DATA(cmsg)) = ttl;
+ mm[i].msg_hdr.msg_control = ancillary;
+ mm[i].msg_hdr.msg_controllen = sizeof(ancillary);
+ }
+
count++;
}
diff --git a/udp.h b/udp.h
index de2df6d..6adbfcd 100644
--- a/udp.h
+++ b/udp.h
@@ -15,7 +15,8 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
uint32_t events, const struct timespec *now);
int udp_tap_handler(const struct ctx *c, uint8_t pif,
sa_family_t af, const void *saddr, const void *daddr,
- const struct pool *p, int idx, const struct timespec *now);
+ const struct pool *p, int idx,
+ const struct timespec *now);
int udp_sock_init(const struct ctx *c, int ns, const union inany_addr *addr,
const char *ifname, in_port_t port);
int udp_init(struct ctx *c);
--
@@ -15,7 +15,8 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
uint32_t events, const struct timespec *now);
int udp_tap_handler(const struct ctx *c, uint8_t pif,
sa_family_t af, const void *saddr, const void *daddr,
- const struct pool *p, int idx, const struct timespec *now);
+ const struct pool *p, int idx,
+ const struct timespec *now);
int udp_sock_init(const struct ctx *c, int ns, const union inany_addr *addr,
const char *ifname, in_port_t port);
int udp_init(struct ctx *c);
--
2.48.1
next reply other threads:[~2025-03-30 21:06 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-30 21:06 Jon Maloy [this message]
2025-03-31 5:23 ` [PATCH v3] udp: support traceroute David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250330210628.47752-1-jmaloy@redhat.com \
--to=jmaloy@redhat.com \
--cc=dgibson@redhat.com \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).