From: Jon Maloy <jmaloy@redhat.com>
To: passt-dev@passt.top, sbrivio@redhat.com, lvivier@redhat.com,
dgibson@redhat.com, jmaloy@redhat.com
Subject: [PATCH v2 1/2] Add ttl to outgoing flow structure lookups
Date: Thu, 27 Mar 2025 12:51:54 -0400 [thread overview]
Message-ID: <20250327165155.3354811-2-jmaloy@redhat.com> (raw)
In-Reply-To: <20250327165155.3354811-1-jmaloy@redhat.com>
As preparation for future enhancements we add ttl/hop limit as creation
and lookup criteria for outgoing flows. This comes in addition to the
regular 4-tuple which is currently used.
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
flow.c | 17 ++++++++++++-----
flow.h | 8 ++++++--
flow_table.h | 3 ++-
icmp.c | 15 ++++++++++-----
icmp.h | 2 +-
packet.h | 2 ++
tap.c | 25 ++++++++++++++++++-------
tcp.c | 6 +++---
udp.c | 8 ++++++--
udp.h | 3 ++-
udp_flow.c | 7 ++++---
udp_flow.h | 2 +-
12 files changed, 67 insertions(+), 31 deletions(-)
diff --git a/flow.c b/flow.c
index 8622242..6796f73 100644
--- a/flow.c
+++ b/flow.c
@@ -137,10 +137,12 @@ static struct timespec flow_timer_run;
* @eport: Endpoint port
* @oaddr: Our address (pointer to in_addr or in6_addr)
* @oport: Our port
+ * @ttl: TTL/hop limit for packets in flow
*/
static void flowside_from_af(struct flowside *side, sa_family_t af,
const void *eaddr, in_port_t eport,
- const void *oaddr, in_port_t oport)
+ const void *oaddr, in_port_t oport,
+ uint8_t ttl)
{
if (oaddr)
inany_from_af(&side->oaddr, af, oaddr);
@@ -153,6 +155,8 @@ static void flowside_from_af(struct flowside *side, sa_family_t af,
else
side->eaddr = inany_any6;
side->eport = eport;
+
+ side->ttl = ttl;
}
/**
@@ -376,17 +380,19 @@ static void flow_initiate_(union flow *flow, uint8_t pif)
* @sport: Endpoint port
* @daddr: Destination address (pointer to in_addr or in6_addr)
* @dport: Destination port
+ * @ttl: TTL/hop_limit for packets in flow
*
* Return: pointer to the initiating flowside information
*/
const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif,
sa_family_t af,
const void *saddr, in_port_t sport,
- const void *daddr, in_port_t dport)
+ const void *daddr, in_port_t dport,
+ uint8_t ttl)
{
struct flowside *ini = &flow->f.side[INISIDE];
- flowside_from_af(ini, af, saddr, sport, daddr, dport);
+ flowside_from_af(ini, af, saddr, sport, daddr, dport, ttl);
flow_initiate_(flow, pif);
return ini;
}
@@ -731,17 +737,18 @@ static flow_sidx_t flowside_lookup(const struct ctx *c, uint8_t proto,
* @oaddr: Our guest side address (guest remote address)
* @eport: Guest side endpoint port (guest local port)
* @oport: Our guest side port (guest remote port)
+ * @ttl: TTL/hop_limit of flow we are looking for
*
* Return: sidx of the matching flow & side, FLOW_SIDX_NONE if not found
*/
flow_sidx_t flow_lookup_af(const struct ctx *c,
uint8_t proto, uint8_t pif, sa_family_t af,
const void *eaddr, const void *oaddr,
- in_port_t eport, in_port_t oport)
+ in_port_t eport, in_port_t oport, uint8_t ttl)
{
struct flowside side;
- flowside_from_af(&side, af, eaddr, eport, oaddr, oport);
+ flowside_from_af(&side, af, eaddr, eport, oaddr, oport, ttl);
return flowside_lookup(c, proto, pif, &side);
}
diff --git a/flow.h b/flow.h
index dcf7645..2ba4a94 100644
--- a/flow.h
+++ b/flow.h
@@ -143,12 +143,14 @@ extern const uint8_t flow_proto[];
* @oaddr: Our address (local address from passt's PoV)
* @eport: Endpoint port
* @oport: Our port
+ * @ttl: TTL/hop limit for this flow
*/
struct flowside {
union inany_addr oaddr;
union inany_addr eaddr;
in_port_t oport;
in_port_t eport;
+ uint8_t ttl;
};
/**
@@ -163,7 +165,8 @@ static inline bool flowside_eq(const struct flowside *left,
return inany_equals(&left->eaddr, &right->eaddr) &&
left->eport == right->eport &&
inany_equals(&left->oaddr, &right->oaddr) &&
- left->oport == right->oport;
+ left->oport == right->oport &&
+ left->ttl == right->ttl;
}
int flowside_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif,
@@ -241,7 +244,8 @@ void flow_hash_remove(const struct ctx *c, flow_sidx_t sidx);
flow_sidx_t flow_lookup_af(const struct ctx *c,
uint8_t proto, uint8_t pif, sa_family_t af,
const void *eaddr, const void *oaddr,
- in_port_t eport, in_port_t oport);
+ in_port_t eport, in_port_t oport,
+ uint8_t ttl);
flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif,
const void *esa, in_port_t oport);
diff --git a/flow_table.h b/flow_table.h
index fd2c57b..0b5b431 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -196,7 +196,8 @@ void flow_alloc_cancel(union flow *flow);
const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif,
sa_family_t af,
const void *saddr, in_port_t sport,
- const void *daddr, in_port_t dport);
+ const void *daddr, in_port_t dport,
+ uint8_t ttl);
struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif,
const union sockaddr_inany *ssa,
in_port_t dport);
diff --git a/icmp.c b/icmp.c
index 7e2b342..cbaa000 100644
--- a/icmp.c
+++ b/icmp.c
@@ -162,12 +162,14 @@ static void icmp_ping_close(const struct ctx *c,
* @id: ICMP id for the new socket
* @saddr: Source address
* @daddr: Destination address
+ * @ttl: TTL/hop_limit
*
* Return: Newly opened ping flow, or NULL on failure
*/
static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
sa_family_t af, uint16_t id,
- const void *saddr, const void *daddr)
+ const void *saddr, const void *daddr,
+ uint8_t ttl)
{
uint8_t proto = af == AF_INET ? IPPROTO_ICMP : IPPROTO_ICMPV6;
uint8_t flowtype = af == AF_INET ? FLOW_PING4 : FLOW_PING6;
@@ -179,7 +181,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
if (!flow)
return NULL;
- flow_initiate_af(flow, PIF_TAP, af, saddr, id, daddr, id);
+ flow_initiate_af(flow, PIF_TAP, af, saddr, id, daddr, id, ttl);
if (!(tgt = flow_target(c, flow, proto)))
goto cancel;
@@ -235,7 +237,7 @@ cancel:
* Return: count of consumed packets (always 1, even if malformed)
*/
int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
- const void *saddr, const void *daddr,
+ const void *saddr, const void *daddr, uint8_t ttl,
const struct pool *p, const struct timespec *now)
{
struct icmp_ping_flow *pingf;
@@ -286,11 +288,14 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
}
flow = flow_at_sidx(flow_lookup_af(c, proto, PIF_TAP,
- af, saddr, daddr, id, id));
+ af, saddr, daddr, id, id, ttl));
if (flow)
pingf = &flow->ping;
- else if (!(pingf = icmp_ping_new(c, af, id, saddr, daddr)))
+ else
+ pingf = icmp_ping_new(c, af, id, saddr, daddr, ttl);
+
+ if (!pingf)
return 1;
tgt = &pingf->f.side[TGTSIDE];
diff --git a/icmp.h b/icmp.h
index 5ce22b5..18168ab 100644
--- a/icmp.h
+++ b/icmp.h
@@ -13,7 +13,7 @@ struct icmp_ping_flow;
void icmp_sock_handler(const struct ctx *c, union epoll_ref ref);
int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
- const void *saddr, const void *daddr,
+ const void *saddr, const void *daddr, uint8_t ttl,
const struct pool *p, const struct timespec *now);
void icmp_init(void);
diff --git a/packet.h b/packet.h
index c94780a..e84e123 100644
--- a/packet.h
+++ b/packet.h
@@ -11,6 +11,8 @@
/* Maximum size of a single packet stored in pool, including headers */
#define PACKET_MAX_LEN ((size_t)UINT16_MAX)
+#define DEFAULT_TTL 64
+
/**
* struct pool - Generic pool of packets stored in a buffer
* @buf: Buffer storing packet descriptors,
diff --git a/tap.c b/tap.c
index 3a6fcbe..c7d82ca 100644
--- a/tap.c
+++ b/tap.c
@@ -563,6 +563,7 @@ PACKET_POOL_DECL(pool_l4, UIO_MAXIOV, pkt_buf);
* @dest: Destination port
* @saddr: Source address
* @daddr: Destination address
+ * @ttl: Time to live
* @msg: Array of messages that can be handled in a single call
*/
static struct tap4_l4_t {
@@ -574,6 +575,8 @@ static struct tap4_l4_t {
struct in_addr saddr;
struct in_addr daddr;
+ uint8_t ttl;
+
struct pool_l4_t p;
} tap4_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */];
@@ -586,6 +589,7 @@ static struct tap4_l4_t {
* @dest: Destination port
* @saddr: Source address
* @daddr: Destination address
+ * @hop_limit: Hop limit
* @msg: Array of messages that can be handled in a single call
*/
static struct tap6_l4_t {
@@ -598,6 +602,8 @@ static struct tap6_l4_t {
struct in6_addr saddr;
struct in6_addr daddr;
+ uint8_t hop_limit;
+
struct pool_l4_t p;
} tap6_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */];
@@ -761,7 +767,7 @@ resume:
packet_add(pkt, l4len, l4h);
icmp_tap_handler(c, PIF_TAP, AF_INET,
&iph->saddr, &iph->daddr,
- pkt, now);
+ iph->ttl, pkt, now);
continue;
}
@@ -786,7 +792,8 @@ resume:
#define L4_MATCH(iph, uh, seq) \
((seq)->protocol == (iph)->protocol && \
(seq)->source == (uh)->source && (seq)->dest == (uh)->dest && \
- (seq)->saddr.s_addr == (iph)->saddr && (seq)->daddr.s_addr == (iph)->daddr)
+ (seq)->saddr.s_addr == (iph)->saddr && \
+ (seq)->daddr.s_addr == (iph)->daddr && (seq)->ttl == (iph)->ttl)
#define L4_SET(iph, uh, seq) \
do { \
@@ -795,6 +802,7 @@ resume:
(seq)->dest = (uh)->dest; \
(seq)->saddr.s_addr = (iph)->saddr; \
(seq)->daddr.s_addr = (iph)->daddr; \
+ (seq)->ttl = (iph)->ttl; \
} while (0)
if (seq && L4_MATCH(iph, uh, seq) && seq->p.count < UIO_MAXIOV)
@@ -843,7 +851,7 @@ append:
for (k = 0; k < p->count; )
k += udp_tap_handler(c, PIF_TAP, AF_INET,
&seq->saddr, &seq->daddr,
- p, k, now);
+ seq->ttl, p, k, now);
}
}
@@ -878,6 +886,7 @@ resume:
const struct ethhdr *eh;
const struct udphdr *uh;
struct ipv6hdr *ip6h;
+ uint8_t hop_limit;
uint8_t proto;
char *l4h;
@@ -891,7 +900,7 @@ resume:
saddr = &ip6h->saddr;
daddr = &ip6h->daddr;
-
+ hop_limit = ip6h->hop_limit;
plen = ntohs(ip6h->payload_len);
if (plen != check)
continue;
@@ -938,7 +947,7 @@ resume:
tap_packet_debug(NULL, ip6h, NULL, proto, NULL, 1);
icmp_tap_handler(c, PIF_TAP, AF_INET6,
- saddr, daddr, pkt, now);
+ saddr, daddr, hop_limit, pkt, now);
continue;
}
@@ -966,7 +975,8 @@ resume:
(seq)->dest == (uh)->dest && \
(seq)->flow_lbl == ip6_get_flow_lbl(ip6h) && \
IN6_ARE_ADDR_EQUAL(&(seq)->saddr, saddr) && \
- IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr))
+ IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr) && \
+ (seq)->hop_limit == ip6h->hop_limit)
#define L4_SET(ip6h, proto, uh, seq) \
do { \
@@ -976,6 +986,7 @@ resume:
(seq)->flow_lbl = ip6_get_flow_lbl(ip6h); \
(seq)->saddr = *saddr; \
(seq)->daddr = *daddr; \
+ (seq)->hop_limit = ip6h->hop_limit; \
} while (0)
if (seq && L4_MATCH(ip6h, proto, uh, seq) &&
@@ -1026,7 +1037,7 @@ append:
for (k = 0; k < p->count; )
k += udp_tap_handler(c, PIF_TAP, AF_INET6,
&seq->saddr, &seq->daddr,
- p, k, now);
+ seq->hop_limit, p, k, now);
}
}
diff --git a/tcp.c b/tcp.c
index fa1d885..5751d21 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1446,8 +1446,8 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
if (!(flow = flow_alloc()))
return;
- ini = flow_initiate_af(flow, PIF_TAP,
- af, saddr, srcport, daddr, dstport);
+ ini = flow_initiate_af(flow, PIF_TAP, af, saddr, srcport,
+ daddr, dstport, DEFAULT_TTL);
if (!(tgt = flow_target(c, flow, IPPROTO_TCP)))
goto cancel;
@@ -1977,7 +1977,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
opts = packet_get(p, idx, sizeof(*th), optlen, NULL);
sidx = flow_lookup_af(c, IPPROTO_TCP, PIF_TAP, af, saddr, daddr,
- ntohs(th->source), ntohs(th->dest));
+ ntohs(th->source), ntohs(th->dest), DEFAULT_TTL);
flow = flow_at_sidx(sidx);
/* New connection from tap */
diff --git a/udp.c b/udp.c
index 0c223b4..8a2c593 100644
--- a/udp.c
+++ b/udp.c
@@ -847,6 +847,7 @@ fail:
* @af: Address family, AF_INET or AF_INET6
* @saddr: Source address
* @daddr: Destination address
+ * @ttl: TTL for packets to be sent in this call
* @p: Pool of UDP packets, with UDP headers
* @idx: Index of first packet to process
* @now: Current timestamp
@@ -857,7 +858,8 @@ fail:
*/
int udp_tap_handler(const struct ctx *c, uint8_t pif,
sa_family_t af, const void *saddr, const void *daddr,
- const struct pool *p, int idx, const struct timespec *now)
+ uint8_t ttl, const struct pool *p, int idx,
+ const struct timespec *now)
{
const struct flowside *toside;
struct mmsghdr mm[UIO_MAXIOV];
@@ -883,7 +885,9 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif,
src = ntohs(uh->source);
dst = ntohs(uh->dest);
- tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr, src, dst, now);
+ tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr,
+ src, dst, ttl, now);
+
if (!(uflow = udp_at_sidx(tosidx))) {
char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN];
diff --git a/udp.h b/udp.h
index de2df6d..041fad4 100644
--- a/udp.h
+++ b/udp.h
@@ -15,7 +15,8 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
uint32_t events, const struct timespec *now);
int udp_tap_handler(const struct ctx *c, uint8_t pif,
sa_family_t af, const void *saddr, const void *daddr,
- const struct pool *p, int idx, const struct timespec *now);
+ uint8_t ttl, const struct pool *p, int idx,
+ const struct timespec *now);
int udp_sock_init(const struct ctx *c, int ns, const union inany_addr *addr,
const char *ifname, in_port_t port);
int udp_init(struct ctx *c);
diff --git a/udp_flow.c b/udp_flow.c
index bf4b896..db5f709 100644
--- a/udp_flow.c
+++ b/udp_flow.c
@@ -236,6 +236,7 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
* @daddr: Destination address guest side
* @srcport: Source port on guest side
* @dstport: Destination port on guest side
+ * @ttl: TTL for this flow
*
* Return: sidx for the destination side of the flow for this packet, or
* FLOW_SIDX_NONE if we couldn't find or create a flow.
@@ -244,7 +245,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
uint8_t pif, sa_family_t af,
const void *saddr, const void *daddr,
in_port_t srcport, in_port_t dstport,
- const struct timespec *now)
+ uint8_t ttl, const struct timespec *now)
{
const struct flowside *ini;
struct udp_flow *uflow;
@@ -254,7 +255,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
ASSERT(pif == PIF_TAP);
sidx = flow_lookup_af(c, IPPROTO_UDP, pif, af, saddr, daddr,
- srcport, dstport);
+ srcport, dstport, ttl);
if ((uflow = udp_at_sidx(sidx))) {
uflow->ts = now->tv_sec;
return flow_sidx_opposite(sidx);
@@ -271,7 +272,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
}
ini = flow_initiate_af(flow, PIF_TAP, af, saddr, srcport,
- daddr, dstport);
+ daddr, dstport, ttl);
if (inany_is_unspecified(&ini->eaddr) || ini->eport == 0 ||
inany_is_unspecified(&ini->oaddr) || ini->oport == 0) {
diff --git a/udp_flow.h b/udp_flow.h
index 9a1b059..7b40594 100644
--- a/udp_flow.h
+++ b/udp_flow.h
@@ -31,7 +31,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
uint8_t pif, sa_family_t af,
const void *saddr, const void *daddr,
in_port_t srcport, in_port_t dstport,
- const struct timespec *now);
+ uint8_t ttl, const struct timespec *now);
void udp_flow_close(const struct ctx *c, struct udp_flow *uflow);
bool udp_flow_defer(const struct udp_flow *uflow);
bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
--
@@ -31,7 +31,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
uint8_t pif, sa_family_t af,
const void *saddr, const void *daddr,
in_port_t srcport, in_port_t dstport,
- const struct timespec *now);
+ uint8_t ttl, const struct timespec *now);
void udp_flow_close(const struct ctx *c, struct udp_flow *uflow);
bool udp_flow_defer(const struct udp_flow *uflow);
bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
--
2.48.1
next prev parent reply other threads:[~2025-03-27 16:52 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-27 16:51 [PATCH v2 0/2] udp: add guest-to-remote traceroute Jon Maloy
2025-03-27 16:51 ` Jon Maloy [this message]
2025-03-28 1:50 ` [PATCH v2 1/2] Add ttl to outgoing flow structure lookups David Gibson
2025-03-27 16:51 ` [PATCH v2 2/2] udp: support traceroute with udp Jon Maloy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250327165155.3354811-2-jmaloy@redhat.com \
--to=jmaloy@redhat.com \
--cc=dgibson@redhat.com \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).