* [PATCH v2 0/2] udp: add guest-to-remote traceroute @ 2025-03-27 16:51 Jon Maloy 2025-03-27 16:51 ` [PATCH v2 1/2] Add ttl to outgoing flow structure lookups Jon Maloy 2025-03-27 16:51 ` [PATCH v2 2/2] udp: support traceroute with udp Jon Maloy 0 siblings, 2 replies; 4+ messages in thread From: Jon Maloy @ 2025-03-27 16:51 UTC (permalink / raw) To: passt-dev, sbrivio, lvivier, dgibson, jmaloy We add support for UDP traceroute in the tap-to-socket (guest-to-remote) direction. More will follow when this one is settled. --- v2: - Added ttl/hop_limit as flow selection criteria - Using ancillary data instead of setsockopt() to pass on ttl/hop_limit. - Added support for IPv6 Jon Maloy (2): Add ttl to outgoing flow structure lookups udp: support traceroute with udp flow.c | 17 ++++++++++++----- flow.h | 8 ++++++-- flow_table.h | 3 ++- icmp.c | 15 ++++++++++----- icmp.h | 2 +- packet.h | 2 ++ tap.c | 25 ++++++++++++++++++------- tcp.c | 6 +++--- udp.c | 31 +++++++++++++++++++++++++++---- udp.h | 3 ++- udp_flow.c | 7 ++++--- udp_flow.h | 2 +- 12 files changed, 88 insertions(+), 33 deletions(-) -- 2.48.1 ^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v2 1/2] Add ttl to outgoing flow structure lookups 2025-03-27 16:51 [PATCH v2 0/2] udp: add guest-to-remote traceroute Jon Maloy @ 2025-03-27 16:51 ` Jon Maloy 2025-03-28 1:50 ` David Gibson 2025-03-27 16:51 ` [PATCH v2 2/2] udp: support traceroute with udp Jon Maloy 1 sibling, 1 reply; 4+ messages in thread From: Jon Maloy @ 2025-03-27 16:51 UTC (permalink / raw) To: passt-dev, sbrivio, lvivier, dgibson, jmaloy As preparation for future enhancements we add ttl/hop limit as creation and lookup criteria for outgoing flows. This comes in addition to the regular 4-tuple which is currently used. Signed-off-by: Jon Maloy <jmaloy@redhat.com> --- flow.c | 17 ++++++++++++----- flow.h | 8 ++++++-- flow_table.h | 3 ++- icmp.c | 15 ++++++++++----- icmp.h | 2 +- packet.h | 2 ++ tap.c | 25 ++++++++++++++++++------- tcp.c | 6 +++--- udp.c | 8 ++++++-- udp.h | 3 ++- udp_flow.c | 7 ++++--- udp_flow.h | 2 +- 12 files changed, 67 insertions(+), 31 deletions(-) diff --git a/flow.c b/flow.c index 8622242..6796f73 100644 --- a/flow.c +++ b/flow.c @@ -137,10 +137,12 @@ static struct timespec flow_timer_run; * @eport: Endpoint port * @oaddr: Our address (pointer to in_addr or in6_addr) * @oport: Our port + * @ttl: TTL/hop limit for packets in flow */ static void flowside_from_af(struct flowside *side, sa_family_t af, const void *eaddr, in_port_t eport, - const void *oaddr, in_port_t oport) + const void *oaddr, in_port_t oport, + uint8_t ttl) { if (oaddr) inany_from_af(&side->oaddr, af, oaddr); @@ -153,6 +155,8 @@ static void flowside_from_af(struct flowside *side, sa_family_t af, else side->eaddr = inany_any6; side->eport = eport; + + side->ttl = ttl; } /** @@ -376,17 +380,19 @@ static void flow_initiate_(union flow *flow, uint8_t pif) * @sport: Endpoint port * @daddr: Destination address (pointer to in_addr or in6_addr) * @dport: Destination port + * @ttl: TTL/hop_limit for packets in flow * * Return: pointer to the initiating flowside information */ const struct flowside 
*flow_initiate_af(union flow *flow, uint8_t pif, sa_family_t af, const void *saddr, in_port_t sport, - const void *daddr, in_port_t dport) + const void *daddr, in_port_t dport, + uint8_t ttl) { struct flowside *ini = &flow->f.side[INISIDE]; - flowside_from_af(ini, af, saddr, sport, daddr, dport); + flowside_from_af(ini, af, saddr, sport, daddr, dport, ttl); flow_initiate_(flow, pif); return ini; } @@ -731,17 +737,18 @@ static flow_sidx_t flowside_lookup(const struct ctx *c, uint8_t proto, * @oaddr: Our guest side address (guest remote address) * @eport: Guest side endpoint port (guest local port) * @oport: Our guest side port (guest remote port) + * @ttl: TTL/hop_limit of flow we are looking for * * Return: sidx of the matching flow & side, FLOW_SIDX_NONE if not found */ flow_sidx_t flow_lookup_af(const struct ctx *c, uint8_t proto, uint8_t pif, sa_family_t af, const void *eaddr, const void *oaddr, - in_port_t eport, in_port_t oport) + in_port_t eport, in_port_t oport, uint8_t ttl) { struct flowside side; - flowside_from_af(&side, af, eaddr, eport, oaddr, oport); + flowside_from_af(&side, af, eaddr, eport, oaddr, oport, ttl); return flowside_lookup(c, proto, pif, &side); } diff --git a/flow.h b/flow.h index dcf7645..2ba4a94 100644 --- a/flow.h +++ b/flow.h @@ -143,12 +143,14 @@ extern const uint8_t flow_proto[]; * @oaddr: Our address (local address from passt's PoV) * @eport: Endpoint port * @oport: Our port + * @ttl: TTL/hop limit for this flow */ struct flowside { union inany_addr oaddr; union inany_addr eaddr; in_port_t oport; in_port_t eport; + uint8_t ttl; }; /** @@ -163,7 +165,8 @@ static inline bool flowside_eq(const struct flowside *left, return inany_equals(&left->eaddr, &right->eaddr) && left->eport == right->eport && inany_equals(&left->oaddr, &right->oaddr) && - left->oport == right->oport; + left->oport == right->oport && + left->ttl == right->ttl; } int flowside_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif, @@ -241,7 +244,8 @@ void 
flow_hash_remove(const struct ctx *c, flow_sidx_t sidx); flow_sidx_t flow_lookup_af(const struct ctx *c, uint8_t proto, uint8_t pif, sa_family_t af, const void *eaddr, const void *oaddr, - in_port_t eport, in_port_t oport); + in_port_t eport, in_port_t oport, + uint8_t ttl); flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif, const void *esa, in_port_t oport); diff --git a/flow_table.h b/flow_table.h index fd2c57b..0b5b431 100644 --- a/flow_table.h +++ b/flow_table.h @@ -196,7 +196,8 @@ void flow_alloc_cancel(union flow *flow); const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif, sa_family_t af, const void *saddr, in_port_t sport, - const void *daddr, in_port_t dport); + const void *daddr, in_port_t dport, + uint8_t ttl); struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif, const union sockaddr_inany *ssa, in_port_t dport); diff --git a/icmp.c b/icmp.c index 7e2b342..cbaa000 100644 --- a/icmp.c +++ b/icmp.c @@ -162,12 +162,14 @@ static void icmp_ping_close(const struct ctx *c, * @id: ICMP id for the new socket * @saddr: Source address * @daddr: Destination address + * @ttl: TTL/hop_imit * * Return: Newly opened ping flow, or NULL on failure */ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, sa_family_t af, uint16_t id, - const void *saddr, const void *daddr) + const void *saddr, const void *daddr, + uint8_t ttl) { uint8_t proto = af == AF_INET ? IPPROTO_ICMP : IPPROTO_ICMPV6; uint8_t flowtype = af == AF_INET ? 
FLOW_PING4 : FLOW_PING6; @@ -179,7 +181,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, if (!flow) return NULL; - flow_initiate_af(flow, PIF_TAP, af, saddr, id, daddr, id); + flow_initiate_af(flow, PIF_TAP, af, saddr, id, daddr, ttl, id); if (!(tgt = flow_target(c, flow, proto))) goto cancel; @@ -235,7 +237,7 @@ cancel: * Return: count of consumed packets (always 1, even if malformed) */ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, - const void *saddr, const void *daddr, + const void *saddr, const void *daddr, uint8_t ttl, const struct pool *p, const struct timespec *now) { struct icmp_ping_flow *pingf; @@ -286,11 +288,14 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, } flow = flow_at_sidx(flow_lookup_af(c, proto, PIF_TAP, - af, saddr, daddr, id, id)); + af, saddr, daddr, ttl, id, id)); if (flow) pingf = &flow->ping; - else if (!(pingf = icmp_ping_new(c, af, id, saddr, daddr))) + else + pingf = icmp_ping_new(c, af, id, saddr, daddr, ttl); + + if (!pingf) return 1; tgt = &pingf->f.side[TGTSIDE]; diff --git a/icmp.h b/icmp.h index 5ce22b5..18168ab 100644 --- a/icmp.h +++ b/icmp.h @@ -13,7 +13,7 @@ struct icmp_ping_flow; void icmp_sock_handler(const struct ctx *c, union epoll_ref ref); int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, - const void *saddr, const void *daddr, + const void *saddr, const void *daddr, uint8_t ttl, const struct pool *p, const struct timespec *now); void icmp_init(void); diff --git a/packet.h b/packet.h index c94780a..e84e123 100644 --- a/packet.h +++ b/packet.h @@ -11,6 +11,8 @@ /* Maximum size of a single packet stored in pool, including headers */ #define PACKET_MAX_LEN ((size_t)UINT16_MAX) +#define DEFAULT_TTL 64 + /** * struct pool - Generic pool of packets stored in a buffer * @buf: Buffer storing packet descriptors, diff --git a/tap.c b/tap.c index 3a6fcbe..c7d82ca 100644 --- a/tap.c +++ b/tap.c @@ -563,6 +563,7 @@ PACKET_POOL_DECL(pool_l4, 
UIO_MAXIOV, pkt_buf); * @dest: Destination port * @saddr: Source address * @daddr: Destination address + * @ttl: Time to live * @msg: Array of messages that can be handled in a single call */ static struct tap4_l4_t { @@ -574,6 +575,8 @@ static struct tap4_l4_t { struct in_addr saddr; struct in_addr daddr; + uint8_t ttl; + struct pool_l4_t p; } tap4_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */]; @@ -586,6 +589,7 @@ static struct tap4_l4_t { * @dest: Destination port * @saddr: Source address * @daddr: Destination address + * @hop_limit: Hop limiit * @msg: Array of messages that can be handled in a single call */ static struct tap6_l4_t { @@ -598,6 +602,8 @@ static struct tap6_l4_t { struct in6_addr saddr; struct in6_addr daddr; + uint8_t hop_limit; + struct pool_l4_t p; } tap6_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */]; @@ -761,7 +767,7 @@ resume: packet_add(pkt, l4len, l4h); icmp_tap_handler(c, PIF_TAP, AF_INET, &iph->saddr, &iph->daddr, - pkt, now); + iph->ttl, pkt, now); continue; } @@ -786,7 +792,8 @@ resume: #define L4_MATCH(iph, uh, seq) \ ((seq)->protocol == (iph)->protocol && \ (seq)->source == (uh)->source && (seq)->dest == (uh)->dest && \ - (seq)->saddr.s_addr == (iph)->saddr && (seq)->daddr.s_addr == (iph)->daddr) + (seq)->saddr.s_addr == (iph)->saddr && \ + (seq)->daddr.s_addr == (iph)->daddr && (seq)->ttl == (iph)->ttl) #define L4_SET(iph, uh, seq) \ do { \ @@ -795,6 +802,7 @@ resume: (seq)->dest = (uh)->dest; \ (seq)->saddr.s_addr = (iph)->saddr; \ (seq)->daddr.s_addr = (iph)->daddr; \ + (seq)->ttl = (iph)->ttl; \ } while (0) if (seq && L4_MATCH(iph, uh, seq) && seq->p.count < UIO_MAXIOV) @@ -843,7 +851,7 @@ append: for (k = 0; k < p->count; ) k += udp_tap_handler(c, PIF_TAP, AF_INET, &seq->saddr, &seq->daddr, - p, k, now); + seq->ttl, p, k, now); } } @@ -878,6 +886,7 @@ resume: const struct ethhdr *eh; const struct udphdr *uh; struct ipv6hdr *ip6h; + uint8_t hop_limit; uint8_t proto; char *l4h; @@ -891,7 
+900,7 @@ resume: saddr = &ip6h->saddr; daddr = &ip6h->daddr; - + hop_limit = ip6h->hop_limit; plen = ntohs(ip6h->payload_len); if (plen != check) continue; @@ -938,7 +947,7 @@ resume: tap_packet_debug(NULL, ip6h, NULL, proto, NULL, 1); icmp_tap_handler(c, PIF_TAP, AF_INET6, - saddr, daddr, pkt, now); + saddr, daddr, hop_limit, pkt, now); continue; } @@ -966,7 +975,8 @@ resume: (seq)->dest == (uh)->dest && \ (seq)->flow_lbl == ip6_get_flow_lbl(ip6h) && \ IN6_ARE_ADDR_EQUAL(&(seq)->saddr, saddr) && \ - IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr)) + IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr) && \ + (seq)->hop_limit == ip6h->hop_limit) #define L4_SET(ip6h, proto, uh, seq) \ do { \ @@ -976,6 +986,7 @@ resume: (seq)->flow_lbl = ip6_get_flow_lbl(ip6h); \ (seq)->saddr = *saddr; \ (seq)->daddr = *daddr; \ + (seq)->hop_limit = ip6h->hop_limit; \ } while (0) if (seq && L4_MATCH(ip6h, proto, uh, seq) && @@ -1026,7 +1037,7 @@ append: for (k = 0; k < p->count; ) k += udp_tap_handler(c, PIF_TAP, AF_INET6, &seq->saddr, &seq->daddr, - p, k, now); + seq->hop_limit, p, k, now); } } diff --git a/tcp.c b/tcp.c index fa1d885..5751d21 100644 --- a/tcp.c +++ b/tcp.c @@ -1446,8 +1446,8 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af, if (!(flow = flow_alloc())) return; - ini = flow_initiate_af(flow, PIF_TAP, - af, saddr, srcport, daddr, dstport); + ini = flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, + daddr, dstport, DEFAULT_TTL); if (!(tgt = flow_target(c, flow, IPPROTO_TCP))) goto cancel; @@ -1977,7 +1977,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, opts = packet_get(p, idx, sizeof(*th), optlen, NULL); sidx = flow_lookup_af(c, IPPROTO_TCP, PIF_TAP, af, saddr, daddr, - ntohs(th->source), ntohs(th->dest)); + ntohs(th->source), ntohs(th->dest), DEFAULT_TTL); flow = flow_at_sidx(sidx); /* New connection from tap */ diff --git a/udp.c b/udp.c index 0c223b4..8a2c593 100644 --- a/udp.c +++ b/udp.c @@ -847,6 +847,7 @@ fail: * @af: Address 
family, AF_INET or AF_INET6 * @saddr: Source address * @daddr: Destination address + * @ttl: TTL for packets to be sent in this call * @p: Pool of UDP packets, with UDP headers * @idx: Index of first packet to process * @now: Current timestamp @@ -857,7 +858,8 @@ fail: */ int udp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, const void *saddr, const void *daddr, - const struct pool *p, int idx, const struct timespec *now) + uint8_t ttl, const struct pool *p, int idx, + const struct timespec *now) { const struct flowside *toside; struct mmsghdr mm[UIO_MAXIOV]; @@ -883,7 +885,9 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, src = ntohs(uh->source); dst = ntohs(uh->dest); - tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr, src, dst, now); + tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr, + src, dst, ttl, now); + if (!(uflow = udp_at_sidx(tosidx))) { char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN]; diff --git a/udp.h b/udp.h index de2df6d..041fad4 100644 --- a/udp.h +++ b/udp.h @@ -15,7 +15,8 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events, const struct timespec *now); int udp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, const void *saddr, const void *daddr, - const struct pool *p, int idx, const struct timespec *now); + uint8_t ttl, const struct pool *p, int idx, + const struct timespec *now); int udp_sock_init(const struct ctx *c, int ns, const union inany_addr *addr, const char *ifname, in_port_t port); int udp_init(struct ctx *c); diff --git a/udp_flow.c b/udp_flow.c index bf4b896..db5f709 100644 --- a/udp_flow.c +++ b/udp_flow.c @@ -236,6 +236,7 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref, * @daddr: Destination address guest side * @srcport: Source port on guest side * @dstport: Destination port on guest side + * @ttl: TTL for this flow * * Return: sidx for the destination side of the flow for this packet, or * FLOW_SIDX_NONE if we 
couldn't find or create a flow. @@ -244,7 +245,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c, uint8_t pif, sa_family_t af, const void *saddr, const void *daddr, in_port_t srcport, in_port_t dstport, - const struct timespec *now) + uint8_t ttl, const struct timespec *now) { const struct flowside *ini; struct udp_flow *uflow; @@ -254,7 +255,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c, ASSERT(pif == PIF_TAP); sidx = flow_lookup_af(c, IPPROTO_UDP, pif, af, saddr, daddr, - srcport, dstport); + srcport, dstport, ttl); if ((uflow = udp_at_sidx(sidx))) { uflow->ts = now->tv_sec; return flow_sidx_opposite(sidx); @@ -271,7 +272,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c, } ini = flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, - daddr, dstport); + daddr, dstport, ttl); if (inany_is_unspecified(&ini->eaddr) || ini->eport == 0 || inany_is_unspecified(&ini->oaddr) || ini->oport == 0) { diff --git a/udp_flow.h b/udp_flow.h index 9a1b059..7b40594 100644 --- a/udp_flow.h +++ b/udp_flow.h @@ -31,7 +31,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c, uint8_t pif, sa_family_t af, const void *saddr, const void *daddr, in_port_t srcport, in_port_t dstport, - const struct timespec *now); + uint8_t ttl, const struct timespec *now); void udp_flow_close(const struct ctx *c, struct udp_flow *uflow); bool udp_flow_defer(const struct udp_flow *uflow); bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow, -- @@ -31,7 +31,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c, uint8_t pif, sa_family_t af, const void *saddr, const void *daddr, in_port_t srcport, in_port_t dstport, - const struct timespec *now); + uint8_t ttl, const struct timespec *now); void udp_flow_close(const struct ctx *c, struct udp_flow *uflow); bool udp_flow_defer(const struct udp_flow *uflow); bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow, -- 2.48.1 ^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v2 1/2] Add ttl to outgoing flow structure lookups 2025-03-27 16:51 ` [PATCH v2 1/2] Add ttl to outgoing flow structure lookups Jon Maloy @ 2025-03-28 1:50 ` David Gibson 0 siblings, 0 replies; 4+ messages in thread From: David Gibson @ 2025-03-28 1:50 UTC (permalink / raw) To: Jon Maloy; +Cc: passt-dev, sbrivio, lvivier, dgibson [-- Attachment #1: Type: text/plain, Size: 18504 bytes --] On Thu, Mar 27, 2025 at 12:51:54PM -0400, Jon Maloy wrote: > As preparation for future enhancements we add ttl/hop limit as creation > and lookup criteria for outgoing flows. This comes in addition to the > regular 4-tuple which is currently used. > > Signed-off-by: Jon Maloy <jmaloy@redhat.com> Ah... sorry. I think I was misleading when I suggested adding the TTL to the flow table. This change does not make sense. The TTL is a per-packet property, not a property of the flow. Packets which match the addresses and ports should be considered part of the same flow, even if the TTL is different. Deliberately changing the TTL during a flow would be weird, but it's not illegal. I think it could also happen by accident with certain multipath setups. When I suggested adding the TTL to the flow, I wasn't meaning adding it to flow_common or what we use to lookup flows. Instead I essentially meant a cache of the value we last set for IP_TTL on the socket. We can store that in struct udp_flow. It's probably neatest to use an array of two values, one for each side. We'll rarely have a socket on both sides (spliced), and when we do we can't use this approach anyway, but it'll make the code a bit cleaner to do it that way, because it will parallel the array of (up to) two sockets. 
> --- > flow.c | 17 ++++++++++++----- > flow.h | 8 ++++++-- > flow_table.h | 3 ++- > icmp.c | 15 ++++++++++----- > icmp.h | 2 +- > packet.h | 2 ++ > tap.c | 25 ++++++++++++++++++------- > tcp.c | 6 +++--- > udp.c | 8 ++++++-- > udp.h | 3 ++- > udp_flow.c | 7 ++++--- > udp_flow.h | 2 +- > 12 files changed, 67 insertions(+), 31 deletions(-) > > diff --git a/flow.c b/flow.c > index 8622242..6796f73 100644 > --- a/flow.c > +++ b/flow.c > @@ -137,10 +137,12 @@ static struct timespec flow_timer_run; > * @eport: Endpoint port > * @oaddr: Our address (pointer to in_addr or in6_addr) > * @oport: Our port > + * @ttl: TTL/hop limit for packets in flow > */ > static void flowside_from_af(struct flowside *side, sa_family_t af, > const void *eaddr, in_port_t eport, > - const void *oaddr, in_port_t oport) > + const void *oaddr, in_port_t oport, > + uint8_t ttl) > { > if (oaddr) > inany_from_af(&side->oaddr, af, oaddr); > @@ -153,6 +155,8 @@ static void flowside_from_af(struct flowside *side, sa_family_t af, > else > side->eaddr = inany_any6; > side->eport = eport; > + > + side->ttl = ttl; > } > > /** > @@ -376,17 +380,19 @@ static void flow_initiate_(union flow *flow, uint8_t pif) > * @sport: Endpoint port > * @daddr: Destination address (pointer to in_addr or in6_addr) > * @dport: Destination port > + * @ttl: TTL/hop_limit for packets in flow > * > * Return: pointer to the initiating flowside information > */ > const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif, > sa_family_t af, > const void *saddr, in_port_t sport, > - const void *daddr, in_port_t dport) > + const void *daddr, in_port_t dport, > + uint8_t ttl) > { > struct flowside *ini = &flow->f.side[INISIDE]; > > - flowside_from_af(ini, af, saddr, sport, daddr, dport); > + flowside_from_af(ini, af, saddr, sport, daddr, dport, ttl); > flow_initiate_(flow, pif); > return ini; > } > @@ -731,17 +737,18 @@ static flow_sidx_t flowside_lookup(const struct ctx *c, uint8_t proto, > * @oaddr: Our guest side address 
(guest remote address) > * @eport: Guest side endpoint port (guest local port) > * @oport: Our guest side port (guest remote port) > + * @ttl: TTL/hop_limit of flow we are looking for > * > * Return: sidx of the matching flow & side, FLOW_SIDX_NONE if not found > */ > flow_sidx_t flow_lookup_af(const struct ctx *c, > uint8_t proto, uint8_t pif, sa_family_t af, > const void *eaddr, const void *oaddr, > - in_port_t eport, in_port_t oport) > + in_port_t eport, in_port_t oport, uint8_t ttl) > { > struct flowside side; > > - flowside_from_af(&side, af, eaddr, eport, oaddr, oport); > + flowside_from_af(&side, af, eaddr, eport, oaddr, oport, ttl); > return flowside_lookup(c, proto, pif, &side); > } > > diff --git a/flow.h b/flow.h > index dcf7645..2ba4a94 100644 > --- a/flow.h > +++ b/flow.h > @@ -143,12 +143,14 @@ extern const uint8_t flow_proto[]; > * @oaddr: Our address (local address from passt's PoV) > * @eport: Endpoint port > * @oport: Our port > + * @ttl: TTL/hop limit for this flow > */ > struct flowside { > union inany_addr oaddr; > union inany_addr eaddr; > in_port_t oport; > in_port_t eport; > + uint8_t ttl; > }; > > /** > @@ -163,7 +165,8 @@ static inline bool flowside_eq(const struct flowside *left, > return inany_equals(&left->eaddr, &right->eaddr) && > left->eport == right->eport && > inany_equals(&left->oaddr, &right->oaddr) && > - left->oport == right->oport; > + left->oport == right->oport && > + left->ttl == right->ttl; > } > > int flowside_sock_l4(const struct ctx *c, enum epoll_type type, uint8_t pif, > @@ -241,7 +244,8 @@ void flow_hash_remove(const struct ctx *c, flow_sidx_t sidx); > flow_sidx_t flow_lookup_af(const struct ctx *c, > uint8_t proto, uint8_t pif, sa_family_t af, > const void *eaddr, const void *oaddr, > - in_port_t eport, in_port_t oport); > + in_port_t eport, in_port_t oport, > + uint8_t ttl); > flow_sidx_t flow_lookup_sa(const struct ctx *c, uint8_t proto, uint8_t pif, > const void *esa, in_port_t oport); > > diff --git 
a/flow_table.h b/flow_table.h > index fd2c57b..0b5b431 100644 > --- a/flow_table.h > +++ b/flow_table.h > @@ -196,7 +196,8 @@ void flow_alloc_cancel(union flow *flow); > const struct flowside *flow_initiate_af(union flow *flow, uint8_t pif, > sa_family_t af, > const void *saddr, in_port_t sport, > - const void *daddr, in_port_t dport); > + const void *daddr, in_port_t dport, > + uint8_t ttl); > struct flowside *flow_initiate_sa(union flow *flow, uint8_t pif, > const union sockaddr_inany *ssa, > in_port_t dport); > diff --git a/icmp.c b/icmp.c > index 7e2b342..cbaa000 100644 > --- a/icmp.c > +++ b/icmp.c > @@ -162,12 +162,14 @@ static void icmp_ping_close(const struct ctx *c, > * @id: ICMP id for the new socket > * @saddr: Source address > * @daddr: Destination address > + * @ttl: TTL/hop_imit > * > * Return: Newly opened ping flow, or NULL on failure > */ > static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, > sa_family_t af, uint16_t id, > - const void *saddr, const void *daddr) > + const void *saddr, const void *daddr, > + uint8_t ttl) > { > uint8_t proto = af == AF_INET ? IPPROTO_ICMP : IPPROTO_ICMPV6; > uint8_t flowtype = af == AF_INET ? 
FLOW_PING4 : FLOW_PING6; > @@ -179,7 +181,7 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c, > if (!flow) > return NULL; > > - flow_initiate_af(flow, PIF_TAP, af, saddr, id, daddr, id); > + flow_initiate_af(flow, PIF_TAP, af, saddr, id, daddr, ttl, id); > if (!(tgt = flow_target(c, flow, proto))) > goto cancel; > > @@ -235,7 +237,7 @@ cancel: > * Return: count of consumed packets (always 1, even if malformed) > */ > int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, > - const void *saddr, const void *daddr, > + const void *saddr, const void *daddr, uint8_t ttl, > const struct pool *p, const struct timespec *now) > { > struct icmp_ping_flow *pingf; > @@ -286,11 +288,14 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, > } > > flow = flow_at_sidx(flow_lookup_af(c, proto, PIF_TAP, > - af, saddr, daddr, id, id)); > + af, saddr, daddr, ttl, id, id)); > > if (flow) > pingf = &flow->ping; > - else if (!(pingf = icmp_ping_new(c, af, id, saddr, daddr))) > + else > + pingf = icmp_ping_new(c, af, id, saddr, daddr, ttl); > + > + if (!pingf) > return 1; > > tgt = &pingf->f.side[TGTSIDE]; > diff --git a/icmp.h b/icmp.h > index 5ce22b5..18168ab 100644 > --- a/icmp.h > +++ b/icmp.h > @@ -13,7 +13,7 @@ struct icmp_ping_flow; > > void icmp_sock_handler(const struct ctx *c, union epoll_ref ref); > int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af, > - const void *saddr, const void *daddr, > + const void *saddr, const void *daddr, uint8_t ttl, > const struct pool *p, const struct timespec *now); > void icmp_init(void); > > diff --git a/packet.h b/packet.h > index c94780a..e84e123 100644 > --- a/packet.h > +++ b/packet.h > @@ -11,6 +11,8 @@ > /* Maximum size of a single packet stored in pool, including headers */ > #define PACKET_MAX_LEN ((size_t)UINT16_MAX) > > +#define DEFAULT_TTL 64 > + > /** > * struct pool - Generic pool of packets stored in a buffer > * @buf: Buffer storing packet descriptors, > 
diff --git a/tap.c b/tap.c > index 3a6fcbe..c7d82ca 100644 > --- a/tap.c > +++ b/tap.c > @@ -563,6 +563,7 @@ PACKET_POOL_DECL(pool_l4, UIO_MAXIOV, pkt_buf); > * @dest: Destination port > * @saddr: Source address > * @daddr: Destination address > + * @ttl: Time to live > * @msg: Array of messages that can be handled in a single call > */ > static struct tap4_l4_t { > @@ -574,6 +575,8 @@ static struct tap4_l4_t { > struct in_addr saddr; > struct in_addr daddr; > > + uint8_t ttl; > + > struct pool_l4_t p; > } tap4_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */]; > > @@ -586,6 +589,7 @@ static struct tap4_l4_t { > * @dest: Destination port > * @saddr: Source address > * @daddr: Destination address > + * @hop_limit: Hop limiit > * @msg: Array of messages that can be handled in a single call > */ > static struct tap6_l4_t { > @@ -598,6 +602,8 @@ static struct tap6_l4_t { > struct in6_addr saddr; > struct in6_addr daddr; > > + uint8_t hop_limit; > + > struct pool_l4_t p; > } tap6_l4[TAP_SEQS /* Arbitrary: TAP_MSGS in theory, so limit in users */]; > > @@ -761,7 +767,7 @@ resume: > packet_add(pkt, l4len, l4h); > icmp_tap_handler(c, PIF_TAP, AF_INET, > &iph->saddr, &iph->daddr, > - pkt, now); > + iph->ttl, pkt, now); > continue; > } > > @@ -786,7 +792,8 @@ resume: > #define L4_MATCH(iph, uh, seq) \ > ((seq)->protocol == (iph)->protocol && \ > (seq)->source == (uh)->source && (seq)->dest == (uh)->dest && \ > - (seq)->saddr.s_addr == (iph)->saddr && (seq)->daddr.s_addr == (iph)->daddr) > + (seq)->saddr.s_addr == (iph)->saddr && \ > + (seq)->daddr.s_addr == (iph)->daddr && (seq)->ttl == (iph)->ttl) > > #define L4_SET(iph, uh, seq) \ > do { \ > @@ -795,6 +802,7 @@ resume: > (seq)->dest = (uh)->dest; \ > (seq)->saddr.s_addr = (iph)->saddr; \ > (seq)->daddr.s_addr = (iph)->daddr; \ > + (seq)->ttl = (iph)->ttl; \ > } while (0) > > if (seq && L4_MATCH(iph, uh, seq) && seq->p.count < UIO_MAXIOV) > @@ -843,7 +851,7 @@ append: > for (k = 0; k < p->count; ) > k += 
udp_tap_handler(c, PIF_TAP, AF_INET, > &seq->saddr, &seq->daddr, > - p, k, now); > + seq->ttl, p, k, now); > } > } > > @@ -878,6 +886,7 @@ resume: > const struct ethhdr *eh; > const struct udphdr *uh; > struct ipv6hdr *ip6h; > + uint8_t hop_limit; > uint8_t proto; > char *l4h; > > @@ -891,7 +900,7 @@ resume: > > saddr = &ip6h->saddr; > daddr = &ip6h->daddr; > - > + hop_limit = ip6h->hop_limit; > plen = ntohs(ip6h->payload_len); > if (plen != check) > continue; > @@ -938,7 +947,7 @@ resume: > tap_packet_debug(NULL, ip6h, NULL, proto, NULL, 1); > > icmp_tap_handler(c, PIF_TAP, AF_INET6, > - saddr, daddr, pkt, now); > + saddr, daddr, hop_limit, pkt, now); > continue; > } > > @@ -966,7 +975,8 @@ resume: > (seq)->dest == (uh)->dest && \ > (seq)->flow_lbl == ip6_get_flow_lbl(ip6h) && \ > IN6_ARE_ADDR_EQUAL(&(seq)->saddr, saddr) && \ > - IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr)) > + IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr) && \ > + (seq)->hop_limit == ip6h->hop_limit) > > #define L4_SET(ip6h, proto, uh, seq) \ > do { \ > @@ -976,6 +986,7 @@ resume: > (seq)->flow_lbl = ip6_get_flow_lbl(ip6h); \ > (seq)->saddr = *saddr; \ > (seq)->daddr = *daddr; \ > + (seq)->hop_limit = ip6h->hop_limit; \ > } while (0) > > if (seq && L4_MATCH(ip6h, proto, uh, seq) && > @@ -1026,7 +1037,7 @@ append: > for (k = 0; k < p->count; ) > k += udp_tap_handler(c, PIF_TAP, AF_INET6, > &seq->saddr, &seq->daddr, > - p, k, now); > + seq->hop_limit, p, k, now); > } > } > > diff --git a/tcp.c b/tcp.c > index fa1d885..5751d21 100644 > --- a/tcp.c > +++ b/tcp.c > @@ -1446,8 +1446,8 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af, > if (!(flow = flow_alloc())) > return; > > - ini = flow_initiate_af(flow, PIF_TAP, > - af, saddr, srcport, daddr, dstport); > + ini = flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, > + daddr, dstport, DEFAULT_TTL); > > if (!(tgt = flow_target(c, flow, IPPROTO_TCP))) > goto cancel; > @@ -1977,7 +1977,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t 
pif, sa_family_t af, > opts = packet_get(p, idx, sizeof(*th), optlen, NULL); > > sidx = flow_lookup_af(c, IPPROTO_TCP, PIF_TAP, af, saddr, daddr, > - ntohs(th->source), ntohs(th->dest)); > + ntohs(th->source), ntohs(th->dest), DEFAULT_TTL); > flow = flow_at_sidx(sidx); > > /* New connection from tap */ > diff --git a/udp.c b/udp.c > index 0c223b4..8a2c593 100644 > --- a/udp.c > +++ b/udp.c > @@ -847,6 +847,7 @@ fail: > * @af: Address family, AF_INET or AF_INET6 > * @saddr: Source address > * @daddr: Destination address > + * @ttl: TTL for packets to be sent in this call > * @p: Pool of UDP packets, with UDP headers > * @idx: Index of first packet to process > * @now: Current timestamp > @@ -857,7 +858,8 @@ fail: > */ > int udp_tap_handler(const struct ctx *c, uint8_t pif, > sa_family_t af, const void *saddr, const void *daddr, > - const struct pool *p, int idx, const struct timespec *now) > + uint8_t ttl, const struct pool *p, int idx, > + const struct timespec *now) > { > const struct flowside *toside; > struct mmsghdr mm[UIO_MAXIOV]; > @@ -883,7 +885,9 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, > src = ntohs(uh->source); > dst = ntohs(uh->dest); > > - tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr, src, dst, now); > + tosidx = udp_flow_from_tap(c, pif, af, saddr, daddr, > + src, dst, ttl, now); > + > if (!(uflow = udp_at_sidx(tosidx))) { > char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN]; > > diff --git a/udp.h b/udp.h > index de2df6d..041fad4 100644 > --- a/udp.h > +++ b/udp.h > @@ -15,7 +15,8 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref, > uint32_t events, const struct timespec *now); > int udp_tap_handler(const struct ctx *c, uint8_t pif, > sa_family_t af, const void *saddr, const void *daddr, > - const struct pool *p, int idx, const struct timespec *now); > + uint8_t ttl, const struct pool *p, int idx, > + const struct timespec *now); > int udp_sock_init(const struct ctx *c, int ns, const union inany_addr 
*addr, > const char *ifname, in_port_t port); > int udp_init(struct ctx *c); > diff --git a/udp_flow.c b/udp_flow.c > index bf4b896..db5f709 100644 > --- a/udp_flow.c > +++ b/udp_flow.c > @@ -236,6 +236,7 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref, > * @daddr: Destination address guest side > * @srcport: Source port on guest side > * @dstport: Destination port on guest side > + * @ttl: TTL for this flow > * > * Return: sidx for the destination side of the flow for this packet, or > * FLOW_SIDX_NONE if we couldn't find or create a flow. > @@ -244,7 +245,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c, > uint8_t pif, sa_family_t af, > const void *saddr, const void *daddr, > in_port_t srcport, in_port_t dstport, > - const struct timespec *now) > + uint8_t ttl, const struct timespec *now) > { > const struct flowside *ini; > struct udp_flow *uflow; > @@ -254,7 +255,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c, > ASSERT(pif == PIF_TAP); > > sidx = flow_lookup_af(c, IPPROTO_UDP, pif, af, saddr, daddr, > - srcport, dstport); > + srcport, dstport, ttl); > if ((uflow = udp_at_sidx(sidx))) { > uflow->ts = now->tv_sec; > return flow_sidx_opposite(sidx); > @@ -271,7 +272,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c, > } > > ini = flow_initiate_af(flow, PIF_TAP, af, saddr, srcport, > - daddr, dstport); > + daddr, dstport, ttl); > > if (inany_is_unspecified(&ini->eaddr) || ini->eport == 0 || > inany_is_unspecified(&ini->oaddr) || ini->oport == 0) { > diff --git a/udp_flow.h b/udp_flow.h > index 9a1b059..7b40594 100644 > --- a/udp_flow.h > +++ b/udp_flow.h > @@ -31,7 +31,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c, > uint8_t pif, sa_family_t af, > const void *saddr, const void *daddr, > in_port_t srcport, in_port_t dstport, > - const struct timespec *now); > + uint8_t ttl, const struct timespec *now); > void udp_flow_close(const struct ctx *c, struct udp_flow *uflow); > bool udp_flow_defer(const struct udp_flow 
*uflow); > bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow, -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson [-- Attachment #2: signature.asc --] [-- Type: application/pgp-signature, Size: 833 bytes --] ^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v2 2/2] udp: support traceroute with udp 2025-03-27 16:51 [PATCH v2 0/2] udp: add guest-to-remote traceroute Jon Maloy 2025-03-27 16:51 ` [PATCH v2 1/2] Add ttl to outgoing flow structure lookups Jon Maloy @ 2025-03-27 16:51 ` Jon Maloy 1 sibling, 0 replies; 4+ messages in thread From: Jon Maloy @ 2025-03-27 16:51 UTC (permalink / raw) To: passt-dev, sbrivio, lvivier, dgibson, jmaloy Now that ICMP pass-through from socket-to-tap and TTL/hop limit based outgoing flow separation are in place, it is easy to support UDP based traceroute functionality in the tap-to-socket direction. We add that support in this commit. Signed-off-by: Jon Maloy <jmaloy@redhat.com> --- v2: Using ancillary data instead of setsockopt to transfer outgoing TTL. --- udp.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/udp.c b/udp.c index 8a2c593..7361ffa 100644 --- a/udp.c +++ b/udp.c @@ -861,6 +861,7 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, uint8_t ttl, const struct pool *p, int idx, const struct timespec *now) { + char ancillary[CMSG_SPACE(sizeof(int))]; const struct flowside *toside; struct mmsghdr mm[UIO_MAXIOV]; union sockaddr_inany to_sa; @@ -868,6 +869,8 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, const struct udphdr *uh; struct udp_flow *uflow; int i, s, count = 0; + char *control = NULL; + int control_len = 0; flow_sidx_t tosidx; in_port_t src, dst; uint8_t topif; @@ -914,6 +917,22 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, pif_sockaddr(c, &to_sa, &sl, topif, &toside->eaddr, toside->eport); + if (ttl != DEFAULT_TTL) { + struct cmsghdr *cmsg = (void *) ancillary; + + if (af == AF_INET) { + cmsg->cmsg_level = IPPROTO_IP; + cmsg->cmsg_type = IP_TTL; + } else { + cmsg->cmsg_level = IPPROTO_IPV6; + cmsg->cmsg_type = IPV6_HOPLIMIT; + } + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + *((int *) CMSG_DATA(cmsg)) = ttl; + control = ancillary; + control_len = sizeof(ancillary); + } + for (i = 0; i < (int)p->count - 
idx; i++) { struct udphdr *uh_send; size_t len; @@ -936,8 +955,8 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, mm[i].msg_hdr.msg_iovlen = 0; } - mm[i].msg_hdr.msg_control = NULL; - mm[i].msg_hdr.msg_controllen = 0; + mm[i].msg_hdr.msg_control = control; + mm[i].msg_hdr.msg_controllen = control_len; mm[i].msg_hdr.msg_flags = 0; count++; -- @@ -861,6 +861,7 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, uint8_t ttl, const struct pool *p, int idx, const struct timespec *now) { + char ancillary[CMSG_SPACE(sizeof(int))]; const struct flowside *toside; struct mmsghdr mm[UIO_MAXIOV]; union sockaddr_inany to_sa; @@ -868,6 +869,8 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, const struct udphdr *uh; struct udp_flow *uflow; int i, s, count = 0; + char *control = NULL; + int control_len = 0; flow_sidx_t tosidx; in_port_t src, dst; uint8_t topif; @@ -914,6 +917,22 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, pif_sockaddr(c, &to_sa, &sl, topif, &toside->eaddr, toside->eport); + if (ttl != DEFAULT_TTL) { + struct cmsghdr *cmsg = (void *) ancillary; + + if (af == AF_INET) { + cmsg->cmsg_level = IPPROTO_IP; + cmsg->cmsg_type = IP_TTL; + } else { + cmsg->cmsg_level = IPPROTO_IPV6; + cmsg->cmsg_type = IPV6_HOPLIMIT; + } + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + *((int *) CMSG_DATA(cmsg)) = ttl; + control = ancillary; + control_len = sizeof(ancillary); + } + for (i = 0; i < (int)p->count - idx; i++) { struct udphdr *uh_send; size_t len; @@ -936,8 +955,8 @@ int udp_tap_handler(const struct ctx *c, uint8_t pif, mm[i].msg_hdr.msg_iovlen = 0; } - mm[i].msg_hdr.msg_control = NULL; - mm[i].msg_hdr.msg_controllen = 0; + mm[i].msg_hdr.msg_control = control; + mm[i].msg_hdr.msg_controllen = control_len; mm[i].msg_hdr.msg_flags = 0; count++; -- 2.48.1 ^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-03-28 1:58 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2025-03-27 16:51 [PATCH v2 0/2] udp: add guest-to-remote traceroute Jon Maloy 2025-03-27 16:51 ` [PATCH v2 1/2] Add ttl to outgoing flow structure lookups Jon Maloy 2025-03-28 1:50 ` David Gibson 2025-03-27 16:51 ` [PATCH v2 2/2] udp: support traceroute with udp Jon Maloy
Code repositories for project(s) associated with this public inbox https://passt.top/passt This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for IMAP folder(s).