public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: jmaloy@redhat.com, David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v7 24/27] udp: Direct datagrams from host to guest via flow table
Date: Fri,  5 Jul 2024 12:07:21 +1000	[thread overview]
Message-ID: <20240705020724.3447719-25-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20240705020724.3447719-1-david@gibson.dropbear.id.au>

This replaces the last piece of existing UDP port tracking with the
common flow table.  Specifically, use the flow table to direct datagrams
from host sockets to the guest tap interface.  Since this now requires
a flow for every datagram, we add some logging if we encounter any
datagrams for which we can't find or create a flow.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 flow_table.h |  14 ++++
 udp.c        | 188 +++++++++++++++------------------------------------
 2 files changed, 67 insertions(+), 135 deletions(-)

diff --git a/flow_table.h b/flow_table.h
index 1faac4a7..da9483b3 100644
--- a/flow_table.h
+++ b/flow_table.h
@@ -106,6 +106,20 @@ static inline uint8_t pif_at_sidx(flow_sidx_t sidx)
 	return flow->f.pif[sidx.side];
 }
 
+/** flowside_at_sidx - Retrieve a specific flowside
+ * @sidx:    Flow & side index
+ *
+ * Return: Flowside for the flow & side given by @sidx, NULL if no such flow
+ */
+static inline const struct flowside *flowside_at_sidx(flow_sidx_t sidx)
+{
+	const union flow *flow = flow_at_sidx(sidx);
+
+	if (!flow)
+		return NULL;
+	return &flow->f.side[sidx.side];
+}
+
 /** flow_sidx_t - Index of one side of a flow from common structure
  * @f:		Common flow fields pointer
  * @side:	Which side to refer to (0 or 1)
diff --git a/udp.c b/udp.c
index a26ffe0c..7d63faf6 100644
--- a/udp.c
+++ b/udp.c
@@ -60,26 +60,6 @@
  * flow will come to the reply socket in preference to a listening socket.  The
  * sample program contrib/udp-reuseaddr/reuseaddr-priority.c documents and tests
  * that assumption.
- *
- * Port tracking
- * =============
- *
- * For UDP, a reduced version of port-based connection tracking is implemented
- * with two purposes:
- * - binding ephemeral ports when they're used as source port by the guest, so
- *   that replies on those ports can be forwarded back to the guest, with a
- *   fixed timeout for this binding
- * - packets received from the local host get their source changed to a local
- *   address (gateway address) so that they can be forwarded to the guest, and
- *   packets sent as replies by the guest need their destination address to
- *   be changed back to the address of the local host. This is dynamic to allow
- *   connections from the gateway as well, and uses the same fixed 180s timeout
- * 
- * Sockets for bound ports are created at initialisation time, one set for IPv4
- * and one for IPv6.
- *
- * Packets are forwarded back and forth, by prepending and stripping UDP headers
- * in the obvious way, with no port translation.
  */
 
 #include <sched.h>
@@ -498,7 +478,6 @@ static flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
 
 	ASSERT(ref.type == EPOLL_TYPE_UDP);
 
-	/* FIXME: Match reply packets to their flow as well */
 	if (!ref.udp.orig)
 		return FLOW_SIDX_NONE;
 
@@ -558,160 +537,87 @@ static void udp_splice_send(const struct ctx *c, size_t start, size_t n,
 
 /**
  * udp_update_hdr4() - Update headers for one IPv4 datagram
- * @c:		Execution context
  * @ip4h:	Pre-filled IPv4 header (except for tot_len and saddr)
- * @s_in:	Source socket address, filled in by recvmmsg()
  * @bp:		Pointer to udp_payload_t to update
- * @dstport:	Destination port number
+ * @fside:	Flowside with relevant addresses
  * @dlen:	Length of UDP payload
- * @now:	Current timestamp
  *
  * Return: size of IPv4 payload (UDP header + data)
  */
-static size_t udp_update_hdr4(const struct ctx *c,
-			      struct iphdr *ip4h, const struct sockaddr_in *s_in,
-			      struct udp_payload_t *bp,
-			      in_port_t dstport, size_t dlen,
-			      const struct timespec *now)
+static size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
+			      const struct flowside *fside, size_t dlen)
 {
-	const struct in_addr dst = c->ip4.addr_seen;
-	in_port_t srcport = ntohs(s_in->sin_port);
+	const struct in_addr *src = inany_v4(&fside->faddr);
+	const struct in_addr *dst = inany_v4(&fside->eaddr);
 	size_t l4len = dlen + sizeof(bp->uh);
 	size_t l3len = l4len + sizeof(*ip4h);
-	struct in_addr src = s_in->sin_addr;
-
-	if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_match) &&
-	    IN4_ARE_ADDR_EQUAL(&src, &c->ip4.dns_host) && srcport == 53 &&
-	    (udp_tap_map[V4][dstport].flags & PORT_DNS_FWD)) {
-		src = c->ip4.dns_match;
-	} else if (IN4_IS_ADDR_LOOPBACK(&src) ||
-		   IN4_ARE_ADDR_EQUAL(&src, &c->ip4.addr_seen)) {
-		udp_tap_map[V4][srcport].ts = now->tv_sec;
-		udp_tap_map[V4][srcport].flags |= PORT_LOCAL;
 
-		if (IN4_IS_ADDR_LOOPBACK(&src))
-			udp_tap_map[V4][srcport].flags |= PORT_LOOPBACK;
-		else
-			udp_tap_map[V4][srcport].flags &= ~PORT_LOOPBACK;
-
-		bitmap_set(udp_act[V4][UDP_ACT_TAP], srcport);
-
-		src = c->ip4.gw;
-	}
+	ASSERT(src && dst);
 
 	ip4h->tot_len = htons(l3len);
-	ip4h->daddr = dst.s_addr;
-	ip4h->saddr = src.s_addr;
-	ip4h->check = csum_ip4_header(l3len, IPPROTO_UDP, src, dst);
+	ip4h->daddr = dst->s_addr;
+	ip4h->saddr = src->s_addr;
+	ip4h->check = csum_ip4_header(l3len, IPPROTO_UDP, *src, *dst);
 
-	bp->uh.source = s_in->sin_port;
-	bp->uh.dest = htons(dstport);
+	bp->uh.source = htons(fside->fport);
+	bp->uh.dest = htons(fside->eport);
 	bp->uh.len = htons(l4len);
-	csum_udp4(&bp->uh, src, dst, bp->data, dlen);
+	csum_udp4(&bp->uh, *src, *dst, bp->data, dlen);
 
 	return l4len;
 }
 
 /**
  * udp_update_hdr6() - Update headers for one IPv6 datagram
- * @c:		Execution context
  * @ip6h:	Pre-filled IPv6 header (except for payload_len and addresses)
- * @s_in:	Source socket address, filled in by recvmmsg()
  * @bp:		Pointer to udp_payload_t to update
- * @dstport:	Destination port number
+ * @fside:	Flowside with relevant addresses
  * @dlen:	Length of UDP payload
- * @now:	Current timestamp
  *
  * Return: size of IPv6 payload (UDP header + data)
  */
-static size_t udp_update_hdr6(const struct ctx *c,
-			      struct ipv6hdr *ip6h, struct sockaddr_in6 *s_in6,
-			      struct udp_payload_t *bp,
-			      in_port_t dstport, size_t dlen,
-			      const struct timespec *now)
+static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
+			      const struct flowside *fside, size_t dlen)
 {
-	const struct in6_addr *src = &s_in6->sin6_addr;
-	const struct in6_addr *dst = &c->ip6.addr_seen;
-	in_port_t srcport = ntohs(s_in6->sin6_port);
 	uint16_t l4len = dlen + sizeof(bp->uh);
 
-	if (IN6_IS_ADDR_LINKLOCAL(src)) {
-		dst = &c->ip6.addr_ll_seen;
-	} else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.dns_match) &&
-		   IN6_ARE_ADDR_EQUAL(src, &c->ip6.dns_host) &&
-		   srcport == 53 &&
-		   (udp_tap_map[V4][dstport].flags & PORT_DNS_FWD)) {
-		src = &c->ip6.dns_match;
-	} else if (IN6_IS_ADDR_LOOPBACK(src)			||
-		   IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr_seen)	||
-		   IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr)) {
-		udp_tap_map[V6][srcport].ts = now->tv_sec;
-		udp_tap_map[V6][srcport].flags |= PORT_LOCAL;
-
-		if (IN6_IS_ADDR_LOOPBACK(src))
-			udp_tap_map[V6][srcport].flags |= PORT_LOOPBACK;
-		else
-			udp_tap_map[V6][srcport].flags &= ~PORT_LOOPBACK;
-
-		if (IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr))
-			udp_tap_map[V6][srcport].flags |= PORT_GUA;
-		else
-			udp_tap_map[V6][srcport].flags &= ~PORT_GUA;
-
-		bitmap_set(udp_act[V6][UDP_ACT_TAP], srcport);
-
-		dst = &c->ip6.addr_ll_seen;
-
-		if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw))
-			src = &c->ip6.gw;
-		else
-			src = &c->ip6.addr_ll;
-
-	}
-
 	ip6h->payload_len = htons(l4len);
-	ip6h->daddr = *dst;
-	ip6h->saddr = *src;
+	ip6h->daddr = fside->eaddr.a6;
+	ip6h->saddr = fside->faddr.a6;
 	ip6h->version = 6;
 	ip6h->nexthdr = IPPROTO_UDP;
 	ip6h->hop_limit = 255;
 
-	bp->uh.source = s_in6->sin6_port;
-	bp->uh.dest = htons(dstport);
+	bp->uh.source = htons(fside->fport);
+	bp->uh.dest = htons(fside->eport);
 	bp->uh.len = ip6h->payload_len;
-	csum_udp6(&bp->uh, src, dst, bp->data, dlen);
+	csum_udp6(&bp->uh, &fside->faddr.a6, &fside->eaddr.a6, bp->data, dlen);
 
 	return l4len;
 }
 
 /**
  * udp_tap_prepare() - Convert one datagram into a tap frame
- * @c:		Execution context
  * @mmh:	Receiving mmsghdr array
  * @idx:	Index of the datagram to prepare
- * @dstport:	Destination port
- * @v6:		Prepare for IPv6?
- * @now:	Current timestamp
+ * @fside:	flowside for destination side
  */
-static void udp_tap_prepare(const struct ctx *c, const struct mmsghdr *mmh,
-			    unsigned idx, in_port_t dstport, bool v6,
-			    const struct timespec *now)
+static void udp_tap_prepare(const struct mmsghdr *mmh, unsigned idx,
+			    const struct flowside *fside)
 {
 	struct iovec (*tap_iov)[UDP_NUM_IOVS] = &udp_l2_iov[idx];
 	struct udp_payload_t *bp = &udp_payload[idx];
 	struct udp_meta_t *bm = &udp_meta[idx];
 	size_t l4len;
 
-	if (v6) {
-		l4len = udp_update_hdr6(c, &bm->ip6h, &bm->s_in.sa6, bp,
-					dstport, mmh[idx].msg_len, now);
+	if (!inany_v4(&fside->eaddr) || !inany_v4(&fside->faddr)) {
+		l4len = udp_update_hdr6(&bm->ip6h, bp, fside, mmh[idx].msg_len);
 		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) +
 			       sizeof(udp6_eth_hdr));
 		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp6_eth_hdr);
 		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
 	} else {
-		l4len = udp_update_hdr4(c, &bm->ip4h, &bm->s_in.sa4, bp,
-					dstport, mmh[idx].msg_len, now);
+		l4len = udp_update_hdr4(&bm->ip4h, bp, fside, mmh[idx].msg_len);
 		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) +
 			       sizeof(udp4_eth_hdr));
 		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp4_eth_hdr);
@@ -766,17 +672,11 @@ void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t eve
 			  const struct timespec *now)
 {
 	struct mmsghdr *mmh_recv = ref.udp.v6 ? udp6_mh_recv : udp4_mh_recv;
-	in_port_t dstport = ref.udp.port;
 	int n, i;
 
 	if ((n = udp_sock_recv(c, ref.fd, events, mmh_recv)) <= 0)
 		return;
 
-	if (ref.udp.pif == PIF_SPLICE)
-		dstport += c->udp.fwd_out.f.delta[dstport];
-	else if (ref.udp.pif == PIF_HOST)
-		dstport += c->udp.fwd_in.f.delta[dstport];
-
 	/* We divide datagrams into batches based on how we need to send them,
 	 * determined by udp_meta[i].tosidx.  To avoid either two passes through
 	 * the array, or recalculating tosidx for a single entry, we have to
@@ -791,9 +691,9 @@ void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t eve
 		do {
 			if (pif_is_socket(batchpif))
 				udp_splice_prepare(mmh_recv, i);
-			else
-				udp_tap_prepare(c, mmh_recv, i, dstport,
-						ref.udp.v6, now);
+			else if (batchpif == PIF_TAP)
+				udp_tap_prepare(mmh_recv, i,
+						flowside_at_sidx(batchsidx));
 
 			if (++i >= n)
 				break;
@@ -803,12 +703,24 @@ void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t eve
 								now);
 		} while (flow_sidx_eq(udp_meta[i].tosidx, batchsidx));
 
-		if (pif_is_socket(batchpif))
+		if (pif_is_socket(batchpif)) {
 			udp_splice_send(c, batchstart, i - batchstart,
 					batchsidx);
-		else
+		} else if (batchpif == PIF_TAP) {
 			tap_send_frames(c, &udp_l2_iov[batchstart][0],
 					UDP_NUM_IOVS, i - batchstart);
+		} else if (flow_sidx_valid(batchsidx)) {
+			flow_sidx_t fromsidx = flow_sidx_opposite(batchsidx);
+			struct udp_flow *uflow = udp_at_sidx(batchsidx);
+
+			flow_err(uflow,
+				 "No support for forwarding UDP from %s to %s",
+				 pif_name(pif_at_sidx(fromsidx)),
+				 pif_name(batchpif));
+		} else {
+			debug("Discarding %d datagrams without flow",
+			      i - batchstart);
+		}
 	}
 }
 
@@ -845,14 +757,20 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
 	for (i = 0; i < n; i++) {
 		if (pif_is_socket(topif))
 			udp_splice_prepare(mmh_recv, i);
-		else
-			udp_tap_prepare(c, mmh_recv, i, toside->eport, v6, now);
+		else if (topif == PIF_TAP)
+			udp_tap_prepare(mmh_recv, i, toside);
 	}
 
-	if (pif_is_socket(topif))
+	if (pif_is_socket(topif)) {
 		udp_splice_send(c, 0, n, tosidx);
-	else
+	} else if (topif == PIF_TAP) {
 		tap_send_frames(c, &udp_l2_iov[0][0], UDP_NUM_IOVS, n);
+	} else {
+		uint8_t frompif = uflow->f.pif[ref.flowside.side];
+
+		flow_err(uflow, "No support for forwarding UDP from %s to %s",
+			 pif_name(frompif), pif_name(topif));
+	}
 }
 
 /**
-- 
@@ -60,26 +60,6 @@
  * flow will come to the reply socket in preference to a listening socket.  The
  * sample program contrib/udp-reuseaddr/reuseaddr-priority.c documents and tests
  * that assumption.
- *
- * Port tracking
- * =============
- *
- * For UDP, a reduced version of port-based connection tracking is implemented
- * with two purposes:
- * - binding ephemeral ports when they're used as source port by the guest, so
- *   that replies on those ports can be forwarded back to the guest, with a
- *   fixed timeout for this binding
- * - packets received from the local host get their source changed to a local
- *   address (gateway address) so that they can be forwarded to the guest, and
- *   packets sent as replies by the guest need their destination address to
- *   be changed back to the address of the local host. This is dynamic to allow
- *   connections from the gateway as well, and uses the same fixed 180s timeout
- * 
- * Sockets for bound ports are created at initialisation time, one set for IPv4
- * and one for IPv6.
- *
- * Packets are forwarded back and forth, by prepending and stripping UDP headers
- * in the obvious way, with no port translation.
  */
 
 #include <sched.h>
@@ -498,7 +478,6 @@ static flow_sidx_t udp_flow_from_sock(const struct ctx *c, union epoll_ref ref,
 
 	ASSERT(ref.type == EPOLL_TYPE_UDP);
 
-	/* FIXME: Match reply packets to their flow as well */
 	if (!ref.udp.orig)
 		return FLOW_SIDX_NONE;
 
@@ -558,160 +537,87 @@ static void udp_splice_send(const struct ctx *c, size_t start, size_t n,
 
 /**
  * udp_update_hdr4() - Update headers for one IPv4 datagram
- * @c:		Execution context
  * @ip4h:	Pre-filled IPv4 header (except for tot_len and saddr)
- * @s_in:	Source socket address, filled in by recvmmsg()
  * @bp:		Pointer to udp_payload_t to update
- * @dstport:	Destination port number
+ * @fside:	Flowside with relevant addresses
  * @dlen:	Length of UDP payload
- * @now:	Current timestamp
  *
  * Return: size of IPv4 payload (UDP header + data)
  */
-static size_t udp_update_hdr4(const struct ctx *c,
-			      struct iphdr *ip4h, const struct sockaddr_in *s_in,
-			      struct udp_payload_t *bp,
-			      in_port_t dstport, size_t dlen,
-			      const struct timespec *now)
+static size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
+			      const struct flowside *fside, size_t dlen)
 {
-	const struct in_addr dst = c->ip4.addr_seen;
-	in_port_t srcport = ntohs(s_in->sin_port);
+	const struct in_addr *src = inany_v4(&fside->faddr);
+	const struct in_addr *dst = inany_v4(&fside->eaddr);
 	size_t l4len = dlen + sizeof(bp->uh);
 	size_t l3len = l4len + sizeof(*ip4h);
-	struct in_addr src = s_in->sin_addr;
-
-	if (!IN4_IS_ADDR_UNSPECIFIED(&c->ip4.dns_match) &&
-	    IN4_ARE_ADDR_EQUAL(&src, &c->ip4.dns_host) && srcport == 53 &&
-	    (udp_tap_map[V4][dstport].flags & PORT_DNS_FWD)) {
-		src = c->ip4.dns_match;
-	} else if (IN4_IS_ADDR_LOOPBACK(&src) ||
-		   IN4_ARE_ADDR_EQUAL(&src, &c->ip4.addr_seen)) {
-		udp_tap_map[V4][srcport].ts = now->tv_sec;
-		udp_tap_map[V4][srcport].flags |= PORT_LOCAL;
 
-		if (IN4_IS_ADDR_LOOPBACK(&src))
-			udp_tap_map[V4][srcport].flags |= PORT_LOOPBACK;
-		else
-			udp_tap_map[V4][srcport].flags &= ~PORT_LOOPBACK;
-
-		bitmap_set(udp_act[V4][UDP_ACT_TAP], srcport);
-
-		src = c->ip4.gw;
-	}
+	ASSERT(src && dst);
 
 	ip4h->tot_len = htons(l3len);
-	ip4h->daddr = dst.s_addr;
-	ip4h->saddr = src.s_addr;
-	ip4h->check = csum_ip4_header(l3len, IPPROTO_UDP, src, dst);
+	ip4h->daddr = dst->s_addr;
+	ip4h->saddr = src->s_addr;
+	ip4h->check = csum_ip4_header(l3len, IPPROTO_UDP, *src, *dst);
 
-	bp->uh.source = s_in->sin_port;
-	bp->uh.dest = htons(dstport);
+	bp->uh.source = htons(fside->fport);
+	bp->uh.dest = htons(fside->eport);
 	bp->uh.len = htons(l4len);
-	csum_udp4(&bp->uh, src, dst, bp->data, dlen);
+	csum_udp4(&bp->uh, *src, *dst, bp->data, dlen);
 
 	return l4len;
 }
 
 /**
  * udp_update_hdr6() - Update headers for one IPv6 datagram
- * @c:		Execution context
  * @ip6h:	Pre-filled IPv6 header (except for payload_len and addresses)
- * @s_in:	Source socket address, filled in by recvmmsg()
  * @bp:		Pointer to udp_payload_t to update
- * @dstport:	Destination port number
+ * @fside:	Flowside with relevant addresses
  * @dlen:	Length of UDP payload
- * @now:	Current timestamp
  *
  * Return: size of IPv6 payload (UDP header + data)
  */
-static size_t udp_update_hdr6(const struct ctx *c,
-			      struct ipv6hdr *ip6h, struct sockaddr_in6 *s_in6,
-			      struct udp_payload_t *bp,
-			      in_port_t dstport, size_t dlen,
-			      const struct timespec *now)
+static size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
+			      const struct flowside *fside, size_t dlen)
 {
-	const struct in6_addr *src = &s_in6->sin6_addr;
-	const struct in6_addr *dst = &c->ip6.addr_seen;
-	in_port_t srcport = ntohs(s_in6->sin6_port);
 	uint16_t l4len = dlen + sizeof(bp->uh);
 
-	if (IN6_IS_ADDR_LINKLOCAL(src)) {
-		dst = &c->ip6.addr_ll_seen;
-	} else if (!IN6_IS_ADDR_UNSPECIFIED(&c->ip6.dns_match) &&
-		   IN6_ARE_ADDR_EQUAL(src, &c->ip6.dns_host) &&
-		   srcport == 53 &&
-		   (udp_tap_map[V4][dstport].flags & PORT_DNS_FWD)) {
-		src = &c->ip6.dns_match;
-	} else if (IN6_IS_ADDR_LOOPBACK(src)			||
-		   IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr_seen)	||
-		   IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr)) {
-		udp_tap_map[V6][srcport].ts = now->tv_sec;
-		udp_tap_map[V6][srcport].flags |= PORT_LOCAL;
-
-		if (IN6_IS_ADDR_LOOPBACK(src))
-			udp_tap_map[V6][srcport].flags |= PORT_LOOPBACK;
-		else
-			udp_tap_map[V6][srcport].flags &= ~PORT_LOOPBACK;
-
-		if (IN6_ARE_ADDR_EQUAL(src, &c->ip6.addr))
-			udp_tap_map[V6][srcport].flags |= PORT_GUA;
-		else
-			udp_tap_map[V6][srcport].flags &= ~PORT_GUA;
-
-		bitmap_set(udp_act[V6][UDP_ACT_TAP], srcport);
-
-		dst = &c->ip6.addr_ll_seen;
-
-		if (IN6_IS_ADDR_LINKLOCAL(&c->ip6.gw))
-			src = &c->ip6.gw;
-		else
-			src = &c->ip6.addr_ll;
-
-	}
-
 	ip6h->payload_len = htons(l4len);
-	ip6h->daddr = *dst;
-	ip6h->saddr = *src;
+	ip6h->daddr = fside->eaddr.a6;
+	ip6h->saddr = fside->faddr.a6;
 	ip6h->version = 6;
 	ip6h->nexthdr = IPPROTO_UDP;
 	ip6h->hop_limit = 255;
 
-	bp->uh.source = s_in6->sin6_port;
-	bp->uh.dest = htons(dstport);
+	bp->uh.source = htons(fside->fport);
+	bp->uh.dest = htons(fside->eport);
 	bp->uh.len = ip6h->payload_len;
-	csum_udp6(&bp->uh, src, dst, bp->data, dlen);
+	csum_udp6(&bp->uh, &fside->faddr.a6, &fside->eaddr.a6, bp->data, dlen);
 
 	return l4len;
 }
 
 /**
  * udp_tap_prepare() - Convert one datagram into a tap frame
- * @c:		Execution context
  * @mmh:	Receiving mmsghdr array
  * @idx:	Index of the datagram to prepare
- * @dstport:	Destination port
- * @v6:		Prepare for IPv6?
- * @now:	Current timestamp
+ * @fside:	flowside for destination side
  */
-static void udp_tap_prepare(const struct ctx *c, const struct mmsghdr *mmh,
-			    unsigned idx, in_port_t dstport, bool v6,
-			    const struct timespec *now)
+static void udp_tap_prepare(const struct mmsghdr *mmh, unsigned idx,
+			    const struct flowside *fside)
 {
 	struct iovec (*tap_iov)[UDP_NUM_IOVS] = &udp_l2_iov[idx];
 	struct udp_payload_t *bp = &udp_payload[idx];
 	struct udp_meta_t *bm = &udp_meta[idx];
 	size_t l4len;
 
-	if (v6) {
-		l4len = udp_update_hdr6(c, &bm->ip6h, &bm->s_in.sa6, bp,
-					dstport, mmh[idx].msg_len, now);
+	if (!inany_v4(&fside->eaddr) || !inany_v4(&fside->faddr)) {
+		l4len = udp_update_hdr6(&bm->ip6h, bp, fside, mmh[idx].msg_len);
 		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) +
 			       sizeof(udp6_eth_hdr));
 		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp6_eth_hdr);
 		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
 	} else {
-		l4len = udp_update_hdr4(c, &bm->ip4h, &bm->s_in.sa4, bp,
-					dstport, mmh[idx].msg_len, now);
+		l4len = udp_update_hdr4(&bm->ip4h, bp, fside, mmh[idx].msg_len);
 		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) +
 			       sizeof(udp4_eth_hdr));
 		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp4_eth_hdr);
@@ -766,17 +672,11 @@ void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t eve
 			  const struct timespec *now)
 {
 	struct mmsghdr *mmh_recv = ref.udp.v6 ? udp6_mh_recv : udp4_mh_recv;
-	in_port_t dstport = ref.udp.port;
 	int n, i;
 
 	if ((n = udp_sock_recv(c, ref.fd, events, mmh_recv)) <= 0)
 		return;
 
-	if (ref.udp.pif == PIF_SPLICE)
-		dstport += c->udp.fwd_out.f.delta[dstport];
-	else if (ref.udp.pif == PIF_HOST)
-		dstport += c->udp.fwd_in.f.delta[dstport];
-
 	/* We divide datagrams into batches based on how we need to send them,
 	 * determined by udp_meta[i].tosidx.  To avoid either two passes through
 	 * the array, or recalculating tosidx for a single entry, we have to
@@ -791,9 +691,9 @@ void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t eve
 		do {
 			if (pif_is_socket(batchpif))
 				udp_splice_prepare(mmh_recv, i);
-			else
-				udp_tap_prepare(c, mmh_recv, i, dstport,
-						ref.udp.v6, now);
+			else if (batchpif == PIF_TAP)
+				udp_tap_prepare(mmh_recv, i,
+						flowside_at_sidx(batchsidx));
 
 			if (++i >= n)
 				break;
@@ -803,12 +703,24 @@ void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t eve
 								now);
 		} while (flow_sidx_eq(udp_meta[i].tosidx, batchsidx));
 
-		if (pif_is_socket(batchpif))
+		if (pif_is_socket(batchpif)) {
 			udp_splice_send(c, batchstart, i - batchstart,
 					batchsidx);
-		else
+		} else if (batchpif == PIF_TAP) {
 			tap_send_frames(c, &udp_l2_iov[batchstart][0],
 					UDP_NUM_IOVS, i - batchstart);
+		} else if (flow_sidx_valid(batchsidx)) {
+			flow_sidx_t fromsidx = flow_sidx_opposite(batchsidx);
+			struct udp_flow *uflow = udp_at_sidx(batchsidx);
+
+			flow_err(uflow,
+				 "No support for forwarding UDP from %s to %s",
+				 pif_name(pif_at_sidx(fromsidx)),
+				 pif_name(batchpif));
+		} else {
+			debug("Discarding %d datagrams without flow",
+			      i - batchstart);
+		}
 	}
 }
 
@@ -845,14 +757,20 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
 	for (i = 0; i < n; i++) {
 		if (pif_is_socket(topif))
 			udp_splice_prepare(mmh_recv, i);
-		else
-			udp_tap_prepare(c, mmh_recv, i, toside->eport, v6, now);
+		else if (topif == PIF_TAP)
+			udp_tap_prepare(mmh_recv, i, toside);
 	}
 
-	if (pif_is_socket(topif))
+	if (pif_is_socket(topif)) {
 		udp_splice_send(c, 0, n, tosidx);
-	else
+	} else if (topif == PIF_TAP) {
 		tap_send_frames(c, &udp_l2_iov[0][0], UDP_NUM_IOVS, n);
+	} else {
+		uint8_t frompif = uflow->f.pif[ref.flowside.side];
+
+		flow_err(uflow, "No support for forwarding UDP from %s to %s",
+			 pif_name(frompif), pif_name(topif));
+	}
 }
 
 /**
-- 
2.45.2


  parent reply	other threads:[~2024-07-05  2:07 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-05  2:06 [PATCH v7 00/27] Unified flow table David Gibson
2024-07-05  2:06 ` [PATCH v7 01/27] flow: Common address information for initiating side David Gibson
2024-07-05  2:06 ` [PATCH v7 02/27] flow: Common address information for target side David Gibson
2024-07-10 21:30   ` Stefano Brivio
2024-07-11  0:19     ` David Gibson
2024-07-05  2:07 ` [PATCH v7 03/27] tcp, flow: Remove redundant information, repack connection structures David Gibson
2024-07-05  2:07 ` [PATCH v7 04/27] tcp: Obtain guest address from flowside David Gibson
2024-07-05  2:07 ` [PATCH v7 05/27] tcp: Manage outbound address via flow table David Gibson
2024-07-05  2:07 ` [PATCH v7 06/27] tcp: Simplify endpoint validation using flowside information David Gibson
2024-07-05  2:07 ` [PATCH v7 07/27] tcp_splice: Eliminate SPLICE_V6 flag David Gibson
2024-07-05  2:07 ` [PATCH v7 08/27] tcp, flow: Replace TCP specific hash function with general flow hash David Gibson
2024-07-05  2:07 ` [PATCH v7 09/27] flow, tcp: Generalise TCP hash table to general flow hash table David Gibson
2024-07-05  2:07 ` [PATCH v7 10/27] tcp: Re-use flow hash for initial sequence number generation David Gibson
2024-07-05  2:07 ` [PATCH v7 11/27] icmp: Remove redundant id field from flow table entry David Gibson
2024-07-05  2:07 ` [PATCH v7 12/27] icmp: Obtain destination addresses from the flowsides David Gibson
2024-07-05  2:07 ` [PATCH v7 13/27] icmp: Look up ping flows using flow hash David Gibson
2024-07-05  2:07 ` [PATCH v7 14/27] icmp: Eliminate icmp_id_map David Gibson
2024-07-05  2:07 ` [PATCH v7 15/27] flow: Helper to create sockets based on flowside David Gibson
2024-07-10 21:32   ` Stefano Brivio
2024-07-11  0:21     ` David Gibson
2024-07-11  0:27     ` David Gibson
2024-07-05  2:07 ` [PATCH v7 16/27] icmp: Manage outbound socket address via flow table David Gibson
2024-07-05  2:07 ` [PATCH v7 17/27] flow, tcp: Flow based NAT and port forwarding for TCP David Gibson
2024-07-05  2:07 ` [PATCH v7 18/27] flow, icmp: Use general flow forwarding rules for ICMP David Gibson
2024-07-05  2:07 ` [PATCH v7 19/27] fwd: Update flow forwarding logic for UDP David Gibson
2024-07-08 21:26   ` Stefano Brivio
2024-07-09  0:19     ` David Gibson
2024-07-05  2:07 ` [PATCH v7 20/27] udp: Create flows for datagrams from originating sockets David Gibson
2024-07-09 22:32   ` Stefano Brivio
2024-07-09 23:59     ` David Gibson
2024-07-10 21:35       ` Stefano Brivio
2024-07-11  4:26         ` David Gibson
2024-07-11  8:20           ` Stefano Brivio
2024-07-11 22:58             ` David Gibson
2024-07-12  8:21               ` Stefano Brivio
2024-07-15  4:06                 ` David Gibson
2024-07-15 16:37                   ` Stefano Brivio
2024-07-17  0:49                     ` David Gibson
2024-07-05  2:07 ` [PATCH v7 21/27] udp: Handle "spliced" datagrams with per-flow sockets David Gibson
2024-07-09 22:32   ` Stefano Brivio
2024-07-10  0:23     ` David Gibson
2024-07-10 17:13       ` Stefano Brivio
2024-07-11  1:30         ` David Gibson
2024-07-11  8:23           ` Stefano Brivio
2024-07-11  2:48         ` David Gibson
2024-07-12 13:34   ` Stefano Brivio
2024-07-15  4:32     ` David Gibson
2024-07-05  2:07 ` [PATCH v7 22/27] udp: Remove obsolete splice tracking David Gibson
2024-07-10 21:36   ` Stefano Brivio
2024-07-11  0:43     ` David Gibson
2024-07-05  2:07 ` [PATCH v7 23/27] udp: Find or create flows for datagrams from tap interface David Gibson
2024-07-10 21:36   ` Stefano Brivio
2024-07-11  0:45     ` David Gibson
2024-07-05  2:07 ` David Gibson [this message]
2024-07-10 21:37   ` [PATCH v7 24/27] udp: Direct datagrams from host to guest via flow table Stefano Brivio
2024-07-11  0:46     ` David Gibson
2024-07-05  2:07 ` [PATCH v7 25/27] udp: Remove obsolete socket tracking David Gibson
2024-07-05  2:07 ` [PATCH v7 26/27] udp: Remove rdelta port forwarding maps David Gibson
2024-07-05  2:07 ` [PATCH v7 27/27] udp: Rename UDP listening sockets David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240705020724.3447719-25-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=jmaloy@redhat.com \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).