public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 1/3] udp: Merge udp[46]_mh_recv arrays
Date: Mon, 26 Aug 2024 19:37:14 +1000	[thread overview]
Message-ID: <20240826093716.1925064-2-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20240826093716.1925064-1-david@gibson.dropbear.id.au>

We've already gotten rid of most of the IPv4/IPv6 specific data structures
in udp.c by merging them with each other.  One significant one remains:
udp[46]_mh_recv.  This was a bit awkward to remove because of a subtle
interaction.  We initialise the msg_namelen fields to represent the total
size we have for a socket address, but when we receive into the arrays
those are modified to the actual length of the sockaddr we received.

That meant that naively merging the arrays meant that if we received IPv4
datagrams, then IPv6 datagrams, the addresses for the latter would be
truncated.  In this patch address that by resetting the received
msg_namelen as soon as we've found a flow for the datagram.  Finding the
flow is the only thing that might use the actual sockaddr length, although
we in fact don't need it for the time being.

This also removes the last use of the 'v6' field from udp_listen_epoll_ref,
so remove that as well.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 udp.c | 57 ++++++++++++++++++++-------------------------------------
 udp.h |  2 --
 2 files changed, 20 insertions(+), 39 deletions(-)

diff --git a/udp.c b/udp.c
index 8a93aad6..6638c22b 100644
--- a/udp.c
+++ b/udp.c
@@ -178,8 +178,7 @@ enum udp_iov_idx {
 
 /* IOVs and msghdr arrays for receiving datagrams from sockets */
 static struct iovec	udp_iov_recv		[UDP_MAX_FRAMES];
-static struct mmsghdr	udp4_mh_recv		[UDP_MAX_FRAMES];
-static struct mmsghdr	udp6_mh_recv		[UDP_MAX_FRAMES];
+static struct mmsghdr	udp_mh_recv		[UDP_MAX_FRAMES];
 
 /* IOVs and msghdr arrays for sending "spliced" datagrams to sockets */
 static union sockaddr_inany udp_splice_to;
@@ -222,6 +221,7 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
 static void udp_iov_init_one(const struct ctx *c, size_t i)
 {
 	struct udp_payload_t *payload = &udp_payload[i];
+	struct msghdr *mh = &udp_mh_recv[i].msg_hdr;
 	struct udp_meta_t *meta = &udp_meta[i];
 	struct iovec *siov = &udp_iov_recv[i];
 	struct iovec *tiov = udp_l2_iov[i];
@@ -236,27 +236,10 @@ static void udp_iov_init_one(const struct ctx *c, size_t i)
 	tiov[UDP_IOV_TAP] = tap_hdr_iov(c, &meta->taph);
 	tiov[UDP_IOV_PAYLOAD].iov_base = payload;
 
-	/* It's useful to have separate msghdr arrays for receiving.  Otherwise,
-	 * an IPv4 recv() will alter msg_namelen, so we'd have to reset it every
-	 * time or risk truncating the address on future IPv6 recv()s.
-	 */
-	if (c->ifi4) {
-		struct msghdr *mh = &udp4_mh_recv[i].msg_hdr;
-
-		mh->msg_name	= &meta->s_in;
-		mh->msg_namelen	= sizeof(struct sockaddr_in);
-		mh->msg_iov	= siov;
-		mh->msg_iovlen	= 1;
-	}
-
-	if (c->ifi6) {
-		struct msghdr *mh = &udp6_mh_recv[i].msg_hdr;
-
-		mh->msg_name	= &meta->s_in;
-		mh->msg_namelen	= sizeof(struct sockaddr_in6);
-		mh->msg_iov	= siov;
-		mh->msg_iovlen	= 1;
-	}
+	mh->msg_name	= &meta->s_in;
+	mh->msg_namelen	= sizeof(meta->s_in);
+	mh->msg_iov	= siov;
+	mh->msg_iovlen	= 1;
 }
 
 /**
@@ -506,10 +489,10 @@ static int udp_sock_recv(const struct ctx *c, int s, uint32_t events,
 void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
 			     uint32_t events, const struct timespec *now)
 {
-	struct mmsghdr *mmh_recv = ref.udp.v6 ? udp6_mh_recv : udp4_mh_recv;
+	const socklen_t sasize = sizeof(udp_meta[0].s_in);
 	int n, i;
 
-	if ((n = udp_sock_recv(c, ref.fd, events, mmh_recv)) <= 0)
+	if ((n = udp_sock_recv(c, ref.fd, events, udp_mh_recv)) <= 0)
 		return;
 
 	/* We divide datagrams into batches based on how we need to send them,
@@ -518,6 +501,7 @@ void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
 	 * populate it one entry *ahead* of the loop counter.
 	 */
 	udp_meta[0].tosidx = udp_flow_from_sock(c, ref, &udp_meta[0].s_in, now);
+	udp_mh_recv[0].msg_hdr.msg_namelen = sasize;
 	for (i = 0; i < n; ) {
 		flow_sidx_t batchsidx = udp_meta[i].tosidx;
 		uint8_t batchpif = pif_at_sidx(batchsidx);
@@ -525,18 +509,22 @@ void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
 
 		do {
 			if (pif_is_socket(batchpif)) {
-				udp_splice_prepare(mmh_recv, i);
+				udp_splice_prepare(udp_mh_recv, i);
 			} else if (batchpif == PIF_TAP) {
-				udp_tap_prepare(mmh_recv, i,
+				udp_tap_prepare(udp_mh_recv, i,
 						flowside_at_sidx(batchsidx));
 			}
 
+			/* Restore sockaddr length clobbered by recvmsg() */
+			udp_mh_recv[i].msg_hdr.msg_namelen = sizeof(udp_meta[i].s_in);
+
 			if (++i >= n)
 				break;
 
 			udp_meta[i].tosidx = udp_flow_from_sock(c, ref,
 								&udp_meta[i].s_in,
 								now);
+			udp_mh_recv[i].msg_hdr.msg_namelen = sasize;
 		} while (flow_sidx_eq(udp_meta[i].tosidx, batchsidx));
 
 		if (pif_is_socket(batchpif)) {
@@ -572,19 +560,16 @@ void udp_listen_sock_handler(const struct ctx *c, union epoll_ref ref,
 void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
 			    uint32_t events, const struct timespec *now)
 {
-	const struct flowside *fromside = flowside_at_sidx(ref.flowside);
 	flow_sidx_t tosidx = flow_sidx_opposite(ref.flowside);
 	const struct flowside *toside = flowside_at_sidx(tosidx);
 	struct udp_flow *uflow = udp_at_sidx(ref.flowside);
 	int from_s = uflow->s[ref.flowside.sidei];
-	bool v6 = !inany_v4(&fromside->eaddr);
-	struct mmsghdr *mmh_recv = v6 ? udp6_mh_recv : udp4_mh_recv;
 	uint8_t topif = pif_at_sidx(tosidx);
 	int n, i;
 
 	ASSERT(!c->no_udp && uflow);
 
-	if ((n = udp_sock_recv(c, from_s, events, mmh_recv)) <= 0)
+	if ((n = udp_sock_recv(c, from_s, events, udp_mh_recv)) <= 0)
 		return;
 
 	flow_trace(uflow, "Received %d datagrams on reply socket", n);
@@ -592,9 +577,11 @@ void udp_reply_sock_handler(const struct ctx *c, union epoll_ref ref,
 
 	for (i = 0; i < n; i++) {
 		if (pif_is_socket(topif))
-			udp_splice_prepare(mmh_recv, i);
+			udp_splice_prepare(udp_mh_recv, i);
 		else if (topif == PIF_TAP)
-			udp_tap_prepare(mmh_recv, i, toside);
+			udp_tap_prepare(udp_mh_recv, i, toside);
+		/* Restore sockaddr length clobbered by recvmsg() */
+		udp_mh_recv[i].msg_hdr.msg_namelen = sizeof(udp_meta[i].s_in);
 	}
 
 	if (pif_is_socket(topif)) {
@@ -740,8 +727,6 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
 		uref.pif = PIF_HOST;
 
 	if ((af == AF_INET || af == AF_UNSPEC) && c->ifi4) {
-		uref.v6 = 0;
-
 		if (!ns) {
 			r4 = s = sock_l4(c, AF_INET, EPOLL_TYPE_UDP_LISTEN,
 					 addr, ifname, port, uref.u32);
@@ -756,8 +741,6 @@ int udp_sock_init(const struct ctx *c, int ns, sa_family_t af,
 	}
 
 	if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6) {
-		uref.v6 = 1;
-
 		if (!ns) {
 			r6 = s = sock_l4(c, AF_INET6, EPOLL_TYPE_UDP_LISTEN,
 					 addr, ifname, port, uref.u32);
diff --git a/udp.h b/udp.h
index fb42e1c5..a8e76bfe 100644
--- a/udp.h
+++ b/udp.h
@@ -26,14 +26,12 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s);
  * union udp_listen_epoll_ref - epoll reference for "listening" UDP sockets
  * @port:		Source port for connected sockets, bound port otherwise
  * @pif:		pif for this socket
- * @v6:			Set for IPv6 sockets or connections
  * @u32:		Opaque u32 value of reference
  */
 union udp_listen_epoll_ref {
 	struct {
 		in_port_t	port;
 		uint8_t		pif;
-		bool		v6:1;
 	};
 	uint32_t u32;
 };
-- 
@@ -26,14 +26,12 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s);
  * union udp_listen_epoll_ref - epoll reference for "listening" UDP sockets
  * @port:		Source port for connected sockets, bound port otherwise
  * @pif:		pif for this socket
- * @v6:			Set for IPv6 sockets or connections
  * @u32:		Opaque u32 value of reference
  */
 union udp_listen_epoll_ref {
 	struct {
 		in_port_t	port;
 		uint8_t		pif;
-		bool		v6:1;
 	};
 	uint32_t u32;
 };
-- 
2.46.0


  reply	other threads:[~2024-08-26  9:37 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-26  9:37 [PATCH 0/3] Dual stack sockets for UDP David Gibson
2024-08-26  9:37 ` David Gibson [this message]
2024-08-26 19:32   ` [PATCH 1/3] udp: Merge udp[46]_mh_recv arrays Stefano Brivio
2024-08-27  1:12     ` David Gibson
2024-08-27  5:33       ` Stefano Brivio
2024-08-27  6:04         ` David Gibson
2024-08-26  9:37 ` [PATCH 2/3] udp: Remove unnnecessary local from udp_sock_init() David Gibson
2024-08-26  9:37 ` [PATCH 3/3] udp: Use dual stack sockets for port forwarding when possible David Gibson
2024-08-26 19:33 ` [PATCH 0/3] Dual stack sockets for UDP Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240826093716.1925064-2-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).