public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v2 08/11] udp: Move some more of sock_handler tasks into sub-functions
Date: Fri,  5 Jul 2024 20:44:06 +1000	[thread overview]
Message-ID: <20240705104409.3847002-9-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20240705104409.3847002-1-david@gibson.dropbear.id.au>

udp_buf_sock_handler(), udp_splice_send() and udp_tap_send() loosely do four
things between them:
  1. Receive some datagrams from a socket
  2. Split those datagrams into batches depending on how they need to be
     sent (via tap or via a specific splice socket)
  3. Prepare buffers for each datagram to send it onwards
  4. Actually send it onwards

Split (1) and (3) into specific helper functions.  This isn't
immediately useful (udp_splice_prepare(), in particular, is trivial),
but it will make further reworks clearer.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 udp.c | 130 +++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 84 insertions(+), 46 deletions(-)

diff --git a/udp.c b/udp.c
index 2d403378..af5f23f0 100644
--- a/udp.c
+++ b/udp.c
@@ -490,6 +490,16 @@ static int udp_mmh_splice_port(union epoll_ref ref, const struct mmsghdr *mmh)
 	return -1;
 }
 
+/**
+ * udp_splice_prepare() - Prepare one datagram for splicing
+ * @mmh:	Receiving mmsghdr array
+ * @idx:	Index of the datagram to prepare
+ */
+static void udp_splice_prepare(struct mmsghdr *mmh, unsigned idx)
+{
+	udp_mh_splice[idx].msg_hdr.msg_iov->iov_len = mmh[idx].msg_len;
+}
+
 /**
  * udp_splice_send() - Send datagrams from socket to socket
  * @c:		Execution context
@@ -535,7 +545,7 @@ static unsigned udp_splice_send(const struct ctx *c, size_t start, size_t n,
 	}
 
 	do {
-		udp_mh_splice[i].msg_hdr.msg_iov->iov_len = mmh_recv[i].msg_len;
+		udp_splice_prepare(mmh_recv, i);
 
 		if (++i >= n)
 			break;
@@ -706,6 +716,42 @@ static size_t udp_update_hdr6(const struct ctx *c,
 	return l4len;
 }
 
+/**
+ * udp_tap_prepare() - Convert one datagram into a tap frame
+ * @c:		Execution context
+ * @mmh:	Receiving mmsghdr array
+ * @idx:	Index of the datagram to prepare
+ * @dstport:	Destination port
+ * @v6:		Prepare for IPv6?
+ * @now:	Current timestamp
+ */
+static void udp_tap_prepare(const struct ctx *c, struct mmsghdr *mmh,
+			    unsigned idx, in_port_t dstport, bool v6,
+			    const struct timespec *now)
+{
+	struct iovec (*tap_iov)[UDP_NUM_IOVS] = &udp_l2_iov[idx];
+	struct udp_payload_t *bp = &udp_payload[idx];
+	struct udp_meta_t *bm = &udp_meta[idx];
+	size_t l4len;
+
+	if (v6) {
+		l4len = udp_update_hdr6(c, &bm->ip6h, &bm->s_in.sa6, bp,
+					dstport, mmh[idx].msg_len, now);
+		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) +
+			       sizeof(udp6_eth_hdr));
+		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp6_eth_hdr);
+		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
+	} else {
+		l4len = udp_update_hdr4(c, &bm->ip4h, &bm->s_in.sa4, bp,
+					dstport, mmh[idx].msg_len, now);
+		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) +
+			       sizeof(udp4_eth_hdr));
+		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp4_eth_hdr);
+		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip4h);
+	}
+	(*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len;
+}
+
 /**
  * udp_tap_send() - Prepare UDP datagrams and send to tap interface
  * @c:		Execution context
@@ -737,29 +783,7 @@ static unsigned udp_tap_send(const struct ctx *c, size_t start, size_t n,
 		mmh_recv = udp4_mh_recv;
 
 	do {
-		struct iovec (*tap_iov)[UDP_NUM_IOVS] = &udp_l2_iov[i];
-		struct udp_payload_t *bp = &udp_payload[i];
-		struct udp_meta_t *bm = &udp_meta[i];
-		size_t l4len;
-
-		if (ref.udp.v6) {
-			l4len = udp_update_hdr6(c, &bm->ip6h,
-						&bm->s_in.sa6, bp, dstport,
-						udp6_mh_recv[i].msg_len, now);
-			tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) +
-						  sizeof(udp6_eth_hdr));
-			(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp6_eth_hdr);
-			(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
-		} else {
-			l4len = udp_update_hdr4(c, &bm->ip4h,
-						&bm->s_in.sa4, bp, dstport,
-						udp4_mh_recv[i].msg_len, now);
-			tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) +
-						  sizeof(udp4_eth_hdr));
-			(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp4_eth_hdr);
-			(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip4h);
-		}
-		(*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len;
+		udp_tap_prepare(c, mmh_recv, i, dstport, ref.udp.v6, now);
 
 		if (++i >= n)
 			break;
@@ -771,6 +795,39 @@ static unsigned udp_tap_send(const struct ctx *c, size_t start, size_t n,
 	return i - start;
 }
 
+/**
+ * udp_sock_recv() - Receive datagrams from a socket
+ * @c:		Execution context
+ * @s:		Socket to receive from
+ * @events:	epoll events bitmap
+ * @mmh:	mmsghdr array to receive into
+ *
+ * #syscalls recvmmsg
+ */
+int udp_sock_recv(const struct ctx *c, int s, uint32_t events,
+		  struct mmsghdr *mmh)
+{
+	/* For not entirely clear reasons (data locality?) pasta gets better
+	 * throughput if we receive tap datagrams one at a time.  For small
+	 * splice datagrams throughput is slightly better if we do batch, but
+	 * it's slightly worse for large splice datagrams.  Since we don't know
+	 * before we receive whether we'll use tap or splice, always go one at a
+	 * time for pasta mode.
+	 */
+	int n = (c->mode == MODE_PASTA ? 1 : UDP_MAX_FRAMES);
+
+	if (c->no_udp || !(events & EPOLLIN))
+		return 0;
+
+	n = recvmmsg(s, mmh, n, 0, NULL);
+	if (n < 0) {
+		err_perror("Error receiving datagrams");
+		return 0;
+	}
+
+	return n;
+}
+
 /**
  * udp_buf_sock_handler() - Handle new data from socket
  * @c:		Execution context
@@ -783,21 +840,11 @@ static unsigned udp_tap_send(const struct ctx *c, size_t start, size_t n,
 void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events,
 			  const struct timespec *now)
 {
-	/* For not entirely clear reasons (data locality?) pasta gets
-	 * better throughput if we receive tap datagrams one at a
-	 * atime.  For small splice datagrams throughput is slightly
-	 * better if we do batch, but it's slightly worse for large
-	 * splice datagrams.  Since we don't know before we receive
-	 * whether we'll use tap or splice, always go one at a time
-	 * for pasta mode.
-	 */
-	ssize_t n = (c->mode == MODE_PASTA ? 1 : UDP_MAX_FRAMES);
+	struct mmsghdr *mmh_recv = ref.udp.v6 ? udp6_mh_recv : udp4_mh_recv;
 	in_port_t dstport = ref.udp.port;
-	bool v6 = ref.udp.v6;
-	struct mmsghdr *mmh_recv;
-	int i, m;
+	int n, m, i;
 
-	if (c->no_udp || !(events & EPOLLIN))
+	if ((n = udp_sock_recv(c, ref.fd, events, mmh_recv)) <= 0)
 		return;
 
 	if (ref.udp.pif == PIF_SPLICE)
@@ -805,15 +852,6 @@ void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t eve
 	else if (ref.udp.pif == PIF_HOST)
 		dstport += c->udp.fwd_in.f.delta[dstport];
 
-	if (v6)
-		mmh_recv = udp6_mh_recv;
-	else
-		mmh_recv = udp4_mh_recv;
-
-	n = recvmmsg(ref.fd, mmh_recv, n, 0, NULL);
-	if (n <= 0)
-		return;
-
 	/* We divide things into batches based on how we need to send them,
 	 * determined by udp_meta[i].splicesrc.  To avoid either two passes
 	 * through the array, or recalculating splicesrc for a single entry, we
-- 
@@ -490,6 +490,16 @@ static int udp_mmh_splice_port(union epoll_ref ref, const struct mmsghdr *mmh)
 	return -1;
 }
 
+/**
+ * udp_splice_prepare() - Prepare one datagram for splicing
+ * @mmh:	Receiving mmsghdr array
+ * @idx:	Index of the datagram to prepare
+ */
+static void udp_splice_prepare(struct mmsghdr *mmh, unsigned idx)
+{
+	udp_mh_splice[idx].msg_hdr.msg_iov->iov_len = mmh[idx].msg_len;
+}
+
 /**
  * udp_splice_send() - Send datagrams from socket to socket
  * @c:		Execution context
@@ -535,7 +545,7 @@ static unsigned udp_splice_send(const struct ctx *c, size_t start, size_t n,
 	}
 
 	do {
-		udp_mh_splice[i].msg_hdr.msg_iov->iov_len = mmh_recv[i].msg_len;
+		udp_splice_prepare(mmh_recv, i);
 
 		if (++i >= n)
 			break;
@@ -706,6 +716,42 @@ static size_t udp_update_hdr6(const struct ctx *c,
 	return l4len;
 }
 
+/**
+ * udp_tap_prepare() - Convert one datagram into a tap frame
+ * @c:		Execution context
+ * @mmh:	Receiving mmsghdr array
+ * @idx:	Index of the datagram to prepare
+ * @dstport:	Destination port
+ * @v6:		Prepare for IPv6?
+ * @now:	Current timestamp
+ */
+static void udp_tap_prepare(const struct ctx *c, struct mmsghdr *mmh,
+			    unsigned idx, in_port_t dstport, bool v6,
+			    const struct timespec *now)
+{
+	struct iovec (*tap_iov)[UDP_NUM_IOVS] = &udp_l2_iov[idx];
+	struct udp_payload_t *bp = &udp_payload[idx];
+	struct udp_meta_t *bm = &udp_meta[idx];
+	size_t l4len;
+
+	if (v6) {
+		l4len = udp_update_hdr6(c, &bm->ip6h, &bm->s_in.sa6, bp,
+					dstport, mmh[idx].msg_len, now);
+		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) +
+			       sizeof(udp6_eth_hdr));
+		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp6_eth_hdr);
+		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
+	} else {
+		l4len = udp_update_hdr4(c, &bm->ip4h, &bm->s_in.sa4, bp,
+					dstport, mmh[idx].msg_len, now);
+		tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) +
+			       sizeof(udp4_eth_hdr));
+		(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp4_eth_hdr);
+		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip4h);
+	}
+	(*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len;
+}
+
 /**
  * udp_tap_send() - Prepare UDP datagrams and send to tap interface
  * @c:		Execution context
@@ -737,29 +783,7 @@ static unsigned udp_tap_send(const struct ctx *c, size_t start, size_t n,
 		mmh_recv = udp4_mh_recv;
 
 	do {
-		struct iovec (*tap_iov)[UDP_NUM_IOVS] = &udp_l2_iov[i];
-		struct udp_payload_t *bp = &udp_payload[i];
-		struct udp_meta_t *bm = &udp_meta[i];
-		size_t l4len;
-
-		if (ref.udp.v6) {
-			l4len = udp_update_hdr6(c, &bm->ip6h,
-						&bm->s_in.sa6, bp, dstport,
-						udp6_mh_recv[i].msg_len, now);
-			tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip6h) +
-						  sizeof(udp6_eth_hdr));
-			(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp6_eth_hdr);
-			(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
-		} else {
-			l4len = udp_update_hdr4(c, &bm->ip4h,
-						&bm->s_in.sa4, bp, dstport,
-						udp4_mh_recv[i].msg_len, now);
-			tap_hdr_update(&bm->taph, l4len + sizeof(bm->ip4h) +
-						  sizeof(udp4_eth_hdr));
-			(*tap_iov)[UDP_IOV_ETH] = IOV_OF_LVALUE(udp4_eth_hdr);
-			(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip4h);
-		}
-		(*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len;
+		udp_tap_prepare(c, mmh_recv, i, dstport, ref.udp.v6, now);
 
 		if (++i >= n)
 			break;
@@ -771,6 +795,39 @@ static unsigned udp_tap_send(const struct ctx *c, size_t start, size_t n,
 	return i - start;
 }
 
+/**
+ * udp_sock_recv() - Receive datagrams from a socket
+ * @c:		Execution context
+ * @s:		Socket to receive from
+ * @events:	epoll events bitmap
+ * @mmh:	mmsghdr array to receive into
+ *
+ * #syscalls recvmmsg
+ */
+int udp_sock_recv(const struct ctx *c, int s, uint32_t events,
+		  struct mmsghdr *mmh)
+{
+	/* For not entirely clear reasons (data locality?) pasta gets better
+	 * throughput if we receive tap datagrams one at a time.  For small
+	 * splice datagrams throughput is slightly better if we do batch, but
+	 * it's slightly worse for large splice datagrams.  Since we don't know
+	 * before we receive whether we'll use tap or splice, always go one at a
+	 * time for pasta mode.
+	 */
+	int n = (c->mode == MODE_PASTA ? 1 : UDP_MAX_FRAMES);
+
+	if (c->no_udp || !(events & EPOLLIN))
+		return 0;
+
+	n = recvmmsg(s, mmh, n, 0, NULL);
+	if (n < 0) {
+		err_perror("Error receiving datagrams");
+		return 0;
+	}
+
+	return n;
+}
+
 /**
  * udp_buf_sock_handler() - Handle new data from socket
  * @c:		Execution context
@@ -783,21 +840,11 @@ static unsigned udp_tap_send(const struct ctx *c, size_t start, size_t n,
 void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t events,
 			  const struct timespec *now)
 {
-	/* For not entirely clear reasons (data locality?) pasta gets
-	 * better throughput if we receive tap datagrams one at a
-	 * atime.  For small splice datagrams throughput is slightly
-	 * better if we do batch, but it's slightly worse for large
-	 * splice datagrams.  Since we don't know before we receive
-	 * whether we'll use tap or splice, always go one at a time
-	 * for pasta mode.
-	 */
-	ssize_t n = (c->mode == MODE_PASTA ? 1 : UDP_MAX_FRAMES);
+	struct mmsghdr *mmh_recv = ref.udp.v6 ? udp6_mh_recv : udp4_mh_recv;
 	in_port_t dstport = ref.udp.port;
-	bool v6 = ref.udp.v6;
-	struct mmsghdr *mmh_recv;
-	int i, m;
+	int n, m, i;
 
-	if (c->no_udp || !(events & EPOLLIN))
+	if ((n = udp_sock_recv(c, ref.fd, events, mmh_recv)) <= 0)
 		return;
 
 	if (ref.udp.pif == PIF_SPLICE)
@@ -805,15 +852,6 @@ void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t eve
 	else if (ref.udp.pif == PIF_HOST)
 		dstport += c->udp.fwd_in.f.delta[dstport];
 
-	if (v6)
-		mmh_recv = udp6_mh_recv;
-	else
-		mmh_recv = udp4_mh_recv;
-
-	n = recvmmsg(ref.fd, mmh_recv, n, 0, NULL);
-	if (n <= 0)
-		return;
-
 	/* We divide things into batches based on how we need to send them,
 	 * determined by udp_meta[i].splicesrc.  To avoid either two passes
 	 * through the array, or recalculating splicesrc for a single entry, we
-- 
2.45.2


  parent reply	other threads:[~2024-07-05 10:44 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-05 10:43 [PATCH v2 00/11] Preliminaries for UDP flow support David Gibson
2024-07-05 10:43 ` [PATCH v2 01/11] util: sock_l4() determine protocol from epoll type rather than the reverse David Gibson
2024-07-05 10:44 ` [PATCH v2 02/11] flow: Add flow_sidx_valid() helper David Gibson
2024-07-05 10:44 ` [PATCH v2 03/11] udp: Pass full epoll reference through more of sock handler path David Gibson
2024-07-05 10:44 ` [PATCH v2 04/11] udp: Rename IOV and mmsghdr arrays David Gibson
2024-07-05 10:44 ` [PATCH v2 05/11] udp: Unify udp[46]_mh_splice David Gibson
2024-07-05 10:44 ` [PATCH v2 06/11] udp: Unify udp[46]_l2_iov David Gibson
2024-07-05 10:44 ` [PATCH v2 07/11] udp: Don't repeatedly initialise udp[46]_eth_hdr David Gibson
2024-07-05 10:44 ` David Gibson [this message]
2024-07-05 10:44 ` [PATCH v2 09/11] udp: Consolidate datagram batching David Gibson
2024-07-05 10:44 ` [PATCH v2 10/11] doc: Add program to document and test assumptions about SO_REUSEADDR David Gibson
2024-07-12 11:42   ` David Taylor
2024-07-15  0:43     ` David Gibson
2024-07-05 10:44 ` [PATCH v2 11/11] doc: Test behaviour of zero length datagram recv()s David Gibson
2024-07-05 16:38 ` [PATCH v2 00/11] Preliminaries for UDP flow support Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240705104409.3847002-9-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).