From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mail.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3]) by passt.top (Postfix) with ESMTPS id 1B4775A004E for ; Fri, 05 Jul 2024 12:44:27 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gibson.dropbear.id.au; s=202312; t=1720176252; bh=2geuS4+emzaV2Z0FYz03ZzKLSKXJaFcAcIq3zo+fFjs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Y0yWRpCgM9+ez/eG38kOihmPVN2PbePL1Jdmg3BNMpYlqNPOKQAlf4/3qPHcvceVd OojlKC2E2JF8WTnmt1pO3SSthn4VoQoK+ZmrxWRuzWhPjunbLFxGSq40wuhDJ7ofAD M2aDLE6jTrq0qyFwSsbVKPZ5PkOo+SlguZ95GFcEumdsytXUZ/pXN9DbJ7DIbSfxb5 i4yl+GVj2/4wsPejTEwKVKl3T+egRBhcjefg2F6H/DO2Umsog0e/yQQ3qisHjUpMcG 42ASESNO1JocXlryD9P3pedE51BvSSWr1xtg/goTv7vnDiKM0aTluY+SfP8p3p+qFe +JIXNrcORF7fg== Received: by gandalf.ozlabs.org (Postfix, from userid 1007) id 4WFqrX0WD2z4xFg; Fri, 5 Jul 2024 20:44:12 +1000 (AEST) From: David Gibson To: passt-dev@passt.top, Stefano Brivio Subject: [PATCH v2 09/11] udp: Consolidate datagram batching Date: Fri, 5 Jul 2024 20:44:07 +1000 Message-ID: <20240705104409.3847002-10-david@gibson.dropbear.id.au> X-Mailer: git-send-email 2.45.2 In-Reply-To: <20240705104409.3847002-1-david@gibson.dropbear.id.au> References: <20240705104409.3847002-1-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Message-ID-Hash: HYWGV72LKATKBDLJTUHANCSVH3XR275F X-Message-ID-Hash: HYWGV72LKATKBDLJTUHANCSVH3XR275F X-MailFrom: dgibson@gandalf.ozlabs.org X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: David Gibson X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: When we receive datagrams on a socket, we 
need to split them into batches depending on how they need to be forwarded (either via a specific splice socket, or via tap). The logic to do this is somewhat awkwardly split between udp_buf_sock_handler() itself, udp_splice_send() and udp_tap_send(). Move all the batching logic into udp_buf_sock_handler(), leaving udp_splice_send() to just send the prepared batch. udp_tap_send() reduces to just a call to tap_send_frames(), so open-code that call in udp_buf_sock_handler(). This will allow separating the batching logic from the rest of the datagram forwarding logic, which we'll need for upcoming flow table support. Signed-off-by: David Gibson --- udp.c | 132 +++++++++++++++++++--------------------------------------- 1 file changed, 42 insertions(+), 90 deletions(-) diff --git a/udp.c b/udp.c index af5f23f0..dee402f7 100644 --- a/udp.c +++ b/udp.c @@ -501,42 +501,29 @@ static void udp_splice_prepare(struct mmsghdr *mmh, unsigned idx) } /** - * udp_splice_send() - Send datagrams from socket to socket + * udp_splice_send() - Send a batch of datagrams from socket to socket * @c: Execution context - * @start: Index of first datagram in udp[46]_l2_buf - * @n: Total number of datagrams in udp[46]_l2_buf pool - * @dst: Datagrams will be sent to this port (on destination side) + * @start: Index of batch's first datagram in udp[46]_l2_buf + * @n: Number of datagrams in batch + * @src: Source port for datagram (target side) + * @dst: Destination port for datagrams (target side) * @ref: epoll reference for origin socket * @now: Timestamp - * - * This consumes as many datagrams as are sendable via a single socket. It - * requires that udp_meta[@start].splicesrc is initialised, and will initialise - * udp_meta[].splicesrc for each datagram it consumes *and one more* (if - * present). 
- * - * Return: Number of datagrams forwarded */ -static unsigned udp_splice_send(const struct ctx *c, size_t start, size_t n, - in_port_t dst, union epoll_ref ref, - const struct timespec *now) +static void udp_splice_send(const struct ctx *c, size_t start, size_t n, + in_port_t src, in_port_t dst, + union epoll_ref ref, + const struct timespec *now) { - in_port_t src = udp_meta[start].splicesrc; - struct mmsghdr *mmh_recv; - unsigned int i = start; int s; - ASSERT(udp_meta[start].splicesrc >= 0); - ASSERT(ref.type == EPOLL_TYPE_UDP); - if (ref.udp.v6) { - mmh_recv = udp6_mh_recv; udp_splice_to.sa6 = (struct sockaddr_in6) { .sin6_family = AF_INET6, .sin6_addr = in6addr_loopback, .sin6_port = htons(dst), }; } else { - mmh_recv = udp4_mh_recv; udp_splice_to.sa4 = (struct sockaddr_in) { .sin_family = AF_INET, .sin_addr = in4addr_loopback, @@ -544,15 +531,6 @@ static unsigned udp_splice_send(const struct ctx *c, size_t start, size_t n, }; } - do { - udp_splice_prepare(mmh_recv, i); - - if (++i >= n) - break; - - udp_meta[i].splicesrc = udp_mmh_splice_port(ref, &mmh_recv[i]); - } while (udp_meta[i].splicesrc == src); - if (ref.udp.pif == PIF_SPLICE) { src += c->udp.fwd_in.rdelta[src]; s = udp_splice_init[ref.udp.v6][src].sock; @@ -560,7 +538,7 @@ static unsigned udp_splice_send(const struct ctx *c, size_t start, size_t n, s = udp_splice_new(c, ref.udp.v6, src, false); if (s < 0) - goto out; + return; udp_splice_ns[ref.udp.v6][dst].ts = now->tv_sec; udp_splice_init[ref.udp.v6][src].ts = now->tv_sec; @@ -577,15 +555,13 @@ static unsigned udp_splice_send(const struct ctx *c, size_t start, size_t n, s = arg.s; } if (s < 0) - goto out; + return; udp_splice_init[ref.udp.v6][dst].ts = now->tv_sec; udp_splice_ns[ref.udp.v6][src].ts = now->tv_sec; } - sendmmsg(s, udp_mh_splice + start, i - start, MSG_NOSIGNAL); -out: - return i - start; + sendmmsg(s, udp_mh_splice + start, n, MSG_NOSIGNAL); } /** @@ -725,7 +701,7 @@ static size_t udp_update_hdr6(const struct ctx *c, * @v6: 
Prepare for IPv6? * @now: Current timestamp */ -static void udp_tap_prepare(const struct ctx *c, struct mmsghdr *mmh, +static void udp_tap_prepare(const struct ctx *c, const struct mmsghdr *mmh, unsigned idx, in_port_t dstport, bool v6, const struct timespec *now) { @@ -752,49 +728,6 @@ static void udp_tap_prepare(const struct ctx *c, struct mmsghdr *mmh, (*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len; } -/** - * udp_tap_send() - Prepare UDP datagrams and send to tap interface - * @c: Execution context - * @start: Index of first datagram in udp[46]_l2_buf pool - * @n: Total number of datagrams in udp[46]_l2_buf pool - * @dstport: Destination port number on destination side - * @ref: epoll reference for origin socket - * @now: Current timestamp - * - * This consumes as many frames as are sendable via tap. It requires that - * udp_meta[@start].splicesrc is initialised, and will initialise - * udp_meta[].splicesrc for each frame it consumes *and one more* (if present). - * - * Return: Number of frames sent via tap - */ -static unsigned udp_tap_send(const struct ctx *c, size_t start, size_t n, - in_port_t dstport, union epoll_ref ref, - const struct timespec *now) -{ - struct mmsghdr *mmh_recv; - size_t i = start; - - ASSERT(udp_meta[start].splicesrc == -1); - ASSERT(ref.type == EPOLL_TYPE_UDP); - - if (ref.udp.v6) - mmh_recv = udp6_mh_recv; - else - mmh_recv = udp4_mh_recv; - - do { - udp_tap_prepare(c, mmh_recv, i, dstport, ref.udp.v6, now); - - if (++i >= n) - break; - - udp_meta[i].splicesrc = udp_mmh_splice_port(ref, &mmh_recv[i]); - } while (udp_meta[i].splicesrc == -1); - - tap_send_frames(c, &udp_l2_iov[start][0], UDP_NUM_IOVS, i - start); - return i - start; -} - /** * udp_sock_recv() - Receive datagrams from a socket * @c: Execution context @@ -842,7 +775,7 @@ void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t eve { struct mmsghdr *mmh_recv = ref.udp.v6 ? 
udp6_mh_recv : udp4_mh_recv; in_port_t dstport = ref.udp.port; - int n, m, i; + int n, i; if ((n = udp_sock_recv(c, ref.fd, events, mmh_recv)) <= 0) return; @@ -852,19 +785,38 @@ void udp_buf_sock_handler(const struct ctx *c, union epoll_ref ref, uint32_t eve else if (ref.udp.pif == PIF_HOST) dstport += c->udp.fwd_in.f.delta[dstport]; - /* We divide things into batches based on how we need to send them, + /* We divide datagrams into batches based on how we need to send them, * determined by udp_meta[i].splicesrc. To avoid either two passes * through the array, or recalculating splicesrc for a single entry, we - * have to populate it one entry *ahead* of the loop counter (if - * present). So we fill in entry 0 before the loop, then udp_*_send() - * populate one entry past where they consume. + * have to populate it one entry *ahead* of the loop counter. */ udp_meta[0].splicesrc = udp_mmh_splice_port(ref, mmh_recv); - for (i = 0; i < n; i += m) { - if (udp_meta[i].splicesrc >= 0) - m = udp_splice_send(c, i, n, dstport, ref, now); - else - m = udp_tap_send(c, i, n, dstport, ref, now); + for (i = 0; i < n; ) { + int batchsrc = udp_meta[i].splicesrc; + int batchstart = i; + + do { + if (batchsrc >= 0) { + udp_splice_prepare(mmh_recv, i); + } else { + udp_tap_prepare(c, mmh_recv, i, dstport, + ref.udp.v6, now); + } + + if (++i >= n) + break; + + udp_meta[i].splicesrc = udp_mmh_splice_port(ref, + &mmh_recv[i]); + } while (udp_meta[i].splicesrc == batchsrc); + + if (batchsrc >= 0) { + udp_splice_send(c, batchstart, i - batchstart, + batchsrc, dstport, ref, now); + } else { + tap_send_frames(c, &udp_l2_iov[batchstart][0], + UDP_NUM_IOVS, i - batchstart); + } } } -- 2.45.2