From: David Gibson <david@gibson.dropbear.id.au>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH v4 2/5] udp_vu: Use iov_tail to manage virtqueue buffers
Date: Tue, 24 Mar 2026 13:11:09 +1100 [thread overview]
Message-ID: <acHyvVBAF7XDpyWe@zatzit> (raw)
In-Reply-To: <20260323143151.538673-3-lvivier@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 9831 bytes --]
On Mon, Mar 23, 2026 at 03:31:48PM +0100, Laurent Vivier wrote:
> Replace direct iovec pointer arithmetic in UDP vhost-user handling with
> iov_tail operations.
>
> udp_vu_sock_recv() now takes an iov/cnt pair instead of using the
> file-scoped iov_vu array, and returns the data length rather than the
> iov count. Internally it uses IOV_TAIL() to create a view past the
> L2/L3/L4 headers, and iov_tail_clone() to build the recvmsg() iovec,
> removing the manual pointer offset and restore pattern.
>
> udp_vu_prepare() and udp_vu_csum() take a const struct iov_tail *
> instead of referencing iov_vu directly, making data flow explicit.
>
> udp_vu_csum() uses iov_drop_header() and IOV_REMOVE_HEADER() to locate
> the UDP header and payload, replacing manual offset calculations via
> vu_payloadv4()/vu_payloadv6().
>
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Though a couple of nits below.
> ---
> udp_vu.c | 124 +++++++++++++++++++++++++++++++------------------------
> 1 file changed, 71 insertions(+), 53 deletions(-)
>
> diff --git a/udp_vu.c b/udp_vu.c
> index 6a1e1696b19f..6638cb0ddc5f 100644
> --- a/udp_vu.c
> +++ b/udp_vu.c
> @@ -59,21 +59,26 @@ static size_t udp_vu_hdrlen(bool v6)
> /**
> * udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers
> * @c: Execution context
> + * @iov: IO vector for the frame (in/out)
> + * @cnt: Number of IO vector entries (in/out)
> * @vq: virtqueue to use to receive data
> * @s: Socket to receive from
> * @v6: Set for IPv6 connections
> - * @dlen: Size of received data (output)
> *
> - * Return: number of iov entries used to store the datagram, 0 if the datagram
> + * Return: size of received data, 0 if the datagram
> * was discarded because the virtqueue is not ready, -1 on error
> */
> -static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
> - bool v6, ssize_t *dlen)
> +static ssize_t udp_vu_sock_recv(const struct ctx *c, struct iovec *iov,
> + size_t *cnt, unsigned *elem_used,
> + struct vu_virtq *vq, int s, bool v6)
> {
> const struct vu_dev *vdev = c->vdev;
> - int elem_cnt, elem_used, iov_used;
> struct msghdr msg = { 0 };
> - size_t iov_cnt, hdrlen;
> + struct iov_tail payload;
> + size_t hdrlen, iov_used;
> + unsigned elem_cnt;
> + unsigned i, j;
> + ssize_t dlen;
>
> assert(!c->no_udp);
>
> @@ -83,6 +88,7 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
> if (recvmsg(s, &msg, MSG_DONTWAIT) < 0)
> debug_perror("Failed to discard datagram");
>
> + *cnt = 0;
> return 0;
> }
>
> @@ -90,67 +96,70 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
> hdrlen = udp_vu_hdrlen(v6);
>
> elem_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem),
> - iov_vu, ARRAY_SIZE(iov_vu), &iov_cnt,
> + iov, *cnt, &iov_used,
> IP_MAX_MTU + ETH_HLEN + VNET_HLEN, NULL);
> if (elem_cnt == 0)
> return -1;
>
> - assert((size_t)elem_cnt == iov_cnt); /* one iovec per element */
> + assert((size_t)elem_cnt == iov_used); /* one iovec per element */
>
> - /* reserve space for the headers */
> - assert(iov_vu[0].iov_len >= MAX(hdrlen, ETH_ZLEN + VNET_HLEN));
> - iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen;
> - iov_vu[0].iov_len -= hdrlen;
> + payload = IOV_TAIL(iov, iov_used, hdrlen);
>
> - /* read data from the socket */
> - msg.msg_iov = iov_vu;
> - msg.msg_iovlen = iov_cnt;
> + struct iovec msg_iov[payload.cnt];
> + msg.msg_iov = msg_iov;
> + msg.msg_iovlen = iov_tail_clone(msg.msg_iov, payload.cnt, &payload);
>
> - *dlen = recvmsg(s, &msg, 0);
> - if (*dlen < 0) {
> + /* read data from the socket */
> + dlen = recvmsg(s, &msg, 0);
> + if (dlen < 0) {
> vu_queue_rewind(vq, elem_cnt);
> return -1;
> }
>
> - /* restore the pointer to the headers address */
> - iov_vu[0].iov_base = (char *)iov_vu[0].iov_base - hdrlen;
> - iov_vu[0].iov_len += hdrlen;
> + *cnt = vu_pad(iov, iov_used, 0, dlen + hdrlen);
>
> - iov_used = vu_pad(iov_vu, iov_cnt, 0, *dlen + hdrlen);
> - elem_used = iov_used; /* one iovec per element */
> + *elem_used = 0;
> + for (i = 0, j = 0; j < *cnt && i < elem_cnt; i++) {
Nit, this would be slightly easier to read with the i condition before
the j condition.
> + if (j + elem[i].in_num > *cnt)
> + elem[i].in_num = *cnt - j;
> + j += elem[i].in_num;
> + (*elem_used)++;
> + }
>
> - vu_set_vnethdr(iov_vu[0].iov_base, elem_used);
> + vu_set_vnethdr(iov[0].iov_base, *elem_used);
>
> /* release unused buffers */
> - vu_queue_rewind(vq, elem_cnt - elem_used);
> + vu_queue_rewind(vq, elem_cnt - *elem_used);
>
> - return iov_used;
> + return dlen;
> }
>
> /**
> * udp_vu_prepare() - Prepare the packet header
> * @c: Execution context
> + * @data: IO vector tail for the frame
> * @toside: Address information for one side of the flow
> * @dlen: Packet data length
> *
> * Return: Layer-4 length
> */
> -static size_t udp_vu_prepare(const struct ctx *c,
> +static size_t udp_vu_prepare(const struct ctx *c, const struct iov_tail *data,
> const struct flowside *toside, ssize_t dlen)
> {
> + const struct iovec *iov = data->iov;
> struct ethhdr *eh;
> size_t l4len;
>
> /* ethernet header */
> - eh = vu_eth(iov_vu[0].iov_base);
> + eh = vu_eth(iov[0].iov_base);
>
> memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
> memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
>
> /* initialize header */
> if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
> - struct iphdr *iph = vu_ip(iov_vu[0].iov_base);
> - struct udp_payload_t *bp = vu_payloadv4(iov_vu[0].iov_base);
> + struct iphdr *iph = vu_ip(iov[0].iov_base);
> + struct udp_payload_t *bp = vu_payloadv4(iov[0].iov_base);
>
> eh->h_proto = htons(ETH_P_IP);
>
> @@ -158,8 +167,8 @@ static size_t udp_vu_prepare(const struct ctx *c,
>
> l4len = udp_update_hdr4(iph, bp, toside, dlen, true);
> } else {
> - struct ipv6hdr *ip6h = vu_ip(iov_vu[0].iov_base);
> - struct udp_payload_t *bp = vu_payloadv6(iov_vu[0].iov_base);
> + struct ipv6hdr *ip6h = vu_ip(iov[0].iov_base);
> + struct udp_payload_t *bp = vu_payloadv6(iov[0].iov_base);
>
> eh->h_proto = htons(ETH_P_IPV6);
>
> @@ -174,25 +183,29 @@ static size_t udp_vu_prepare(const struct ctx *c,
> /**
> * udp_vu_csum() - Calculate and set checksum for a UDP packet
> * @toside: Address information for one side of the flow
> - * @iov_used: Number of used iov_vu items
> + * @data: IO vector tail for the frame (including vnet header)
> */
> -static void udp_vu_csum(const struct flowside *toside, int iov_used)
> +static void udp_vu_csum(const struct flowside *toside,
> + const struct iov_tail *data)
> {
> const struct in_addr *src4 = inany_v4(&toside->oaddr);
> const struct in_addr *dst4 = inany_v4(&toside->eaddr);
> - char *base = iov_vu[0].iov_base;
> - struct udp_payload_t *bp;
> - struct iov_tail data;
> + struct iov_tail payload = *data;
> + struct udphdr *uh, uh_storage;
> + bool ipv4 = src4 && dst4;
> +
> + IOV_DROP_HEADER(&payload, struct virtio_net_hdr_mrg_rxbuf);
> + IOV_DROP_HEADER(&payload, struct ethhdr);
> + if (ipv4)
> + IOV_DROP_HEADER(&payload, struct iphdr);
> + else
> + IOV_DROP_HEADER(&payload, struct ipv6hdr);
> + uh = IOV_REMOVE_HEADER(&payload, uh_storage);
If uh_storage is actually used, this function is not correct (we never
write it back to the iov). That doesn't add any limitation that
wasn't already there, but it's arguably less obvious. I'm guessing
you address this later in the series, but maybe a FIXME comment?
> - if (src4 && dst4) {
> - bp = vu_payloadv4(base);
> - data = IOV_TAIL(iov_vu, iov_used, (char *)&bp->data - base);
> - csum_udp4(&bp->uh, *src4, *dst4, &data);
> - } else {
> - bp = vu_payloadv6(base);
> - data = IOV_TAIL(iov_vu, iov_used, (char *)&bp->data - base);
> - csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data);
> - }
> + if (ipv4)
> + csum_udp4(uh, *src4, *dst4, &payload);
> + else
> + csum_udp6(uh, &toside->oaddr.a6, &toside->eaddr.a6, &payload);
> }
>
> /**
> @@ -208,23 +221,28 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
> bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
> struct vu_dev *vdev = c->vdev;
> struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
> + struct iov_tail data;
> int i;
>
> for (i = 0; i < n; i++) {
> + unsigned elem_used;
> + size_t iov_cnt;
> ssize_t dlen;
> - int iov_used;
>
> - iov_used = udp_vu_sock_recv(c, vq, s, v6, &dlen);
> - if (iov_used < 0)
> + iov_cnt = ARRAY_SIZE(iov_vu);
> + dlen = udp_vu_sock_recv(c, iov_vu, &iov_cnt, &elem_used, vq,
> + s, v6);
> + if (dlen < 0)
> break;
>
> - if (iov_used > 0) {
> - udp_vu_prepare(c, toside, dlen);
> + if (iov_cnt > 0) {
> + data = IOV_TAIL(iov_vu, iov_cnt, 0);
> + udp_vu_prepare(c, &data, toside, dlen);
> if (*c->pcap) {
> - udp_vu_csum(toside, iov_used);
> - pcap_iov(iov_vu, iov_used, VNET_HLEN);
> + udp_vu_csum(toside, &data);
> + pcap_iov(data.iov, data.cnt, VNET_HLEN);
> }
> - vu_flush(vdev, vq, elem, iov_used);
> + vu_flush(vdev, vq, elem, elem_used);
> }
> }
> }
> --
> 2.53.0
>
--
David Gibson (he or they) | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you, not the other way
| around.
http://www.ozlabs.org/~dgibson
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
next prev parent reply other threads:[~2026-03-24 2:11 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-23 14:31 [PATCH v4 0/5] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
2026-03-23 14:31 ` [PATCH v4 1/5] vhost-user: Centralise Ethernet frame padding in vu_collect(), vu_pad() and vu_flush() Laurent Vivier
2026-03-24 1:56 ` David Gibson
2026-03-24 8:04 ` Laurent Vivier
2026-03-23 14:31 ` [PATCH v4 2/5] udp_vu: Use iov_tail to manage virtqueue buffers Laurent Vivier
2026-03-24 2:11 ` David Gibson [this message]
2026-03-23 14:31 ` [PATCH v4 3/5] udp_vu: Move virtqueue management from udp_vu_sock_recv() to its caller Laurent Vivier
2026-03-24 2:37 ` David Gibson
2026-03-23 14:31 ` [PATCH v4 4/5] iov: Add IOV_PUT_HEADER() and with_header() to write header data back to iov_tail Laurent Vivier
2026-03-24 2:41 ` David Gibson
2026-03-24 2:48 ` David Gibson
2026-03-24 7:44 ` Laurent Vivier
2026-03-24 23:46 ` David Gibson
2026-03-24 7:16 ` Laurent Vivier
2026-03-24 23:38 ` David Gibson
2026-03-23 14:31 ` [PATCH v4 5/5] udp: Pass iov_tail to udp_update_hdr4()/udp_update_hdr6() Laurent Vivier
2026-03-24 2:54 ` David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=acHyvVBAF7XDpyWe@zatzit \
--to=david@gibson.dropbear.id.au \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).