public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH v2 04/13] udp_vu: Use iov_tail to manage virtqueue buffers
Date: Thu, 12 Mar 2026 13:38:40 +1100	[thread overview]
Message-ID: <abInMI9DGhsOmAUL@zatzit> (raw)
In-Reply-To: <20260309094744.1907754-5-lvivier@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 9242 bytes --]

On Mon, Mar 09, 2026 at 10:47:35AM +0100, Laurent Vivier wrote:
> Replace direct iovec pointer arithmetic in UDP vhost-user handling with
> iov_tail operations.
> 
> udp_vu_sock_recv() now takes an iov/cnt pair instead of using the
> file-scoped iov_vu array, and returns the data length rather than the
> iov count.  Internally it uses iov_drop_header() to skip past L2/L3/L4
> headers before receiving, and iov_tail_clone() to build the recvmsg()
> iovec, removing the manual pointer offset and restore pattern.
> 
> udp_vu_prepare() and udp_vu_csum() take a const struct iov_tail *
> instead of referencing iov_vu directly, making data flow explicit.
> 
> udp_vu_csum() uses iov_drop_header() and IOV_REMOVE_HEADER() to locate
> the UDP header and payload, replacing manual offset calculations via
> vu_payloadv4()/vu_payloadv6().
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>

Minor notes only.

> ---
>  udp_vu.c | 111 ++++++++++++++++++++++++++++---------------------------
>  1 file changed, 57 insertions(+), 54 deletions(-)
> 
> diff --git a/udp_vu.c b/udp_vu.c
> index 439f2cb399b7..a39254776099 100644
> --- a/udp_vu.c
> +++ b/udp_vu.c
> @@ -59,21 +59,25 @@ static size_t udp_vu_hdrlen(bool v6)
>  /**
>   * udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers
>   * @c:		Execution context
> + * @iov:	IO vector for the frame (modified on output)
> + * @cnt:	Number of IO vector entries (in/out)

Nit: "modified on output" and "in/out" are different ways of saying
the same thing, yes?

>   * @vq:		virtqueue to use to receive data
>   * @s:		Socket to receive from
>   * @v6:		Set for IPv6 connections
> - * @dlen:	Size of received data (output)
>   *
> - * Return: number of iov entries used to store the datagram, 0 if the datagram
> + * Return: size of received data, 0 if the datagram
>   *         was discarded because the virtqueue is not ready, -1 on error
>   */
> -static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
> -			    bool v6, ssize_t *dlen)
> +static ssize_t udp_vu_sock_recv(const struct ctx *c, struct iovec *iov,
> +				size_t *cnt, struct vu_virtq *vq, int s,
> +				bool v6)
>  {
>  	const struct vu_dev *vdev = c->vdev;
>  	struct msghdr msg  = { 0 };
> -	int iov_cnt, iov_used;
> +	struct iov_tail payload;
>  	size_t hdrlen;
> +	ssize_t dlen;
> +	int iov_cnt;
>  
>  	ASSERT(!c->no_udp);
>  
> @@ -83,78 +87,74 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
>  		if (recvmsg(s, &msg, MSG_DONTWAIT) < 0)
>  			debug_perror("Failed to discard datagram");
>  
> +		*cnt = 0;
>  		return 0;
>  	}
>  
>  	/* compute L2 header length */
>  	hdrlen = udp_vu_hdrlen(v6);
>  
> -	vu_init_elem(elem, iov_vu, ARRAY_SIZE(elem));
> +	vu_init_elem(elem, iov, *cnt);
>  
>  	iov_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem),
>  			     IP_MAX_MTU + ETH_HLEN + VNET_HLEN, NULL);
>  	if (iov_cnt == 0)
>  		return -1;
>  
> -	/* reserve space for the headers */
> -	ASSERT(iov_vu[0].iov_len >= MAX(hdrlen, ETH_ZLEN + VNET_HLEN));
> +	payload = IOV_TAIL(iov, iov_cnt, hdrlen);
>  
> -	iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen;
> -	iov_vu[0].iov_len -= hdrlen;
> +	struct iovec msg_iov[payload.cnt];

We generally avoid declarations mixed in with statements like this one,
although C11 (like C99) does allow them.

> +	msg.msg_iov = msg_iov;
> +	msg.msg_iovlen = iov_tail_clone(msg.msg_iov, payload.cnt, &payload);
>  	/* read data from the socket */
> -	msg.msg_iov = iov_vu;
> -	msg.msg_iovlen = iov_cnt;
> -
> -	*dlen = recvmsg(s, &msg, 0);
> -	if (*dlen < 0) {
> +	dlen = recvmsg(s, &msg, 0);
> +	if (dlen < 0) {
>  		vu_queue_rewind(vq, iov_cnt);
>  		return -1;
>  	}
>  
> -	/* restore the pointer to the headers address */
> -	iov_vu[0].iov_base = (char *)iov_vu[0].iov_base - hdrlen;
> -	iov_vu[0].iov_len += hdrlen;
> -
>  	/* Pad short frames to ETH_ZLEN */
> -	if (ETH_ZLEN + VNET_HLEN > *dlen + hdrlen) {
> -		iov_memset(iov_vu, iov_cnt, *dlen + hdrlen, 0,
> -			   ETH_ZLEN + VNET_HLEN - (*dlen + hdrlen));
> +	if (ETH_ZLEN + VNET_HLEN > dlen + hdrlen) {
> +		iov_memset(iov, iov_cnt, dlen + hdrlen, 0,
> +			   ETH_ZLEN + VNET_HLEN - (dlen + hdrlen));
>  	}
> -	iov_used = iov_truncate(iov_vu, iov_cnt, *dlen + hdrlen);
> +	*cnt = iov_truncate(iov, iov_cnt, dlen + hdrlen);
> -	vu_set_vnethdr(iov_vu[0].iov_base, iov_used);
> +	vu_set_vnethdr(iov[0].iov_base, *cnt);
>  
>  	/* release unused buffers */
> -	vu_queue_rewind(vq, iov_cnt - iov_used);
> +	vu_queue_rewind(vq, iov_cnt - *cnt);
>  
> -	return iov_used;
> +	return dlen;
>  }
>  
>  /**
>   * udp_vu_prepare() - Prepare the packet header
>   * @c:		Execution context
> + * @data:	IO vector tail for the frame
>   * @toside:	Address information for one side of the flow
>   * @dlen:	Packet data length
>   *
>   * Return: Layer-4 length
>   */
> -static size_t udp_vu_prepare(const struct ctx *c,
> +static size_t udp_vu_prepare(const struct ctx *c, const struct iov_tail *data,
>  			     const struct flowside *toside, ssize_t dlen)
>  {
> +	const struct iovec *iov = data->iov;
>  	struct ethhdr *eh;
>  	size_t l4len;
>  
>  	/* ethernet header */
> -	eh = vu_eth(iov_vu[0].iov_base);
> +	eh = vu_eth(iov[0].iov_base);
>  
>  	memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
>  	memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
>  
>  	/* initialize header */
>  	if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
> -		struct iphdr *iph = vu_ip(iov_vu[0].iov_base);
> -		struct udp_payload_t *bp = vu_payloadv4(iov_vu[0].iov_base);
> +		struct iphdr *iph = vu_ip(iov[0].iov_base);
> +		struct udp_payload_t *bp = vu_payloadv4(iov[0].iov_base);
>  
>  		eh->h_proto = htons(ETH_P_IP);
>  
> @@ -162,8 +162,8 @@ static size_t udp_vu_prepare(const struct ctx *c,
>  
>  		l4len = udp_update_hdr4(iph, bp, toside, dlen, true);
>  	} else {
> -		struct ipv6hdr *ip6h = vu_ip(iov_vu[0].iov_base);
> -		struct udp_payload_t *bp = vu_payloadv6(iov_vu[0].iov_base);
> +		struct ipv6hdr *ip6h = vu_ip(iov[0].iov_base);
> +		struct udp_payload_t *bp = vu_payloadv6(iov[0].iov_base);
>  
>  		eh->h_proto = htons(ETH_P_IPV6);
>  
> @@ -178,25 +178,25 @@ static size_t udp_vu_prepare(const struct ctx *c,
>  /**
>   * udp_vu_csum() - Calculate and set checksum for a UDP packet
>   * @toside:	Address information for one side of the flow
> - * @iov_used:	Number of used iov_vu items
> + * @data:	IO vector tail for the frame

With or without the VU header?

>   */
> -static void udp_vu_csum(const struct flowside *toside, int iov_used)
> +static void udp_vu_csum(const struct flowside *toside,
> +			const struct iov_tail *data)
>  {
>  	const struct in_addr *src4 = inany_v4(&toside->oaddr);
>  	const struct in_addr *dst4 = inany_v4(&toside->eaddr);
> -	char *base = iov_vu[0].iov_base;
> -	struct udp_payload_t *bp;
> -	struct iov_tail data;
> +	struct iov_tail payload = *data;
> +	struct udphdr *uh, uh_storage;
> +	bool ipv4 = src4 && dst4;
>  
> -	if (src4 && dst4) {
> -		bp = vu_payloadv4(base);
> -		data = IOV_TAIL(iov_vu, iov_used, (char *)&bp->data - base);
> -		csum_udp4(&bp->uh, *src4, *dst4, &data);
> -	} else {
> -		bp = vu_payloadv6(base);
> -		data = IOV_TAIL(iov_vu, iov_used, (char *)&bp->data - base);
> -		csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data);
> -	}
> +	iov_drop_header(&payload,
> +			udp_vu_hdrlen(!ipv4) - sizeof(struct udphdr));

This construction is a bit awkward.  Would it be better to use
IOV_DROP_HEADER() on the Ethernet header, then on the IP header?

> +	uh = IOV_REMOVE_HEADER(&payload, uh_storage);
> +
> +	if (ipv4)
> +		csum_udp4(uh, *src4, *dst4, &payload);
> +	else
> +		csum_udp6(uh, &toside->oaddr.a6, &toside->eaddr.a6, &payload);
>  }
>  
>  /**
> @@ -212,23 +212,26 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
>  	bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
>  	struct vu_dev *vdev = c->vdev;
>  	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
> +	struct iov_tail data;
>  	int i;
>  
>  	for (i = 0; i < n; i++) {
> +		size_t iov_cnt;
>  		ssize_t dlen;
> -		int iov_used;
>  
> -		iov_used = udp_vu_sock_recv(c, vq, s, v6, &dlen);
> -		if (iov_used < 0)
> +		iov_cnt = VIRTQUEUE_MAX_SIZE;
> +		dlen = udp_vu_sock_recv(c, iov_vu, &iov_cnt, vq, s, v6);
> +		if (dlen < 0)
>  			break;
>  
> -		if (iov_used > 0) {
> -			udp_vu_prepare(c, toside, dlen);
> +		if (iov_cnt > 0) {
> +			data = IOV_TAIL(iov_vu, iov_cnt, 0);
> +			udp_vu_prepare(c, &data, toside, dlen);
>  			if (*c->pcap) {
> -				udp_vu_csum(toside, iov_used);
> -				pcap_iov(iov_vu, iov_used, VNET_HLEN);
> +				udp_vu_csum(toside, &data);
> +				pcap_iov(data.iov, data.cnt, VNET_HLEN);
>  			}
> -			vu_flush(vdev, vq, elem, iov_used);
> +			vu_flush(vdev, vq, elem, data.cnt);
>  		}
>  	}
>  }
> -- 
> 2.53.0
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  reply	other threads:[~2026-03-12  2:40 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-09  9:47 [PATCH v2 00/13] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
2026-03-09  9:47 ` [PATCH v2 01/13] iov: Add iov_truncate() helper and use it in vu handlers Laurent Vivier
2026-03-09  9:47 ` [PATCH v2 02/13] vhost-user: Centralise 802.3 frame padding in vu_collect() and vu_flush() Laurent Vivier
2026-03-12  2:05   ` David Gibson
2026-03-09  9:47 ` [PATCH v2 03/13] vhost-user: Use ARRAY_SIZE(elem) instead of VIRTQUEUE_MAX_SIZE Laurent Vivier
2026-03-09  9:47 ` [PATCH v2 04/13] udp_vu: Use iov_tail to manage virtqueue buffers Laurent Vivier
2026-03-12  2:38   ` David Gibson [this message]
2026-03-09  9:47 ` [PATCH v2 05/13] udp_vu: Move virtqueue management from udp_vu_sock_recv() to its caller Laurent Vivier
2026-03-12  3:44   ` David Gibson
2026-03-09  9:47 ` [PATCH v2 06/13] iov: Add IOV_PUT_HEADER() to write header data back to iov_tail Laurent Vivier
2026-03-12  4:12   ` David Gibson
2026-03-12  8:20     ` Laurent Vivier
2026-03-09  9:47 ` [PATCH v2 07/13] udp: Pass iov_tail to udp_update_hdr4()/udp_update_hdr6() Laurent Vivier
2026-03-12  4:16   ` David Gibson
2026-03-09  9:47 ` [PATCH v2 08/13] udp_vu: Use iov_tail in udp_vu_prepare() Laurent Vivier
2026-03-12  4:30   ` David Gibson
2026-03-12  8:19     ` Laurent Vivier
2026-03-12  9:51       ` David Gibson
2026-03-09  9:47 ` [PATCH v2 09/13] vu_common: Pass iov_tail to vu_set_vnethdr() Laurent Vivier
2026-03-12  4:34   ` David Gibson
2026-03-09  9:47 ` [PATCH v2 10/13] vu_common: Accept explicit iovec counts in vu_set_element() Laurent Vivier
2026-03-09  9:47 ` [PATCH v2 11/13] vu_common: Accept explicit iovec count per element in vu_init_elem() Laurent Vivier
2026-03-09  9:47 ` [PATCH v2 12/13] vu_common: Prepare to use multibuffer with guest RX Laurent Vivier
2026-03-09  9:47 ` [PATCH v2 13/13] vhost-user,udp: Use 2 iovec entries per element Laurent Vivier
2026-03-12  4:39   ` David Gibson
2026-03-12  8:08     ` Laurent Vivier
2026-03-12  9:47       ` David Gibson
2026-03-12 10:42         ` Laurent Vivier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=abInMI9DGhsOmAUL@zatzit \
    --to=david@gibson.dropbear.id.au \
    --cc=lvivier@redhat.com \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).