public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH v10 3/3] udp: Pass iov_tail to udp_update_hdr4()/udp_update_hdr6()
Date: Wed, 20 May 2026 11:30:22 +1000	[thread overview]
Message-ID: <ag0OrrwICcBHJkSf@zatzit> (raw)
In-Reply-To: <20260519155613.3127607-4-lvivier@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 15008 bytes --]

On Tue, May 19, 2026 at 05:56:13PM +0200, Laurent Vivier wrote:
> Change udp_update_hdr4() and udp_update_hdr6() to take an iov_tail
> pointing at the UDP frame instead of a contiguous udp_payload_t buffer
> and explicit data length.  This lets vhost-user pass scatter-gather
> virtqueue buffers directly without an intermediate copy.
> 
> The UDP header is built into a local struct udphdr and written back with
> IOV_PUSH_HEADER().  On the tap side, udp_tap_prepare() wraps the
> existing udp_payload_t in a two-element iov to match the new interface.
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  iov.c          |   1 -
>  udp.c          |  74 +++++++++++++++++-----------------
>  udp_internal.h |   6 ++-
>  udp_vu.c       | 106 ++++++++++++++++++-------------------------------
>  4 files changed, 78 insertions(+), 109 deletions(-)

Lovely :).

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

A couple of cosmetic nits below; follow up at your discretion.

> 
> diff --git a/iov.c b/iov.c
> index 6a5d7d35b67f..9248ba95a9f2 100644
> --- a/iov.c
> +++ b/iov.c
> @@ -367,7 +367,6 @@ void *iov_peek_header_(struct iov_tail *tail, void *v, size_t len, size_t align)
>   *
>   * Return: number of bytes written
>   */
> -/* cppcheck-suppress unusedFunction */
>  size_t iov_push_header_(struct iov_tail *tail, const void *v, size_t len)
>  {
>  	size_t l;
> diff --git a/udp.c b/udp.c
> index 66dc7766868c..cfc7b2439881 100644
> --- a/udp.c
> +++ b/udp.c
> @@ -255,20 +255,22 @@ static void udp_iov_init(const struct ctx *c)
>  /**
>   * udp_update_hdr4() - Update headers for one IPv4 datagram
>   * @ip4h:		Pre-filled IPv4 header (except for tot_len and saddr)
> - * @bp:			Pointer to udp_payload_t to update
> + * @uh:			UDP header to fill
> + * @payload:		UDP payload
>   * @toside:		Flowside for destination side
>   * @dlen:		Length of UDP payload
>   * @no_udp_csum:	Do not set UDP checksum
>   *
> - * Return: size of IPv4 payload (UDP header + data)
> + * Return: size of datagram (UDP header + data)
>   */
> -size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
> +size_t udp_update_hdr4(struct iphdr *ip4h, struct udphdr *uh,
> +		       struct iov_tail *payload,
>  		       const struct flowside *toside, size_t dlen,
>  		       bool no_udp_csum)
>  {
>  	const struct in_addr *src = inany_v4(&toside->oaddr);
>  	const struct in_addr *dst = inany_v4(&toside->eaddr);
> -	size_t l4len = dlen + sizeof(bp->uh);
> +	size_t l4len = dlen + sizeof(*uh);
>  	size_t l3len = l4len + sizeof(*ip4h);
>  
>  	assert(src && dst);
> @@ -278,19 +280,13 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
>  	ip4h->saddr = src->s_addr;
>  	ip4h->check = csum_ip4_header(l3len, IPPROTO_UDP, *src, *dst);
>  
> -	bp->uh.source = htons(toside->oport);
> -	bp->uh.dest = htons(toside->eport);
> -	bp->uh.len = htons(l4len);
> -	if (no_udp_csum) {
> -		bp->uh.check = 0;
> -	} else {
> -		const struct iovec iov = {
> -			.iov_base = bp->data,
> -			.iov_len = dlen
> -		};
> -		struct iov_tail data = IOV_TAIL(&iov, 1, 0);
> -		csum_udp4(&bp->uh, *src, *dst, &data, dlen);
> -	}
> +	uh->source = htons(toside->oport);
> +	uh->dest = htons(toside->eport);
> +	uh->len = htons(l4len);
> +	if (no_udp_csum)
> +		uh->check = 0;
> +	else
> +		csum_udp4(uh, *src, *dst, payload, dlen);
>  
>  	return l4len;
>  }
> @@ -299,18 +295,20 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
>   * udp_update_hdr6() - Update headers for one IPv6 datagram
>   * @ip6h:		Pre-filled IPv6 header (except for payload_len and
>   * 			addresses)
> - * @bp:			Pointer to udp_payload_t to update
> + * @uh:			UDP header to fill
> + * @payload:		UDP payload
>   * @toside:		Flowside for destination side
>   * @dlen:		Length of UDP payload
>   * @no_udp_csum:	Do not set UDP checksum
>   *
> - * Return: size of IPv6 payload (UDP header + data)
> + * Return: size of datagram (UDP header + data)
>   */
> -size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
> +size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udphdr *uh,
> +		       struct iov_tail *payload,
>  		       const struct flowside *toside, size_t dlen,
>  		       bool no_udp_csum)
>  {
> -	uint16_t l4len = dlen + sizeof(bp->uh);
> +	uint16_t l4len = dlen + sizeof(*uh);
>  
>  	ip6h->payload_len = htons(l4len);
>  	ip6h->daddr = toside->eaddr.a6;
> @@ -319,23 +317,19 @@ size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
>  	ip6h->nexthdr = IPPROTO_UDP;
>  	ip6h->hop_limit = 255;
>  
> -	bp->uh.source = htons(toside->oport);
> -	bp->uh.dest = htons(toside->eport);
> -	bp->uh.len = ip6h->payload_len;
> +	uh->source = htons(toside->oport);
> +	uh->dest = htons(toside->eport);
> +	uh->len = htons(l4len);
> +
>  	if (no_udp_csum) {
>  		/* 0 is an invalid checksum for UDP IPv6 and dropped by
>  		 * the kernel stack, even if the checksum is disabled by virtio
>  		 * flags. We need to put any non-zero value here.
>  		 */
> -		bp->uh.check = 0xffff;
> +		uh->check = 0xffff;
>  	} else {
> -		const struct iovec iov = {
> -			.iov_base = bp->data,
> -			.iov_len = dlen
> -		};
> -		struct iov_tail data = IOV_TAIL(&iov, 1, 0);
> -		csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data,
> -			  dlen);
> +		csum_udp6(uh, &toside->oaddr.a6, &toside->eaddr.a6,
> +			  payload, dlen);
>  	}
>  
>  	return l4len;
> @@ -372,15 +366,20 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
>  			    bool no_udp_csum)
>  {
>  	struct iovec (*tap_iov)[UDP_NUM_IOVS] = &udp_l2_iov[idx];
> +	struct udphdr *uh = (*tap_iov)[UDP_IOV_PAYLOAD].iov_base;
> +	struct iov_tail payload = IOV_TAIL(&(*tap_iov)[UDP_IOV_PAYLOAD], 1,
> +					   sizeof(*uh));
>  	struct ethhdr *eh = (*tap_iov)[UDP_IOV_ETH].iov_base;
> -	struct udp_payload_t *bp = &udp_payload[idx];
>  	struct udp_meta_t *bm = &udp_meta[idx];
>  	size_t l4len, l2len;
>  
> +	l4len = sizeof(struct udphdr) + mmh[idx].msg_len;

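Nit: sizeof(*uh) rather than sizeof(struct udphdr), to match the
sizeof(*uh) already used for the payload offset just above.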

> +	(*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len;
> +
>  	eth_update_mac(eh, NULL, tap_omac);
>  	if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) {
> -		l4len = udp_update_hdr6(&bm->ip6h, bp, toside,
> -					mmh[idx].msg_len, no_udp_csum);
> +		udp_update_hdr6(&bm->ip6h, uh, &payload, toside,
> +			        mmh[idx].msg_len, no_udp_csum);
>  
>  		l2len = MAX(l4len + sizeof(bm->ip6h) + ETH_HLEN, ETH_ZLEN);
>  		tap_hdr_update(&bm->taph, l2len);
> @@ -388,8 +387,8 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
>  		eh->h_proto = htons_constant(ETH_P_IPV6);
>  		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
>  	} else {
> -		l4len = udp_update_hdr4(&bm->ip4h, bp, toside,
> -					mmh[idx].msg_len, no_udp_csum);
> +		udp_update_hdr4(&bm->ip4h, uh, &payload, toside,
> +			        mmh[idx].msg_len, no_udp_csum);
>  
>  		l2len = MAX(l4len + sizeof(bm->ip4h) + ETH_HLEN, ETH_ZLEN);
>  		tap_hdr_update(&bm->taph, l2len);
> @@ -397,7 +396,6 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
>  		eh->h_proto = htons_constant(ETH_P_IP);
>  		(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip4h);
>  	}
> -	(*tap_iov)[UDP_IOV_PAYLOAD].iov_len = l4len;
>  
>  	udp_tap_pad(*tap_iov);
>  }
> diff --git a/udp_internal.h b/udp_internal.h
> index 64e457748324..361cc7495a01 100644
> --- a/udp_internal.h
> +++ b/udp_internal.h
> @@ -25,10 +25,12 @@ struct udp_payload_t {
>  } __attribute__ ((packed, aligned(__alignof__(unsigned int))));
>  #endif
>  
> -size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
> +size_t udp_update_hdr4(struct iphdr *ip4h, struct udphdr *uh,
> +		       struct iov_tail *payload,
>  		       const struct flowside *toside, size_t dlen,
>  		       bool no_udp_csum);
> -size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
> +size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udphdr *uh,
> +		       struct iov_tail *payload,
>  		       const struct flowside *toside, size_t dlen,
>  		       bool no_udp_csum);
>  void udp_sock_fwd(const struct ctx *c, int s, int rule_hint,
> diff --git a/udp_vu.c b/udp_vu.c
> index 74bf79d57969..888367ef4e16 100644
> --- a/udp_vu.c
> +++ b/udp_vu.c
> @@ -55,40 +55,33 @@ static size_t udp_vu_hdrlen(bool v6)
>  
>  /**
>   * udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers
> - * @iov:	IO vector for the frame (in/out)
> - * @cnt:	Number of available entries in @iov (input)
> - * 		Number of used entries in @iov to store the datagram (output)
> + * @payload:	UDP payload

Nit: "Buffer(s) for UDP payload" maybe?

> + * @cnt:	Number of used entries in @payload to store the datagram (output)
>   * 		Unchanged on failure
>   * @s:		Socket to receive from
> - * @v6:		Set for IPv6 connections
>   *
>   * Return: size of received data, -1 on error
>   */
> -static ssize_t udp_vu_sock_recv(struct iovec *iov, size_t *cnt, int s, bool v6)
> +static ssize_t udp_vu_sock_recv(struct iov_tail *payload, size_t *cnt, int s)
>  {
>  	struct iovec msg_iov[VIRTQUEUE_MAX_SIZE];
>  	struct msghdr msg  = { 0 };
> -	struct iov_tail payload;
> -	size_t hdrlen, iov_used;
> +	size_t iov_used;
>  	ssize_t dlen;
>  
> -	/* compute L2 header length */
> -	hdrlen = udp_vu_hdrlen(v6);
> -
> -	payload = IOV_TAIL(iov, *cnt, hdrlen);
> -
>  	msg.msg_iov = msg_iov;
> -	msg.msg_iovlen = iov_tail_clone(msg.msg_iov, payload.cnt, &payload);
> +	msg.msg_iovlen = iov_tail_clone(msg.msg_iov, ARRAY_SIZE(msg_iov),
> +					payload);
>  
>  	/* read data from the socket */
>  	dlen = recvmsg(s, &msg, 0);
>  	if (dlen < 0)
>  		return -1;
>  
> -	iov_used = iov_skip_bytes(iov, *cnt,
> -				  MAX(dlen + hdrlen, VNET_HLEN + ETH_ZLEN),
> -				  NULL);
> -	if (iov_used < *cnt)
> +	iov_used = iov_skip_bytes(payload->iov, payload->cnt,
> +				  MAX(dlen + payload->off,
> +				      VNET_HLEN + ETH_ZLEN), NULL);
> +	if (iov_used < payload->cnt)
>  		iov_used++;
>  	*cnt = iov_used; /* one iovec per element */
>  
> @@ -98,69 +91,44 @@ static ssize_t udp_vu_sock_recv(struct iovec *iov, size_t *cnt, int s, bool v6)
>  /**
>   * udp_vu_prepare() - Prepare the packet header
>   * @c:		Execution context
> - * @iov:	IO vector for the frame (including vnet header)
> + * @data:	IO vector tail for the L2 frame, on return points to the L4 header
> + * @payload:	UDP payload
>   * @toside:	Address information for one side of the flow
>   * @dlen:	Packet data length
>   */
> -static void udp_vu_prepare(const struct ctx *c, const struct iovec *iov,
> -			     const struct flowside *toside, ssize_t dlen)
> +static void udp_vu_prepare(const struct ctx *c, struct iov_tail *data,
> +			   struct iov_tail *payload,
> +			   const struct flowside *toside, size_t dlen)
>  {
> -	struct ethhdr *eh;
> +	bool ipv4 = inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr);
> +	struct ethhdr eh;
> +	struct udphdr uh;
>  
>  	/* ethernet header */
> -	eh = vu_eth(iov[0].iov_base);
> +	memcpy(eh.h_dest, c->guest_mac, sizeof(eh.h_dest));
> +	memcpy(eh.h_source, c->our_tap_mac, sizeof(eh.h_source));
>  
> -	memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
> -	memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
> +	if (ipv4)
> +		eh.h_proto = htons(ETH_P_IP);
> +	else
> +		eh.h_proto = htons(ETH_P_IPV6);
> +	IOV_PUSH_HEADER(data, eh);
>  
>  	/* initialize header */
> -	if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
> -		struct iphdr *iph = vu_ip(iov[0].iov_base);
> -		struct udp_payload_t *bp = vu_payloadv4(iov[0].iov_base);
> -
> -		eh->h_proto = htons(ETH_P_IP);
> +	if (ipv4) {
> +		struct iphdr iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
>  
> -		*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
> +		udp_update_hdr4(&iph, &uh, payload, toside, dlen, !*c->pcap);
>  
> -		udp_update_hdr4(iph, bp, toside, dlen, true);
> +		IOV_PUSH_HEADER(data, iph);
>  	} else {
> -		struct ipv6hdr *ip6h = vu_ip(iov[0].iov_base);
> -		struct udp_payload_t *bp = vu_payloadv6(iov[0].iov_base);
> +		struct ipv6hdr ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
>  
> -		eh->h_proto = htons(ETH_P_IPV6);
> +		udp_update_hdr6(&ip6h, &uh, payload, toside, dlen, !*c->pcap);
>  
> -		*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
> -
> -		udp_update_hdr6(ip6h, bp, toside, dlen, true);
> -	}
> -}
> -
> -/**
> - * udp_vu_csum() - Calculate and set checksum for a UDP packet
> - * @toside:	Address information for one side of the flow
> - * @iov:	IO vector for the frame
> - * @cnt:	Number of IO vector entries
> - * @dlen:	Data length
> - */
> -static void udp_vu_csum(const struct flowside *toside, const struct iovec *iov,
> -			size_t cnt, size_t dlen)
> -{
> -	const struct in_addr *src4 = inany_v4(&toside->oaddr);
> -	const struct in_addr *dst4 = inany_v4(&toside->eaddr);
> -	char *base = iov[0].iov_base;
> -	struct udp_payload_t *bp;
> -	struct iov_tail data;
> -
> -	if (src4 && dst4) {
> -		bp = vu_payloadv4(base);
> -		data = IOV_TAIL(iov, cnt, (char *)&bp->data - base);
> -		csum_udp4(&bp->uh, *src4, *dst4, &data, dlen);
> -	} else {
> -		bp = vu_payloadv6(base);
> -		data = IOV_TAIL(iov, cnt, (char *)&bp->data - base);
> -		csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data,
> -			  dlen);
> +		IOV_PUSH_HEADER(data, ip6h);
>  	}
> +	IOV_PUSH_HEADER(data, uh);
>  }
>  
>  /**
> @@ -198,6 +166,7 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
>  
>  	for (i = 0; i < n; i++) {
>  		unsigned elem_cnt, elem_used, j, k;
> +		struct iov_tail payload;
>  		size_t iov_cnt;
>  		ssize_t dlen;
>  
> @@ -207,7 +176,8 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
>  		if (elem_cnt == 0)
>  			break;
>  
> -		dlen = udp_vu_sock_recv(iov_vu, &iov_cnt, s, v6);
> +		payload = IOV_TAIL(iov_vu, iov_cnt, hdrlen);
> +		dlen = udp_vu_sock_recv(&payload, &iov_cnt, s);
>  		if (dlen < 0) {
>  			vu_queue_rewind(vq, elem_cnt);
>  			break;
> @@ -227,9 +197,9 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
>  		vu_queue_rewind(vq, elem_cnt - elem_used);
>  
>  		if (iov_cnt > 0) {
> -			udp_vu_prepare(c, iov_vu, toside, dlen);
> +			struct iov_tail data = IOV_TAIL(iov_vu, iov_cnt, VNET_HLEN);
> +			udp_vu_prepare(c, &data, &payload, toside, dlen);
>  			if (*c->pcap) {
> -				udp_vu_csum(toside, iov_vu, iov_cnt, dlen);
>  				pcap_iov(iov_vu, iov_cnt, VNET_HLEN,
>  					 hdrlen + dlen - VNET_HLEN);
>  			}
> -- 
> 2.54.0
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

      reply	other threads:[~2026-05-20  1:30 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-19 15:56 [PATCH v10 0/3] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
2026-05-19 15:56 ` [PATCH v10 1/3] udp_vu: Allow virtqueue elements with multiple iovec entries Laurent Vivier
2026-05-19 15:56 ` [PATCH v10 2/3] iov: Introduce IOV_PUSH_HEADER() macro Laurent Vivier
2026-05-19 15:56 ` [PATCH v10 3/3] udp: Pass iov_tail to udp_update_hdr4()/udp_update_hdr6() Laurent Vivier
2026-05-20  1:30   ` David Gibson [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ag0OrrwICcBHJkSf@zatzit \
    --to=david@gibson.dropbear.id.au \
    --cc=lvivier@redhat.com \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for IMAP folder(s).