From: Stefano Brivio <sbrivio@redhat.com>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH v6 3/3] udp: Pass iov_tail to udp_update_hdr4()/udp_update_hdr6()
Date: Fri, 03 Apr 2026 13:53:34 +0200 (CEST) [thread overview]
Message-ID: <20260403135333.1dd11ad0@elisabeth> (raw)
In-Reply-To: <20260401192326.1783350-4-lvivier@redhat.com>
On Wed, 1 Apr 2026 21:23:26 +0200
Laurent Vivier <lvivier@redhat.com> wrote:
> Change udp_update_hdr4() and udp_update_hdr6() to take an iov_tail
> pointing at the UDP frame instead of a contiguous udp_payload_t buffer
> and explicit data length. This lets vhost-user pass scatter-gather
> virtqueue buffers directly without an intermediate copy.
>
> The UDP header is built into a local struct udphdr and written back with
> IOV_PUSH_HEADER(). On the tap side, udp_tap_prepare() wraps the
> existing udp_payload_t in a two-element iov to match the new interface.
>
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
> iov.c | 1 -
> udp.c | 70 ++++++++++++++++++++++++-------------------
> udp_internal.h | 4 +--
> udp_vu.c | 81 +++++++++++++++++++++++++++-----------------------
> 4 files changed, 84 insertions(+), 72 deletions(-)
>
> diff --git a/iov.c b/iov.c
> index 6357c477bea6..d5fb4e81a502 100644
> --- a/iov.c
> +++ b/iov.c
> @@ -368,7 +368,6 @@ void *iov_peek_header_(struct iov_tail *tail, void *v, size_t len, size_t align)
> *
> * Return: number of bytes written
> */
> -/* cppcheck-suppress unusedFunction */
> size_t iov_push_header_(struct iov_tail *tail, const void *v, size_t len)
> {
> size_t l;
> diff --git a/udp.c b/udp.c
> index e113b26bc726..0035a645ded5 100644
> --- a/udp.c
> +++ b/udp.c
> @@ -255,21 +255,22 @@ static void udp_iov_init(const struct ctx *c)
> /**
> * udp_update_hdr4() - Update headers for one IPv4 datagram
> * @ip4h: Pre-filled IPv4 header (except for tot_len and saddr)
> - * @bp: Pointer to udp_payload_t to update
> + * @payload: iov_tail with UDP payload to update
> * @toside: Flowside for destination side
> * @dlen: Length of UDP payload
> * @no_udp_csum: Do not set UDP checksum
> *
> * Return: size of IPv4 payload (UDP header + data)
> */
> -size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
> +size_t udp_update_hdr4(struct iphdr *ip4h, struct iov_tail *payload,
> const struct flowside *toside, size_t dlen,
> bool no_udp_csum)
> {
> const struct in_addr *src = inany_v4(&toside->oaddr);
> const struct in_addr *dst = inany_v4(&toside->eaddr);
> - size_t l4len = dlen + sizeof(bp->uh);
> + size_t l4len = dlen + sizeof(struct udphdr);
> size_t l3len = l4len + sizeof(*ip4h);
> + struct udphdr uh;
>
> assert(src && dst);
>
> @@ -278,19 +279,18 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
> ip4h->saddr = src->s_addr;
> ip4h->check = csum_ip4_header(l3len, IPPROTO_UDP, *src, *dst);
>
> - bp->uh.source = htons(toside->oport);
> - bp->uh.dest = htons(toside->eport);
> - bp->uh.len = htons(l4len);
> + uh.source = htons(toside->oport);
> + uh.dest = htons(toside->eport);
> + uh.len = htons(l4len);
> if (no_udp_csum) {
> - bp->uh.check = 0;
> + uh.check = 0;
> } else {
> - const struct iovec iov = {
> - .iov_base = bp->data,
> - .iov_len = dlen
> - };
> - struct iov_tail data = IOV_TAIL(&iov, 1, 0);
> - csum_udp4(&bp->uh, *src, *dst, &data, l4len);
> + struct iov_tail data = *payload;
> +
> + IOV_DROP_HEADER(&data, struct udphdr);
> + csum_udp4(&uh, *src, *dst, &data, l4len);
> }
> + IOV_PUSH_HEADER(payload, uh);
>
> return l4len;
> }
> @@ -299,18 +299,19 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
> * udp_update_hdr6() - Update headers for one IPv6 datagram
> * @ip6h: Pre-filled IPv6 header (except for payload_len and
> * addresses)
> - * @bp: Pointer to udp_payload_t to update
> + * @payload: iov_tail with UDP payload to update
> * @toside: Flowside for destination side
> * @dlen: Length of UDP payload
> * @no_udp_csum: Do not set UDP checksum
> *
> * Return: size of IPv6 payload (UDP header + data)
> */
> -size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
> +size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct iov_tail *payload,
> const struct flowside *toside, size_t dlen,
> bool no_udp_csum)
> {
> - uint16_t l4len = dlen + sizeof(bp->uh);
> + uint16_t l4len = dlen + sizeof(struct udphdr);
> + struct udphdr uh;
>
> ip6h->payload_len = htons(l4len);
> ip6h->daddr = toside->eaddr.a6;
> @@ -319,24 +320,24 @@ size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
> ip6h->nexthdr = IPPROTO_UDP;
> ip6h->hop_limit = 255;
>
> - bp->uh.source = htons(toside->oport);
> - bp->uh.dest = htons(toside->eport);
> - bp->uh.len = ip6h->payload_len;
> + uh.source = htons(toside->oport);
> + uh.dest = htons(toside->eport);
> + uh.len = htons(l4len);
> if (no_udp_csum) {
> /* 0 is an invalid checksum for UDP IPv6 and dropped by
> - * the kernel stack, even if the checksum is disabled by virtio
> - * flags. We need to put any non-zero value here.
> + * the kernel stack, even if the checksum is disabled
> + * by virtio flags. We need to put any non-zero value
> + * here.
This comment re-wrapping looks unrelated to the change (and the old comment fit nicely).
> */
> - bp->uh.check = 0xffff;
> + uh.check = 0xffff;
> } else {
> - const struct iovec iov = {
> - .iov_base = bp->data,
> - .iov_len = dlen
> - };
> - struct iov_tail data = IOV_TAIL(&iov, 1, 0);
> - csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data,
> - l4len);
> + struct iov_tail data = *payload;
> +
> + IOV_DROP_HEADER(&data, struct udphdr);
> + csum_udp6(&uh, &toside->oaddr.a6, &toside->eaddr.a6,
> + &data, l4len);
> }
> + IOV_PUSH_HEADER(payload, uh);
>
> return l4len;
> }
> @@ -375,11 +376,18 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
> struct ethhdr *eh = (*tap_iov)[UDP_IOV_ETH].iov_base;
> struct udp_payload_t *bp = &udp_payload[idx];
> struct udp_meta_t *bm = &udp_meta[idx];
> + struct iovec iov[2];
> + struct iov_tail payload = IOV_TAIL(iov, ARRAY_SIZE(iov), 0);
> size_t l4len, l2len;
>
> + iov[0].iov_base = &bp->uh;
> + iov[0].iov_len = sizeof(bp->uh);
> + iov[1].iov_base = bp->data;
> + iov[1].iov_len = mmh[idx].msg_len;
> +
> eth_update_mac(eh, NULL, tap_omac);
> if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) {
> - l4len = udp_update_hdr6(&bm->ip6h, bp, toside,
> + l4len = udp_update_hdr6(&bm->ip6h, &payload, toside,
> mmh[idx].msg_len, no_udp_csum);
>
> l2len = MAX(l4len + sizeof(bm->ip6h) + ETH_HLEN, ETH_ZLEN);
> @@ -388,7 +396,7 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
> eh->h_proto = htons_constant(ETH_P_IPV6);
> (*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
> } else {
> - l4len = udp_update_hdr4(&bm->ip4h, bp, toside,
> + l4len = udp_update_hdr4(&bm->ip4h, &payload, toside,
> mmh[idx].msg_len, no_udp_csum);
>
> l2len = MAX(l4len + sizeof(bm->ip4h) + ETH_HLEN, ETH_ZLEN);
> diff --git a/udp_internal.h b/udp_internal.h
> index 64e457748324..e6cbaab79519 100644
> --- a/udp_internal.h
> +++ b/udp_internal.h
> @@ -25,10 +25,10 @@ struct udp_payload_t {
> } __attribute__ ((packed, aligned(__alignof__(unsigned int))));
> #endif
>
> -size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
> +size_t udp_update_hdr4(struct iphdr *ip4h, struct iov_tail *payload,
> const struct flowside *toside, size_t dlen,
> bool no_udp_csum);
> -size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
> +size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct iov_tail *payload,
> const struct flowside *toside, size_t dlen,
> bool no_udp_csum);
> void udp_sock_fwd(const struct ctx *c, int s, int rule_hint,
> diff --git a/udp_vu.c b/udp_vu.c
> index 5608a3a96ff5..5bc9509a1b98 100644
> --- a/udp_vu.c
> +++ b/udp_vu.c
> @@ -96,43 +96,53 @@ static ssize_t udp_vu_sock_recv(struct iovec *iov, size_t *cnt, int s, bool v6)
> /**
> * udp_vu_prepare() - Prepare the packet header
> * @c: Execution context
> - * @iov: IO vector for the frame (including vnet header)
> + * @data: IO vector tail for the frame,
> + * on return, points to the L3 frame
I think "L3 header" or "IP header" is clearer than "L3 frame" (also
because... it's not a frame). By the way, it fits on a single line, the
usual way we write this stuff:
* @data: IO vector tail for the frame, points to the L3 header on return
> * @toside: Address information for one side of the flow
> * @dlen: Packet data length
> *
> * Return: Layer-4 length
> */
> -static size_t udp_vu_prepare(const struct ctx *c, const struct iovec *iov,
> - const struct flowside *toside, ssize_t dlen)
> +static size_t udp_vu_prepare(const struct ctx *c, struct iov_tail *data,
> + const struct flowside *toside, size_t dlen)
> {
> - struct ethhdr *eh;
> + bool ipv4 = inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr);
> + struct ethhdr eh;
> size_t l4len;
>
> /* ethernet header */
> - eh = vu_eth(iov[0].iov_base);
> + memcpy(eh.h_dest, c->guest_mac, sizeof(eh.h_dest));
> + memcpy(eh.h_source, c->our_tap_mac, sizeof(eh.h_source));
>
> - memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
> - memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
> + if (ipv4)
> + eh.h_proto = htons(ETH_P_IP);
> + else
> + eh.h_proto = htons(ETH_P_IPV6);
> + IOV_PUSH_HEADER(data, eh);
>
> /* initialize header */
> - if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
> - struct iphdr *iph = vu_ip(iov[0].iov_base);
> - struct udp_payload_t *bp = vu_payloadv4(iov[0].iov_base);
> + if (ipv4) {
> + struct iov_tail udp_frame;
> + struct iphdr iph;
>
> - eh->h_proto = htons(ETH_P_IP);
> + iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
>
> - *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
> + udp_frame = *data;
> + IOV_DROP_HEADER(&udp_frame, struct iphdr);
> + l4len = udp_update_hdr4(&iph, &udp_frame, toside, dlen, true);
>
> - l4len = udp_update_hdr4(iph, bp, toside, dlen, true);
> + IOV_PUSH_HEADER(data, iph);
> } else {
> - struct ipv6hdr *ip6h = vu_ip(iov[0].iov_base);
> - struct udp_payload_t *bp = vu_payloadv6(iov[0].iov_base);
> + struct iov_tail udp_frame;
> + struct ipv6hdr ip6h;
>
> - eh->h_proto = htons(ETH_P_IPV6);
> + ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
>
> - *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
> + udp_frame = *data;
> + IOV_DROP_HEADER(&udp_frame, struct ipv6hdr);
> + l4len = udp_update_hdr6(&ip6h, &udp_frame, toside, dlen, true);
>
> - l4len = udp_update_hdr6(ip6h, bp, toside, dlen, true);
> + IOV_PUSH_HEADER(data, ip6h);
> }
>
> return l4len;
> @@ -141,29 +151,23 @@ static size_t udp_vu_prepare(const struct ctx *c, const struct iovec *iov,
> /**
> * udp_vu_csum() - Calculate and set checksum for a UDP packet
> * @toside: Address information for one side of the flow
> - * @iov: IO vector for the frame
> - * @cnt: Number of IO vector entries
> + * @data: IO vector tail for the L3 frame
...meaning for the... packet, including IP headers?
> * @l4len: L4 length
> */
> -static void udp_vu_csum(const struct flowside *toside, const struct iovec *iov,
> - size_t cnt, size_t l4len)
> +static void udp_vu_csum(const struct flowside *toside, struct iov_tail *data,
> + size_t l4len)
> {
> const struct in_addr *src4 = inany_v4(&toside->oaddr);
> const struct in_addr *dst4 = inany_v4(&toside->eaddr);
> - char *base = iov[0].iov_base;
> - struct udp_payload_t *bp;
> - struct iov_tail data;
> -
> - if (src4 && dst4) {
> - bp = vu_payloadv4(base);
> - data = IOV_TAIL(iov, cnt, (char *)&bp->data - base);
> - csum_udp4(&bp->uh, *src4, *dst4, &data, l4len);
> - } else {
> - bp = vu_payloadv6(base);
> - data = IOV_TAIL(iov, cnt, (char *)&bp->data - base);
> - csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data,
> - l4len);
> - }
> + struct udphdr *uh, uh_storage;
> + bool ipv4 = src4 && dst4;
> +
> + uh = IOV_REMOVE_HEADER(data, uh_storage);
> +
> + if (ipv4)
> + csum_udp4(uh, *src4, *dst4, data, l4len);
> + else
> + csum_udp6(uh, &toside->oaddr.a6, &toside->eaddr.a6, data, l4len);
Coverity Scan doesn't like this:
/home/sbrivio/passt/udp_vu.c:168:3:
Type: Dereference null return value (NULL_RETURNS)
/home/sbrivio/passt/udp_vu.c:165:2: Call to null-returning function
1. returned_null: "iov_remove_header_" returns "NULL" (checked 4 out of 5 times).
/home/sbrivio/passt/iov.c:405:2: Call to null-returning function
1.1. path: Condition "!p", taking true branch.
/home/sbrivio/passt/iov.c:406:3:
1.2. return_null: Explicitly returning null.
/home/sbrivio/passt/udp_vu.c:165:2:
2. var_assigned: Assigning: "uh" = "NULL" return value from "iov_remove_header_".
/home/sbrivio/passt/udp_vu.c:167:2:
3. path: Condition "ipv4", taking true branch.
/home/sbrivio/passt/udp_vu.c:168:3:
4. dereference: Passing null pointer "uh" to "csum_udp4", which dereferences it.
/home/sbrivio/passt/checksum.c:192:2: Call to null-returning function
4.1. dereference: Dereferencing pointer "udp4hr".
/home/sbrivio/passt/arp.c:86:2: Examples where return value was checked for null
5. example_assign: Example 1: Assigning: "ah" = return value from "iov_remove_header_(data, &ah_storage, 8UL, 2UL)".
/home/sbrivio/passt/arp.c:88:2:
6. example_checked: Example 1 (cont.): "ah" has its value checked in "ah".
/home/sbrivio/passt/dhcp.c:327:2: Examples where return value was checked for null
7. example_assign: Example 2: Assigning: "uh" = return value from "iov_remove_header_(data, &uh_storage, 8UL, 2UL)".
/home/sbrivio/passt/dhcp.c:328:2:
8. example_checked: Example 2 (cont.): "uh" has its value checked in "uh".
/home/sbrivio/passt/dhcpv6.c:576:2: Examples where return value was checked for null
9. example_assign: Example 3: Assigning: "uh" = return value from "iov_remove_header_(data, &uh_storage, 8UL, 2UL)".
/home/sbrivio/passt/dhcpv6.c:577:2:
10. example_checked: Example 3 (cont.): "uh" has its value checked in "uh".
/home/sbrivio/passt/udp.c:1083:3: Examples where return value was checked for null
11. example_assign: Example 4: Assigning: "uh_send" = return value from "iov_remove_header_(&data, &uh_storage, 8UL, 2UL)".
/home/sbrivio/passt/udp.c:1084:3:
12. example_checked: Example 4 (cont.): "uh_send" has its value checked in "uh_send".
/home/sbrivio/passt/udp_vu.c:170:3:
Type: Dereference null return value (NULL_RETURNS)
/home/sbrivio/passt/udp_vu.c:165:2: Call to null-returning function
1. returned_null: "iov_remove_header_" returns "NULL" (checked 4 out of 5 times).
/home/sbrivio/passt/iov.c:405:2: Call to null-returning function
1.1. path: Condition "!p", taking true branch.
/home/sbrivio/passt/iov.c:406:3:
1.2. return_null: Explicitly returning null.
/home/sbrivio/passt/udp_vu.c:165:2:
2. var_assigned: Assigning: "uh" = "NULL" return value from "iov_remove_header_".
/home/sbrivio/passt/udp_vu.c:167:2:
3. path: Condition "ipv4", taking false branch.
/home/sbrivio/passt/udp_vu.c:170:3:
4. dereference: Passing null pointer "uh" to "csum_udp6", which dereferences it.
/home/sbrivio/passt/checksum.c:258:2: Call to null-returning function
4.1. dereference: Dereferencing pointer "udp6hr".
/home/sbrivio/passt/arp.c:86:2: Examples where return value was checked for null
5. example_assign: Example 1: Assigning: "ah" = return value from "iov_remove_header_(data, &ah_storage, 8UL, 2UL)".
/home/sbrivio/passt/arp.c:88:2:
6. example_checked: Example 1 (cont.): "ah" has its value checked in "ah".
/home/sbrivio/passt/dhcp.c:327:2: Examples where return value was checked for null
7. example_assign: Example 2: Assigning: "uh" = return value from "iov_remove_header_(data, &uh_storage, 8UL, 2UL)".
/home/sbrivio/passt/dhcp.c:328:2:
8. example_checked: Example 2 (cont.): "uh" has its value checked in "uh".
/home/sbrivio/passt/dhcpv6.c:576:2: Examples where return value was checked for null
9. example_assign: Example 3: Assigning: "uh" = return value from "iov_remove_header_(data, &uh_storage, 8UL, 2UL)".
/home/sbrivio/passt/dhcpv6.c:577:2:
10. example_checked: Example 3 (cont.): "uh" has its value checked in "uh".
/home/sbrivio/passt/udp.c:1083:3: Examples where return value was checked for null
11. example_assign: Example 4: Assigning: "uh_send" = return value from "iov_remove_header_(&data, &uh_storage, 8UL, 2UL)".
/home/sbrivio/passt/udp.c:1084:3:
12. example_checked: Example 4 (cont.): "uh_send" has its value checked in "uh_send".
I'm not sure if there's a path where we could *really* make iov_remove_header_()
return NULL (we just built the header and we should always have space for it at
this point, I guess), but maybe an explicit check would make this more robust
anyway.
> }
>
> /**
> @@ -228,9 +232,10 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
> vu_queue_rewind(vq, elem_cnt - elem_used);
>
> if (iov_cnt > 0) {
> - size_t l4len = udp_vu_prepare(c, iov_vu, toside, dlen);
> + struct iov_tail data = IOV_TAIL(iov_vu, iov_cnt, VNET_HLEN);
> + size_t l4len = udp_vu_prepare(c, &data, toside, dlen);
> if (*c->pcap) {
> - udp_vu_csum(toside, iov_vu, iov_cnt, l4len);
> + udp_vu_csum(toside, &data, l4len);
> pcap_iov(iov_vu, iov_cnt, VNET_HLEN,
> hdrlen + dlen - VNET_HLEN);
> }
The rest of this series looks good to me. The TCP one will probably
take me a bit longer to review (it's unlikely that I'll finish today).
--
Stefano
prev parent reply other threads:[~2026-04-03 11:53 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-01 19:23 [PATCH v6 0/3] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
2026-04-01 19:23 ` [PATCH v6 1/3] udp_vu: Allow virtqueue elements with multiple iovec entries Laurent Vivier
2026-04-03 11:53 ` Stefano Brivio
2026-04-03 15:18 ` Laurent Vivier
2026-04-03 16:59 ` Stefano Brivio
2026-04-03 17:14 ` Laurent Vivier
2026-04-01 19:23 ` [PATCH v6 2/3] iov: Introduce IOV_PUSH_HEADER() macro Laurent Vivier
2026-04-01 19:23 ` [PATCH v6 3/3] udp: Pass iov_tail to udp_update_hdr4()/udp_update_hdr6() Laurent Vivier
2026-04-03 11:53 ` Stefano Brivio [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260403135333.1dd11ad0@elisabeth \
--to=sbrivio@redhat.com \
--cc=lvivier@redhat.com \
--cc=passt-dev@passt.top \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).