* [PATCH v8 1/3] udp_vu: Allow virtqueue elements with multiple iovec entries
2026-04-16 16:09 [PATCH v8 0/3] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
@ 2026-04-16 16:09 ` Laurent Vivier
2026-04-16 16:09 ` [PATCH v8 2/3] iov: Introduce IOV_PUSH_HEADER() macro Laurent Vivier
2026-04-16 16:09 ` [PATCH v8 3/3] udp: Pass iov_tail to udp_update_hdr4()/udp_update_hdr6() Laurent Vivier
2 siblings, 0 replies; 4+ messages in thread
From: Laurent Vivier @ 2026-04-16 16:09 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier, David Gibson
The previous code assumed a 1:1 mapping between virtqueue elements and
iovec entries (enforced by an assert). Drop that assumption to allow
elements that span multiple iovecs: track elem_used separately by
walking the element list against the iov count returned after padding.
This also fixes vu_queue_rewind() and vu_flush() to use the element
count rather than the iov count.
Use iov_tail_clone() in udp_vu_sock_recv() to handle header offset,
replacing the manual base/len adjustment and restore pattern.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
udp_vu.c | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/udp_vu.c b/udp_vu.c
index 5464917a0153..ef8e60cc390a 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -66,30 +66,25 @@ static size_t udp_vu_hdrlen(bool v6)
*/
static ssize_t udp_vu_sock_recv(struct iovec *iov, size_t *cnt, int s, bool v6)
{
+ struct iovec msg_iov[VIRTQUEUE_MAX_SIZE];
struct msghdr msg = { 0 };
+ struct iov_tail payload;
size_t hdrlen, iov_used;
ssize_t dlen;
/* compute L2 header length */
hdrlen = udp_vu_hdrlen(v6);
- /* reserve space for the headers */
- assert(iov[0].iov_len >= MAX(hdrlen, ETH_ZLEN + VNET_HLEN));
- iov[0].iov_base = (char *)iov[0].iov_base + hdrlen;
- iov[0].iov_len -= hdrlen;
+ payload = IOV_TAIL(iov, *cnt, hdrlen);
- /* read data from the socket */
- msg.msg_iov = iov;
- msg.msg_iovlen = *cnt;
+ msg.msg_iov = msg_iov;
+ msg.msg_iovlen = iov_tail_clone(msg.msg_iov, payload.cnt, &payload);
+ /* read data from the socket */
dlen = recvmsg(s, &msg, 0);
if (dlen < 0)
return -1;
- /* restore the pointer to the headers address */
- iov[0].iov_base = (char *)iov[0].iov_base - hdrlen;
- iov[0].iov_len += hdrlen;
-
iov_used = iov_skip_bytes(iov, *cnt,
MAX(dlen + hdrlen, VNET_HLEN + ETH_ZLEN),
NULL);
@@ -202,7 +197,7 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
}
for (i = 0; i < n; i++) {
- unsigned elem_cnt, elem_used;
+ unsigned elem_cnt, elem_used, j, k;
size_t iov_cnt;
ssize_t dlen;
@@ -212,15 +207,21 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
if (elem_cnt == 0)
break;
- assert((size_t)elem_cnt == iov_cnt); /* one iovec per element */
-
dlen = udp_vu_sock_recv(iov_vu, &iov_cnt, s, v6);
if (dlen < 0) {
vu_queue_rewind(vq, elem_cnt);
break;
}
- elem_used = iov_cnt; /* one iovec per element */
+ elem_used = 0;
+ for (j = 0, k = 0; k < iov_cnt && j < elem_cnt; j++) {
+ size_t iov_still_needed = iov_cnt - k;
+
+ if (elem[j].in_num > iov_still_needed)
+ elem[j].in_num = iov_still_needed;
+ k += elem[j].in_num;
+ elem_used++;
+ }
/* release unused buffers */
vu_queue_rewind(vq, elem_cnt - elem_used);
--
2.53.0
^ permalink raw reply [flat|nested] 4+ messages in thread* [PATCH v8 3/3] udp: Pass iov_tail to udp_update_hdr4()/udp_update_hdr6()
2026-04-16 16:09 [PATCH v8 0/3] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
2026-04-16 16:09 ` [PATCH v8 1/3] udp_vu: Allow virtqueue elements with multiple iovec entries Laurent Vivier
2026-04-16 16:09 ` [PATCH v8 2/3] iov: Introduce IOV_PUSH_HEADER() macro Laurent Vivier
@ 2026-04-16 16:09 ` Laurent Vivier
2 siblings, 0 replies; 4+ messages in thread
From: Laurent Vivier @ 2026-04-16 16:09 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Change udp_update_hdr4() and udp_update_hdr6() to take an iov_tail
pointing at the UDP frame instead of a contiguous udp_payload_t buffer
and explicit data length. This lets vhost-user pass scatter-gather
virtqueue buffers directly without an intermediate copy.
The UDP header is built into a local struct udphdr and written back with
IOV_PUSH_HEADER(). On the tap side, udp_tap_prepare() wraps the
existing udp_payload_t in a two-element iov to match the new interface.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
iov.c | 1 -
udp.c | 70 +++++++++++++++++++++++------------------
udp_internal.h | 4 +--
udp_vu.c | 85 ++++++++++++++++++++++++++------------------------
4 files changed, 86 insertions(+), 74 deletions(-)
diff --git a/iov.c b/iov.c
index b1bcdc4649df..c0d9c6d21322 100644
--- a/iov.c
+++ b/iov.c
@@ -368,7 +368,6 @@ void *iov_peek_header_(struct iov_tail *tail, void *v, size_t len, size_t align)
*
* Return: number of bytes written
*/
-/* cppcheck-suppress unusedFunction */
size_t iov_push_header_(struct iov_tail *tail, const void *v, size_t len)
{
size_t l;
diff --git a/udp.c b/udp.c
index 4eef10854d8a..536b9a74dd15 100644
--- a/udp.c
+++ b/udp.c
@@ -255,20 +255,21 @@ static void udp_iov_init(const struct ctx *c)
/**
* udp_update_hdr4() - Update headers for one IPv4 datagram
* @ip4h: Pre-filled IPv4 header (except for tot_len and saddr)
- * @bp: Pointer to udp_payload_t to update
+ * @payload: iov_tail with datagram to update
* @toside: Flowside for destination side
* @dlen: Length of UDP payload
* @no_udp_csum: Do not set UDP checksum
*
- * Return: size of IPv4 payload (UDP header + data)
+ * Return: size of datagram (UDP header + data)
*/
-size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
+size_t udp_update_hdr4(struct iphdr *ip4h, struct iov_tail *payload,
const struct flowside *toside, size_t dlen,
bool no_udp_csum)
{
const struct in_addr *src = inany_v4(&toside->oaddr);
const struct in_addr *dst = inany_v4(&toside->eaddr);
- size_t l4len = dlen + sizeof(bp->uh);
+ struct udphdr uh;
+ size_t l4len = dlen + sizeof(uh);
size_t l3len = l4len + sizeof(*ip4h);
assert(src && dst);
@@ -278,19 +279,18 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
ip4h->saddr = src->s_addr;
ip4h->check = csum_ip4_header(l3len, IPPROTO_UDP, *src, *dst);
- bp->uh.source = htons(toside->oport);
- bp->uh.dest = htons(toside->eport);
- bp->uh.len = htons(l4len);
+ uh.source = htons(toside->oport);
+ uh.dest = htons(toside->eport);
+ uh.len = htons(l4len);
if (no_udp_csum) {
- bp->uh.check = 0;
+ uh.check = 0;
} else {
- const struct iovec iov = {
- .iov_base = bp->data,
- .iov_len = dlen
- };
- struct iov_tail data = IOV_TAIL(&iov, 1, 0);
- csum_udp4(&bp->uh, *src, *dst, &data, dlen);
+ struct iov_tail data = *payload;
+
+ IOV_DROP_HEADER(&data, struct udphdr);
+ csum_udp4(&uh, *src, *dst, &data, dlen);
}
+ IOV_PUSH_HEADER(payload, uh);
return l4len;
}
@@ -299,18 +299,19 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
* udp_update_hdr6() - Update headers for one IPv6 datagram
* @ip6h: Pre-filled IPv6 header (except for payload_len and
* addresses)
- * @bp: Pointer to udp_payload_t to update
+ * @payload: iov_tail with datagram to update
* @toside: Flowside for destination side
* @dlen: Length of UDP payload
* @no_udp_csum: Do not set UDP checksum
*
- * Return: size of IPv6 payload (UDP header + data)
+ * Return: size of datagram (UDP header + data)
*/
-size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
+size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct iov_tail *payload,
const struct flowside *toside, size_t dlen,
bool no_udp_csum)
{
- uint16_t l4len = dlen + sizeof(bp->uh);
+ struct udphdr uh;
+ uint16_t l4len = dlen + sizeof(uh);
ip6h->payload_len = htons(l4len);
ip6h->daddr = toside->eaddr.a6;
@@ -319,24 +320,24 @@ size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
ip6h->nexthdr = IPPROTO_UDP;
ip6h->hop_limit = 255;
- bp->uh.source = htons(toside->oport);
- bp->uh.dest = htons(toside->eport);
- bp->uh.len = ip6h->payload_len;
+ uh.source = htons(toside->oport);
+ uh.dest = htons(toside->eport);
+ uh.len = htons(l4len);
+
if (no_udp_csum) {
/* 0 is an invalid checksum for UDP IPv6 and dropped by
* the kernel stack, even if the checksum is disabled by virtio
* flags. We need to put any non-zero value here.
*/
- bp->uh.check = 0xffff;
+ uh.check = 0xffff;
} else {
- const struct iovec iov = {
- .iov_base = bp->data,
- .iov_len = dlen
- };
- struct iov_tail data = IOV_TAIL(&iov, 1, 0);
- csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data,
- dlen);
+ struct iov_tail data = *payload;
+
+ IOV_DROP_HEADER(&data, struct udphdr);
+ csum_udp6(&uh, &toside->oaddr.a6, &toside->eaddr.a6,
+ &data, dlen);
}
+ IOV_PUSH_HEADER(payload, uh);
return l4len;
}
@@ -375,11 +376,18 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
struct ethhdr *eh = (*tap_iov)[UDP_IOV_ETH].iov_base;
struct udp_payload_t *bp = &udp_payload[idx];
struct udp_meta_t *bm = &udp_meta[idx];
+ struct iovec iov[2];
+ struct iov_tail payload = IOV_TAIL(iov, ARRAY_SIZE(iov), 0);
size_t l4len, l2len;
+ iov[0].iov_base = &bp->uh;
+ iov[0].iov_len = sizeof(bp->uh);
+ iov[1].iov_base = bp->data;
+ iov[1].iov_len = mmh[idx].msg_len;
+
eth_update_mac(eh, NULL, tap_omac);
if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) {
- l4len = udp_update_hdr6(&bm->ip6h, bp, toside,
+ l4len = udp_update_hdr6(&bm->ip6h, &payload, toside,
mmh[idx].msg_len, no_udp_csum);
l2len = MAX(l4len + sizeof(bm->ip6h) + ETH_HLEN, ETH_ZLEN);
@@ -388,7 +396,7 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
eh->h_proto = htons_constant(ETH_P_IPV6);
(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
} else {
- l4len = udp_update_hdr4(&bm->ip4h, bp, toside,
+ l4len = udp_update_hdr4(&bm->ip4h, &payload, toside,
mmh[idx].msg_len, no_udp_csum);
l2len = MAX(l4len + sizeof(bm->ip4h) + ETH_HLEN, ETH_ZLEN);
diff --git a/udp_internal.h b/udp_internal.h
index 64e457748324..e6cbaab79519 100644
--- a/udp_internal.h
+++ b/udp_internal.h
@@ -25,10 +25,10 @@ struct udp_payload_t {
} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
#endif
-size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
+size_t udp_update_hdr4(struct iphdr *ip4h, struct iov_tail *payload,
const struct flowside *toside, size_t dlen,
bool no_udp_csum);
-size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
+size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct iov_tail *payload,
const struct flowside *toside, size_t dlen,
bool no_udp_csum);
void udp_sock_fwd(const struct ctx *c, int s, int rule_hint,
diff --git a/udp_vu.c b/udp_vu.c
index ef8e60cc390a..8cf50ca1c38f 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -98,69 +98,73 @@ static ssize_t udp_vu_sock_recv(struct iovec *iov, size_t *cnt, int s, bool v6)
/**
* udp_vu_prepare() - Prepare the packet header
* @c: Execution context
- * @iov: IO vector for the frame (including vnet header)
+ * @data: IO vector tail for the L2 frame, on return points to the L4 header
* @toside: Address information for one side of the flow
* @dlen: Packet data length
*/
-static void udp_vu_prepare(const struct ctx *c, const struct iovec *iov,
- const struct flowside *toside, ssize_t dlen)
+static void udp_vu_prepare(const struct ctx *c, struct iov_tail *data,
+ const struct flowside *toside, size_t dlen)
{
- struct ethhdr *eh;
+ bool ipv4 = inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr);
+ struct ethhdr eh;
/* ethernet header */
- eh = vu_eth(iov[0].iov_base);
+ memcpy(eh.h_dest, c->guest_mac, sizeof(eh.h_dest));
+ memcpy(eh.h_source, c->our_tap_mac, sizeof(eh.h_source));
- memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
- memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
+ if (ipv4)
+ eh.h_proto = htons(ETH_P_IP);
+ else
+ eh.h_proto = htons(ETH_P_IPV6);
+ IOV_PUSH_HEADER(data, eh);
/* initialize header */
- if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
- struct iphdr *iph = vu_ip(iov[0].iov_base);
- struct udp_payload_t *bp = vu_payloadv4(iov[0].iov_base);
-
- eh->h_proto = htons(ETH_P_IP);
+ if (ipv4) {
+ struct iov_tail datagram;
+ struct iphdr iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
- *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
+ datagram = *data;
+ IOV_DROP_HEADER(&datagram, struct iphdr);
+ udp_update_hdr4(&iph, &datagram, toside, dlen, true);
- udp_update_hdr4(iph, bp, toside, dlen, true);
+ IOV_PUSH_HEADER(data, iph);
} else {
- struct ipv6hdr *ip6h = vu_ip(iov[0].iov_base);
- struct udp_payload_t *bp = vu_payloadv6(iov[0].iov_base);
-
- eh->h_proto = htons(ETH_P_IPV6);
+ struct iov_tail datagram;
+ struct ipv6hdr ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
- *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
+ datagram = *data;
+ IOV_DROP_HEADER(&datagram, struct ipv6hdr);
+ udp_update_hdr6(&ip6h, &datagram, toside, dlen, true);
- udp_update_hdr6(ip6h, bp, toside, dlen, true);
+ IOV_PUSH_HEADER(data, ip6h);
}
}
/**
* udp_vu_csum() - Calculate and set checksum for a UDP packet
* @toside: Address information for one side of the flow
- * @iov: IO vector for the frame
- * @cnt: Number of IO vector entries
+ * @data: IO vector tail for the L4 frame (point to the UDP header)
* @dlen: Data length
*/
-static void udp_vu_csum(const struct flowside *toside, const struct iovec *iov,
- size_t cnt, size_t dlen)
+static void udp_vu_csum(const struct flowside *toside, struct iov_tail *data,
+ size_t dlen)
{
const struct in_addr *src4 = inany_v4(&toside->oaddr);
const struct in_addr *dst4 = inany_v4(&toside->eaddr);
- char *base = iov[0].iov_base;
- struct udp_payload_t *bp;
- struct iov_tail data;
-
- if (src4 && dst4) {
- bp = vu_payloadv4(base);
- data = IOV_TAIL(iov, cnt, (char *)&bp->data - base);
- csum_udp4(&bp->uh, *src4, *dst4, &data, dlen);
- } else {
- bp = vu_payloadv6(base);
- data = IOV_TAIL(iov, cnt, (char *)&bp->data - base);
- csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data,
- dlen);
- }
+ struct iov_tail current = *data;
+ struct udphdr *uh, uh_storage;
+ bool ipv4 = src4 && dst4;
+
+ uh = IOV_REMOVE_HEADER(¤t, uh_storage);
+ if (!uh)
+ return;
+
+ if (ipv4)
+ csum_udp4(uh, *src4, *dst4, ¤t, dlen);
+ else
+ csum_udp6(uh, &toside->oaddr.a6, &toside->eaddr.a6, ¤t, dlen);
+
+ IOV_PUSH_HEADER(data, *uh);
}
/**
@@ -227,9 +231,10 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
vu_queue_rewind(vq, elem_cnt - elem_used);
if (iov_cnt > 0) {
- udp_vu_prepare(c, iov_vu, toside, dlen);
+ struct iov_tail data = IOV_TAIL(iov_vu, iov_cnt, VNET_HLEN);
+ udp_vu_prepare(c, &data, toside, dlen);
if (*c->pcap) {
- udp_vu_csum(toside, iov_vu, iov_cnt, dlen);
+ udp_vu_csum(toside, &data, dlen);
pcap_iov(iov_vu, iov_cnt, VNET_HLEN,
hdrlen + dlen - VNET_HLEN);
}
--
2.53.0
^ permalink raw reply [flat|nested] 4+ messages in thread