* [PATCH v2 01/13] iov: Add iov_truncate() helper and use it in vu handlers
2026-03-09 9:47 [PATCH v2 00/13] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
@ 2026-03-09 9:47 ` Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 02/13] vhost-user: Centralise 802.3 frame padding in vu_collect() and vu_flush() Laurent Vivier
` (11 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Laurent Vivier @ 2026-03-09 9:47 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Add a generic iov_truncate() function that truncates an IO vector to a
given number of bytes, returning the number of iov entries that contain
data after truncation.
Use it in udp_vu_sock_recv() and tcp_vu_sock_recv() to replace the
open-coded truncation logic that adjusted iov entries after recvmsg().
Also convert the direct iov_len assignment in tcp_vu_send_flag() to use
iov_truncate() for consistency.
Add an ASSERT() in tcp_vu_data_from_sock() to quiet the Coverity error:
passt/tcp_vu.c:457:3:
19. overflow_const: Expression "dlen + hdrlen", where "dlen" is known to
be equal to -86, and "hdrlen" is known to be equal to 86, underflows
the type of "dlen + hdrlen", which is type "unsigned long".
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
iov.c | 22 ++++++++++++++++++++++
iov.h | 1 +
tcp_vu.c | 20 ++++++++------------
udp_vu.c | 12 +++---------
4 files changed, 34 insertions(+), 21 deletions(-)
diff --git a/iov.c b/iov.c
index ad726daa4cd8..31a3f5bc29e5 100644
--- a/iov.c
+++ b/iov.c
@@ -147,6 +147,28 @@ size_t iov_size(const struct iovec *iov, size_t iov_cnt)
return len;
}
+/**
+ * iov_truncate() - Truncate an IO vector to a given number of bytes
+ * @iov: IO vector (modified)
+ * @iov_cnt: Number of entries in @iov
+ * @size: Total number of bytes to keep
+ *
+ * Return: number of iov entries that contain data after truncation
+ */
+size_t iov_truncate(struct iovec *iov, size_t iov_cnt, size_t size)
+{
+ size_t i, offset;
+
+ i = iov_skip_bytes(iov, iov_cnt, size, &offset);
+
+ if (i < iov_cnt) {
+ iov[i].iov_len = offset;
+ i += !!offset;
+ }
+
+ return i;
+}
+
/**
* iov_tail_prune() - Remove any unneeded buffers from an IOV tail
* @tail: IO vector tail (modified)
diff --git a/iov.h b/iov.h
index d1ab91a94e22..b4e50b0fca5a 100644
--- a/iov.h
+++ b/iov.h
@@ -29,6 +29,7 @@ size_t iov_from_buf(const struct iovec *iov, size_t iov_cnt,
size_t iov_to_buf(const struct iovec *iov, size_t iov_cnt,
size_t offset, void *buf, size_t bytes);
size_t iov_size(const struct iovec *iov, size_t iov_cnt);
+size_t iov_truncate(struct iovec *iov, size_t iov_cnt, size_t size);
/*
* DOC: Theory of Operation, struct iov_tail
diff --git a/tcp_vu.c b/tcp_vu.c
index 88be232dca66..fd734e857b3b 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -131,7 +131,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
return ret;
}
- flags_elem[0].in_sg[0].iov_len = hdrlen + optlen;
+ iov_truncate(&flags_iov[0], 1, hdrlen + optlen);
payload = IOV_TAIL(flags_elem[0].in_sg, 1, hdrlen);
if (flags & KEEPALIVE)
@@ -192,9 +192,9 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
struct msghdr mh_sock = { 0 };
uint16_t mss = MSS_GET(conn);
int s = conn->sock;
- ssize_t ret, len;
size_t hdrlen;
int elem_cnt;
+ ssize_t ret;
int i;
*iov_cnt = 0;
@@ -247,15 +247,7 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
ret -= already_sent;
/* adjust iov number and length of the last iov */
- len = ret;
- for (i = 0; len && i < elem_cnt; i++) {
- struct iovec *iov = &elem[i].in_sg[0];
-
- if (iov->iov_len > (size_t)len)
- iov->iov_len = len;
-
- len -= iov->iov_len;
- }
+ i = iov_truncate(&iov_vu[DISCARD_IOV_NUM], elem_cnt, ret);
/* adjust head count */
while (*head_cnt > 0 && head[*head_cnt - 1] >= i)
@@ -448,10 +440,14 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
for (i = 0, previous_dlen = -1, check = NULL; i < head_cnt; i++) {
struct iovec *iov = &elem[head[i]].in_sg[0];
int buf_cnt = head[i + 1] - head[i];
- ssize_t dlen = iov_size(iov, buf_cnt) - hdrlen;
+ size_t frame_size = iov_size(iov, buf_cnt);
bool push = i == head_cnt - 1;
+ ssize_t dlen;
size_t l2len;
+ ASSERT(frame_size >= hdrlen);
+
+ dlen = frame_size - hdrlen;
vu_set_vnethdr(iov->iov_base, buf_cnt);
/* The IPv4 header checksum varies only with dlen */
diff --git a/udp_vu.c b/udp_vu.c
index 3520f89e5671..5effca777e0a 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -71,9 +71,9 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
bool v6, ssize_t *dlen)
{
const struct vu_dev *vdev = c->vdev;
- int iov_cnt, idx, iov_used;
- size_t off, hdrlen, l2len;
struct msghdr msg = { 0 };
+ int iov_cnt, iov_used;
+ size_t hdrlen, l2len;
ASSERT(!c->no_udp);
@@ -115,13 +115,7 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
iov_vu[0].iov_base = (char *)iov_vu[0].iov_base - hdrlen;
iov_vu[0].iov_len += hdrlen;
- /* count the numbers of buffer filled by recvmsg() */
- idx = iov_skip_bytes(iov_vu, iov_cnt, *dlen + hdrlen, &off);
-
- /* adjust last iov length */
- if (idx < iov_cnt)
- iov_vu[idx].iov_len = off;
- iov_used = idx + !!off;
+ iov_used = iov_truncate(iov_vu, iov_cnt, *dlen + hdrlen);
/* pad frame to 60 bytes: first buffer is at least ETH_ZLEN long */
l2len = *dlen + hdrlen - VNET_HLEN;
--
2.53.0
^ permalink raw reply [flat|nested] 14+ messages in thread* [PATCH v2 02/13] vhost-user: Centralise 802.3 frame padding in vu_collect() and vu_flush()
2026-03-09 9:47 [PATCH v2 00/13] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 01/13] iov: Add iov_truncate() helper and use it in vu handlers Laurent Vivier
@ 2026-03-09 9:47 ` Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 03/13] vhost-user: Use ARRAY_SIZE(elem) instead of VIRTQUEUE_MAX_SIZE Laurent Vivier
` (10 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Laurent Vivier @ 2026-03-09 9:47 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
The per-protocol padding done by vu_pad() in tcp_vu.c and udp_vu.c was
only correct for single-buffer frames, and assumed the padding area always
fell within the first iov. It also relied on each caller computing the
right MAX(..., ETH_ZLEN + VNET_HLEN) size for vu_collect() and calling
vu_pad() at the right point.
Centralise padding logic into the two shared vhost-user helpers instead:
- vu_collect() now ensures at least ETH_ZLEN + VNET_HLEN bytes of buffer
space are collected, so there is always room for a minimum-sized frame.
- vu_flush() computes the actual frame length (accounting for
VIRTIO_NET_F_MRG_RXBUF multi-buffer frames) and passes the padded
length to vu_queue_fill().
A new iov_memset() helper in iov.c zero-fills the padding area in each
buffer before iov_truncate() sets the logical frame size. The callers in
tcp_vu.c, udp_vu.c and vu_send_single() use iov_memset() directly,
replacing the now-removed vu_pad() helper and the MAX(..., ETH_ZLEN +
VNET_HLEN) size calculations passed to vu_collect().
Centralising padding here will also ease the move to multi-iovec per
element support, since there will be a single place to update.
In vu_send_single(), fix padding, truncation and data copy to use the
requested frame size rather than the total available buffer space from
vu_collect(), which could be larger. Also add matching padding, truncation
and explicit size to vu_collect() for the DUP_ACK path in
tcp_vu_send_flag().
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
iov.c | 24 +++++++++++++++++++
iov.h | 2 ++
tcp_vu.c | 35 +++++++++++++++++----------
udp_vu.c | 12 ++++++----
vu_common.c | 69 +++++++++++++++++++++++++++++++----------------------
vu_common.h | 1 -
6 files changed, 96 insertions(+), 47 deletions(-)
diff --git a/iov.c b/iov.c
index 31a3f5bc29e5..cd48667226f3 100644
--- a/iov.c
+++ b/iov.c
@@ -169,6 +169,30 @@ size_t iov_truncate(struct iovec *iov, size_t iov_cnt, size_t size)
return i;
}
+/**
+ * iov_memset() - Set bytes of an IO vector to a given value
+ * @iov: IO vector
+ * @iov_cnt: Number of elements in @iov
+ * @offset: Byte offset in the iovec at which to start
+ * @c: Byte value to fill with
+ * @length: Number of bytes to set
+ */
+void iov_memset(const struct iovec *iov, size_t iov_cnt, size_t offset, int c,
+ size_t length)
+{
+ size_t i;
+
+ i = iov_skip_bytes(iov, iov_cnt, offset, &offset);
+
+ for ( ; i < iov_cnt; i++) {
+ size_t n = MIN(iov[i].iov_len - offset, length);
+
+ memset((char *)iov[i].iov_base + offset, c, n);
+ offset = 0;
+ length -= n;
+ }
+}
+
/**
* iov_tail_prune() - Remove any unneeded buffers from an IOV tail
* @tail: IO vector tail (modified)
diff --git a/iov.h b/iov.h
index b4e50b0fca5a..d295d05b3bab 100644
--- a/iov.h
+++ b/iov.h
@@ -30,6 +30,8 @@ size_t iov_to_buf(const struct iovec *iov, size_t iov_cnt,
size_t offset, void *buf, size_t bytes);
size_t iov_size(const struct iovec *iov, size_t iov_cnt);
size_t iov_truncate(struct iovec *iov, size_t iov_cnt, size_t size);
+void iov_memset(const struct iovec *iov, size_t iov_cnt, size_t offset, int c,
+ size_t length);
/*
* DOC: Theory of Operation, struct iov_tail
diff --git a/tcp_vu.c b/tcp_vu.c
index fd734e857b3b..3adead5f33fa 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -72,12 +72,12 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
struct vu_dev *vdev = c->vdev;
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
struct vu_virtq_element flags_elem[2];
- size_t optlen, hdrlen, l2len;
struct ipv6hdr *ip6h = NULL;
struct iphdr *ip4h = NULL;
struct iovec flags_iov[2];
struct tcp_syn_opts *opts;
struct iov_tail payload;
+ size_t optlen, hdrlen;
struct tcphdr *th;
struct ethhdr *eh;
uint32_t seq;
@@ -90,7 +90,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
vu_set_element(&flags_elem[0], NULL, &flags_iov[0]);
elem_cnt = vu_collect(vdev, vq, &flags_elem[0], 1,
- MAX(hdrlen + sizeof(*opts), ETH_ZLEN + VNET_HLEN), NULL);
+ hdrlen + sizeof(*opts), NULL);
if (elem_cnt != 1)
return -1;
@@ -131,6 +131,11 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
return ret;
}
+ /* Pad short frames to ETH_ZLEN */
+ if (ETH_ZLEN + VNET_HLEN > hdrlen + optlen) {
+ iov_memset(&flags_iov[0], 1, hdrlen + optlen, 0,
+ ETH_ZLEN + VNET_HLEN - (hdrlen + optlen));
+ }
iov_truncate(&flags_iov[0], 1, hdrlen + optlen);
payload = IOV_TAIL(flags_elem[0].in_sg, 1, hdrlen);
@@ -140,9 +145,6 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload,
NULL, seq, !*c->pcap);
- l2len = optlen + hdrlen - VNET_HLEN;
- vu_pad(&flags_elem[0].in_sg[0], l2len);
-
if (*c->pcap)
pcap_iov(&flags_elem[0].in_sg[0], 1, VNET_HLEN);
nb_ack = 1;
@@ -151,10 +153,17 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
vu_set_element(&flags_elem[1], NULL, &flags_iov[1]);
elem_cnt = vu_collect(vdev, vq, &flags_elem[1], 1,
- flags_elem[0].in_sg[0].iov_len, NULL);
+ hdrlen + optlen, NULL);
if (elem_cnt == 1 &&
flags_elem[1].in_sg[0].iov_len >=
flags_elem[0].in_sg[0].iov_len) {
+ /* Pad short frames to ETH_ZLEN */
+ if (ETH_ZLEN + VNET_HLEN > hdrlen + optlen) {
+ iov_memset(&flags_iov[1], 1, hdrlen + optlen, 0,
+ ETH_ZLEN + VNET_HLEN -
+ (hdrlen + optlen));
+ }
+ iov_truncate(&flags_iov[1], 1, hdrlen + optlen);
memcpy(flags_elem[1].in_sg[0].iov_base,
flags_elem[0].in_sg[0].iov_base,
flags_elem[0].in_sg[0].iov_len);
@@ -212,8 +221,7 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
cnt = vu_collect(vdev, vq, &elem[elem_cnt],
VIRTQUEUE_MAX_SIZE - elem_cnt,
- MAX(MIN(mss, fillsize) + hdrlen, ETH_ZLEN + VNET_HLEN),
- &frame_size);
+ MIN(mss, fillsize) + hdrlen, &frame_size);
if (cnt == 0)
break;
@@ -222,6 +230,7 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
/* reserve space for headers in iov */
iov = &elem[elem_cnt].in_sg[0];
ASSERT(iov->iov_len >= hdrlen);
+
iov->iov_base = (char *)iov->iov_base + hdrlen;
iov->iov_len -= hdrlen;
head[(*head_cnt)++] = elem_cnt;
@@ -246,6 +255,11 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
if (!peek_offset_cap)
ret -= already_sent;
+ /* Pad short frames to ETH_ZLEN */
+ if (ETH_ZLEN + VNET_HLEN > (size_t)ret + hdrlen) {
+ iov_memset(&iov_vu[DISCARD_IOV_NUM], elem_cnt, ret, 0,
+ ETH_ZLEN + VNET_HLEN - (ret + hdrlen));
+ }
/* adjust iov number and length of the last iov */
i = iov_truncate(&iov_vu[DISCARD_IOV_NUM], elem_cnt, ret);
@@ -443,7 +457,6 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
size_t frame_size = iov_size(iov, buf_cnt);
bool push = i == head_cnt - 1;
ssize_t dlen;
- size_t l2len;
ASSERT(frame_size >= hdrlen);
@@ -457,10 +470,6 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap, push);
- /* Pad first/single buffer only, it's at least ETH_ZLEN long */
- l2len = dlen + hdrlen - VNET_HLEN;
- vu_pad(iov, l2len);
-
if (*c->pcap)
pcap_iov(iov, buf_cnt, VNET_HLEN);
diff --git a/udp_vu.c b/udp_vu.c
index 5effca777e0a..ef9d26118eaf 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -73,7 +73,7 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
const struct vu_dev *vdev = c->vdev;
struct msghdr msg = { 0 };
int iov_cnt, iov_used;
- size_t hdrlen, l2len;
+ size_t hdrlen;
ASSERT(!c->no_udp);
@@ -98,6 +98,7 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
/* reserve space for the headers */
ASSERT(iov_vu[0].iov_len >= MAX(hdrlen, ETH_ZLEN + VNET_HLEN));
+
iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen;
iov_vu[0].iov_len -= hdrlen;
@@ -115,12 +116,13 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
iov_vu[0].iov_base = (char *)iov_vu[0].iov_base - hdrlen;
iov_vu[0].iov_len += hdrlen;
+ /* Pad short frames to ETH_ZLEN */
+ if (ETH_ZLEN + VNET_HLEN > *dlen + hdrlen) {
+ iov_memset(iov_vu, iov_cnt, *dlen + hdrlen, 0,
+ ETH_ZLEN + VNET_HLEN - (*dlen + hdrlen));
+ }
iov_used = iov_truncate(iov_vu, iov_cnt, *dlen + hdrlen);
- /* pad frame to 60 bytes: first buffer is at least ETH_ZLEN long */
- l2len = *dlen + hdrlen - VNET_HLEN;
- vu_pad(&iov_vu[0], l2len);
-
vu_set_vnethdr(iov_vu[0].iov_base, iov_used);
/* release unused buffers */
diff --git a/vu_common.c b/vu_common.c
index 5f2ce18e5b71..8ea05dd30890 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -87,8 +87,8 @@ int vu_collect(const struct vu_dev *vdev, struct vu_virtq *vq,
size_t current_size = 0;
int elem_cnt = 0;
+ size = MAX(size, ETH_ZLEN + VNET_HLEN); /* 802.3 minimum size */
while (current_size < size && elem_cnt < max_elem) {
- struct iovec *iov;
int ret;
ret = vu_queue_pop(vdev, vq, &elem[elem_cnt]);
@@ -101,12 +101,12 @@ int vu_collect(const struct vu_dev *vdev, struct vu_virtq *vq,
break;
}
- iov = &elem[elem_cnt].in_sg[0];
+ elem[elem_cnt].in_num = iov_truncate(elem[elem_cnt].in_sg,
+ elem[elem_cnt].in_num,
+ size - current_size);
- if (iov->iov_len > size - current_size)
- iov->iov_len = size - current_size;
-
- current_size += iov->iov_len;
+ current_size += iov_size(elem[elem_cnt].in_sg,
+ elem[elem_cnt].in_num);
elem_cnt++;
if (!vu_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
@@ -143,10 +143,30 @@ void vu_set_vnethdr(struct virtio_net_hdr_mrg_rxbuf *vnethdr, int num_buffers)
void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
struct vu_virtq_element *elem, int elem_cnt)
{
- int i;
-
- for (i = 0; i < elem_cnt; i++)
- vu_queue_fill(vdev, vq, &elem[i], elem[i].in_sg[0].iov_len, i);
+ int i, j, num_buffers;
+
+ for (i = 0; i < elem_cnt; i += num_buffers) {
+ const struct virtio_net_hdr_mrg_rxbuf *vnethdr;
+ size_t len, padding, elem_size;
+
+ vnethdr = elem[i].in_sg[0].iov_base;
+ num_buffers = le16toh(vnethdr->num_buffers);
+
+ len = 0;
+ for (j = 0; j < num_buffers - 1; j++) {
+ elem_size = iov_size(elem[i + j].in_sg,
+ elem[i + j].in_num);
+ vu_queue_fill(vdev, vq, &elem[i + j],
+ elem_size, i + j);
+ len += elem_size;
+ }
+ /* pad the last element to have an 802.3 minimum frame size */
+ elem_size = iov_size(elem[i + j].in_sg, elem[i + j].in_num);
+ padding = MAX(0, (ssize_t)(ETH_ZLEN + VNET_HLEN) -
+ (ssize_t)(len + elem_size));
+ vu_queue_fill(vdev, vq, &elem[i + j], elem_size + padding,
+ i + j);
+ }
vu_queue_flush(vdev, vq, elem_cnt);
vu_queue_notify(vdev, vq);
@@ -268,38 +288,31 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
goto err;
}
+ /* Pad short frames to ETH_ZLEN */
+ if (size < ETH_ZLEN + VNET_HLEN) {
+ iov_memset(in_sg, elem_cnt, size, 0,
+ ETH_ZLEN + VNET_HLEN - size);
+ }
+ elem_cnt = iov_truncate(in_sg, elem_cnt, size);
+
vu_set_vnethdr(in_sg[0].iov_base, elem_cnt);
- total -= VNET_HLEN;
+ size -= VNET_HLEN;
/* copy data from the buffer to the iovec */
- iov_from_buf(in_sg, elem_cnt, VNET_HLEN, buf, total);
+ iov_from_buf(in_sg, elem_cnt, VNET_HLEN, buf, size);
if (*c->pcap)
pcap_iov(in_sg, elem_cnt, VNET_HLEN);
vu_flush(vdev, vq, elem, elem_cnt);
- trace("vhost-user sent %zu", total);
+ trace("vhost-user sent %zu", size);
- return total;
+ return size;
err:
for (i = 0; i < elem_cnt; i++)
vu_queue_detach_element(vq);
return -1;
}
-
-/**
- * vu_pad() - Pad 802.3 frame to minimum length (60 bytes) if needed
- * @iov: Buffer in iovec array where end of 802.3 frame is stored
- * @l2len: Layer-2 length already filled in frame
- */
-void vu_pad(struct iovec *iov, size_t l2len)
-{
- if (l2len >= ETH_ZLEN)
- return;
-
- memset((char *)iov->iov_base + iov->iov_len, 0, ETH_ZLEN - l2len);
- iov->iov_len += ETH_ZLEN - l2len;
-}
diff --git a/vu_common.h b/vu_common.h
index 865d9771fa89..5de0c987b936 100644
--- a/vu_common.h
+++ b/vu_common.h
@@ -61,6 +61,5 @@ void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
const struct timespec *now);
int vu_send_single(const struct ctx *c, const void *buf, size_t size);
-void vu_pad(struct iovec *iov, size_t l2len);
#endif /* VU_COMMON_H */
--
2.53.0
^ permalink raw reply [flat|nested] 14+ messages in thread* [PATCH v2 03/13] vhost-user: Use ARRAY_SIZE(elem) instead of VIRTQUEUE_MAX_SIZE
2026-03-09 9:47 [PATCH v2 00/13] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 01/13] iov: Add iov_truncate() helper and use it in vu handlers Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 02/13] vhost-user: Centralise 802.3 frame padding in vu_collect() and vu_flush() Laurent Vivier
@ 2026-03-09 9:47 ` Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 04/13] udp_vu: Use iov_tail to manage virtqueue buffers Laurent Vivier
` (9 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Laurent Vivier @ 2026-03-09 9:47 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier, David Gibson
When passing the element count to vu_init_elem(), vu_collect(), or using
it as a loop bound, use ARRAY_SIZE(elem) instead of the VIRTQUEUE_MAX_SIZE.
No functional change.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
tcp_vu.c | 6 +++---
udp_vu.c | 4 ++--
vu_common.c | 6 +++---
3 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/tcp_vu.c b/tcp_vu.c
index 3adead5f33fa..8da2dcfe78d0 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -210,17 +210,17 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
hdrlen = tcp_vu_hdrlen(v6);
- vu_init_elem(elem, &iov_vu[DISCARD_IOV_NUM], VIRTQUEUE_MAX_SIZE);
+ vu_init_elem(elem, &iov_vu[DISCARD_IOV_NUM], ARRAY_SIZE(elem));
elem_cnt = 0;
*head_cnt = 0;
- while (fillsize > 0 && elem_cnt < VIRTQUEUE_MAX_SIZE) {
+ while (fillsize > 0 && elem_cnt < ARRAY_SIZE(elem)) {
struct iovec *iov;
size_t frame_size, dlen;
int cnt;
cnt = vu_collect(vdev, vq, &elem[elem_cnt],
- VIRTQUEUE_MAX_SIZE - elem_cnt,
+ ARRAY_SIZE(elem) - elem_cnt,
MIN(mss, fillsize) + hdrlen, &frame_size);
if (cnt == 0)
break;
diff --git a/udp_vu.c b/udp_vu.c
index ef9d26118eaf..439f2cb399b7 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -89,9 +89,9 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
/* compute L2 header length */
hdrlen = udp_vu_hdrlen(v6);
- vu_init_elem(elem, iov_vu, VIRTQUEUE_MAX_SIZE);
+ vu_init_elem(elem, iov_vu, ARRAY_SIZE(elem));
- iov_cnt = vu_collect(vdev, vq, elem, VIRTQUEUE_MAX_SIZE,
+ iov_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem),
IP_MAX_MTU + ETH_HLEN + VNET_HLEN, NULL);
if (iov_cnt == 0)
return -1;
diff --git a/vu_common.c b/vu_common.c
index 8ea05dd30890..8afa5199908f 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -193,7 +193,7 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
count = 0;
out_sg_count = 0;
- while (count < VIRTQUEUE_MAX_SIZE &&
+ while (count < ARRAY_SIZE(elem) &&
out_sg_count + VU_MAX_TX_BUFFER_NB <= VIRTQUEUE_MAX_SIZE) {
int ret;
struct iov_tail data;
@@ -278,10 +278,10 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
return -1;
}
- vu_init_elem(elem, in_sg, VIRTQUEUE_MAX_SIZE);
+ vu_init_elem(elem, in_sg, ARRAY_SIZE(elem));
size += VNET_HLEN;
- elem_cnt = vu_collect(vdev, vq, elem, VIRTQUEUE_MAX_SIZE, size, &total);
+ elem_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem), size, &total);
if (total < size) {
debug("vu_send_single: no space to send the data "
"elem_cnt %d size %zd", elem_cnt, total);
--
2.53.0
^ permalink raw reply [flat|nested] 14+ messages in thread* [PATCH v2 04/13] udp_vu: Use iov_tail to manage virtqueue buffers
2026-03-09 9:47 [PATCH v2 00/13] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
` (2 preceding siblings ...)
2026-03-09 9:47 ` [PATCH v2 03/13] vhost-user: Use ARRAY_SIZE(elem) instead of VIRTQUEUE_MAX_SIZE Laurent Vivier
@ 2026-03-09 9:47 ` Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 05/13] udp_vu: Move virtqueue management from udp_vu_sock_recv() to its caller Laurent Vivier
` (8 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Laurent Vivier @ 2026-03-09 9:47 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Replace direct iovec pointer arithmetic in UDP vhost-user handling with
iov_tail operations.
udp_vu_sock_recv() now takes an iov/cnt pair instead of using the
file-scoped iov_vu array, and returns the data length rather than the
iov count. Internally it uses iov_drop_header() to skip past L2/L3/L4
headers before receiving, and iov_tail_clone() to build the recvmsg()
iovec, removing the manual pointer offset and restore pattern.
udp_vu_prepare() and udp_vu_csum() take a const struct iov_tail *
instead of referencing iov_vu directly, making data flow explicit.
udp_vu_csum() uses iov_drop_header() and IOV_REMOVE_HEADER() to locate
the UDP header and payload, replacing manual offset calculations via
vu_payloadv4()/vu_payloadv6().
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
udp_vu.c | 111 ++++++++++++++++++++++++++++---------------------------
1 file changed, 57 insertions(+), 54 deletions(-)
diff --git a/udp_vu.c b/udp_vu.c
index 439f2cb399b7..a39254776099 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -59,21 +59,25 @@ static size_t udp_vu_hdrlen(bool v6)
/**
* udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers
* @c: Execution context
+ * @iov: IO vector for the frame (modified on output)
+ * @cnt: Number of IO vector entries (in/out)
* @vq: virtqueue to use to receive data
* @s: Socket to receive from
* @v6: Set for IPv6 connections
- * @dlen: Size of received data (output)
*
- * Return: number of iov entries used to store the datagram, 0 if the datagram
+ * Return: size of received data, 0 if the datagram
* was discarded because the virtqueue is not ready, -1 on error
*/
-static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
- bool v6, ssize_t *dlen)
+static ssize_t udp_vu_sock_recv(const struct ctx *c, struct iovec *iov,
+ size_t *cnt, struct vu_virtq *vq, int s,
+ bool v6)
{
const struct vu_dev *vdev = c->vdev;
struct msghdr msg = { 0 };
- int iov_cnt, iov_used;
+ struct iov_tail payload;
size_t hdrlen;
+ ssize_t dlen;
+ int iov_cnt;
ASSERT(!c->no_udp);
@@ -83,78 +87,74 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
if (recvmsg(s, &msg, MSG_DONTWAIT) < 0)
debug_perror("Failed to discard datagram");
+ *cnt = 0;
return 0;
}
/* compute L2 header length */
hdrlen = udp_vu_hdrlen(v6);
- vu_init_elem(elem, iov_vu, ARRAY_SIZE(elem));
+ vu_init_elem(elem, iov, *cnt);
iov_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem),
IP_MAX_MTU + ETH_HLEN + VNET_HLEN, NULL);
if (iov_cnt == 0)
return -1;
- /* reserve space for the headers */
- ASSERT(iov_vu[0].iov_len >= MAX(hdrlen, ETH_ZLEN + VNET_HLEN));
+ payload = IOV_TAIL(iov, iov_cnt, hdrlen);
- iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen;
- iov_vu[0].iov_len -= hdrlen;
+ struct iovec msg_iov[payload.cnt];
+ msg.msg_iov = msg_iov;
+ msg.msg_iovlen = iov_tail_clone(msg.msg_iov, payload.cnt, &payload);
/* read data from the socket */
- msg.msg_iov = iov_vu;
- msg.msg_iovlen = iov_cnt;
-
- *dlen = recvmsg(s, &msg, 0);
- if (*dlen < 0) {
+ dlen = recvmsg(s, &msg, 0);
+ if (dlen < 0) {
vu_queue_rewind(vq, iov_cnt);
return -1;
}
- /* restore the pointer to the headers address */
- iov_vu[0].iov_base = (char *)iov_vu[0].iov_base - hdrlen;
- iov_vu[0].iov_len += hdrlen;
-
/* Pad short frames to ETH_ZLEN */
- if (ETH_ZLEN + VNET_HLEN > *dlen + hdrlen) {
- iov_memset(iov_vu, iov_cnt, *dlen + hdrlen, 0,
- ETH_ZLEN + VNET_HLEN - (*dlen + hdrlen));
+ if (ETH_ZLEN + VNET_HLEN > dlen + hdrlen) {
+ iov_memset(iov, iov_cnt, dlen + hdrlen, 0,
+ ETH_ZLEN + VNET_HLEN - (dlen + hdrlen));
}
- iov_used = iov_truncate(iov_vu, iov_cnt, *dlen + hdrlen);
+ *cnt = iov_truncate(iov, iov_cnt, dlen + hdrlen);
- vu_set_vnethdr(iov_vu[0].iov_base, iov_used);
+ vu_set_vnethdr(iov[0].iov_base, *cnt);
/* release unused buffers */
- vu_queue_rewind(vq, iov_cnt - iov_used);
+ vu_queue_rewind(vq, iov_cnt - *cnt);
- return iov_used;
+ return dlen;
}
/**
* udp_vu_prepare() - Prepare the packet header
* @c: Execution context
+ * @data: IO vector tail for the frame
* @toside: Address information for one side of the flow
* @dlen: Packet data length
*
* Return: Layer-4 length
*/
-static size_t udp_vu_prepare(const struct ctx *c,
+static size_t udp_vu_prepare(const struct ctx *c, const struct iov_tail *data,
const struct flowside *toside, ssize_t dlen)
{
+ const struct iovec *iov = data->iov;
struct ethhdr *eh;
size_t l4len;
/* ethernet header */
- eh = vu_eth(iov_vu[0].iov_base);
+ eh = vu_eth(iov[0].iov_base);
memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
/* initialize header */
if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
- struct iphdr *iph = vu_ip(iov_vu[0].iov_base);
- struct udp_payload_t *bp = vu_payloadv4(iov_vu[0].iov_base);
+ struct iphdr *iph = vu_ip(iov[0].iov_base);
+ struct udp_payload_t *bp = vu_payloadv4(iov[0].iov_base);
eh->h_proto = htons(ETH_P_IP);
@@ -162,8 +162,8 @@ static size_t udp_vu_prepare(const struct ctx *c,
l4len = udp_update_hdr4(iph, bp, toside, dlen, true);
} else {
- struct ipv6hdr *ip6h = vu_ip(iov_vu[0].iov_base);
- struct udp_payload_t *bp = vu_payloadv6(iov_vu[0].iov_base);
+ struct ipv6hdr *ip6h = vu_ip(iov[0].iov_base);
+ struct udp_payload_t *bp = vu_payloadv6(iov[0].iov_base);
eh->h_proto = htons(ETH_P_IPV6);
@@ -178,25 +178,25 @@ static size_t udp_vu_prepare(const struct ctx *c,
/**
* udp_vu_csum() - Calculate and set checksum for a UDP packet
* @toside: Address information for one side of the flow
- * @iov_used: Number of used iov_vu items
+ * @data: IO vector tail for the frame
*/
-static void udp_vu_csum(const struct flowside *toside, int iov_used)
+static void udp_vu_csum(const struct flowside *toside,
+ const struct iov_tail *data)
{
const struct in_addr *src4 = inany_v4(&toside->oaddr);
const struct in_addr *dst4 = inany_v4(&toside->eaddr);
- char *base = iov_vu[0].iov_base;
- struct udp_payload_t *bp;
- struct iov_tail data;
+ struct iov_tail payload = *data;
+ struct udphdr *uh, uh_storage;
+ bool ipv4 = src4 && dst4;
- if (src4 && dst4) {
- bp = vu_payloadv4(base);
- data = IOV_TAIL(iov_vu, iov_used, (char *)&bp->data - base);
- csum_udp4(&bp->uh, *src4, *dst4, &data);
- } else {
- bp = vu_payloadv6(base);
- data = IOV_TAIL(iov_vu, iov_used, (char *)&bp->data - base);
- csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data);
- }
+ iov_drop_header(&payload,
+ udp_vu_hdrlen(!ipv4) - sizeof(struct udphdr));
+ uh = IOV_REMOVE_HEADER(&payload, uh_storage);
+
+ if (ipv4)
+ csum_udp4(uh, *src4, *dst4, &payload);
+ else
+ csum_udp6(uh, &toside->oaddr.a6, &toside->eaddr.a6, &payload);
}
/**
@@ -212,23 +212,26 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
struct vu_dev *vdev = c->vdev;
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+ struct iov_tail data;
int i;
for (i = 0; i < n; i++) {
+ size_t iov_cnt;
ssize_t dlen;
- int iov_used;
- iov_used = udp_vu_sock_recv(c, vq, s, v6, &dlen);
- if (iov_used < 0)
+ iov_cnt = VIRTQUEUE_MAX_SIZE;
+ dlen = udp_vu_sock_recv(c, iov_vu, &iov_cnt, vq, s, v6);
+ if (dlen < 0)
break;
- if (iov_used > 0) {
- udp_vu_prepare(c, toside, dlen);
+ if (iov_cnt > 0) {
+ data = IOV_TAIL(iov_vu, iov_cnt, 0);
+ udp_vu_prepare(c, &data, toside, dlen);
if (*c->pcap) {
- udp_vu_csum(toside, iov_used);
- pcap_iov(iov_vu, iov_used, VNET_HLEN);
+ udp_vu_csum(toside, &data);
+ pcap_iov(data.iov, data.cnt, VNET_HLEN);
}
- vu_flush(vdev, vq, elem, iov_used);
+ vu_flush(vdev, vq, elem, data.cnt);
}
}
}
--
2.53.0
^ permalink raw reply [flat|nested] 14+ messages in thread* [PATCH v2 05/13] udp_vu: Move virtqueue management from udp_vu_sock_recv() to its caller
2026-03-09 9:47 [PATCH v2 00/13] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
` (3 preceding siblings ...)
2026-03-09 9:47 ` [PATCH v2 04/13] udp_vu: Use iov_tail to manage virtqueue buffers Laurent Vivier
@ 2026-03-09 9:47 ` Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 06/13] iov: Add IOV_PUT_HEADER() to write header data back to iov_tail Laurent Vivier
` (7 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Laurent Vivier @ 2026-03-09 9:47 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
udp_vu_sock_recv() currently mixes two concerns: receiving data from the
socket and managing virtqueue buffers (collecting, rewinding, releasing).
This makes the function harder to reason about and couples socket I/O
with virtqueue state.
Move all virtqueue operations, vu_collect(), vu_init_elem(),
vu_queue_rewind(), vu_set_vnethdr(), and the queue-readiness check, into
udp_vu_sock_to_tap(), which is the only caller. This turns
udp_vu_sock_recv() into a pure socket receive function that simply reads
into the provided iov array and adjusts its length.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
udp_vu.c | 87 ++++++++++++++++++++++++++------------------------------
1 file changed, 41 insertions(+), 46 deletions(-)
diff --git a/udp_vu.c b/udp_vu.c
index a39254776099..27ae93de4420 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -58,73 +58,40 @@ static size_t udp_vu_hdrlen(bool v6)
/**
* udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers
- * @c: Execution context
* @iov: IO vector for the frame (modified on output)
* @cnt: Number of IO vector entries (in/out)
- * @vq: virtqueue to use to receive data
* @s: Socket to receive from
* @v6: Set for IPv6 connections
*
- * Return: size of received data, 0 if the datagram
- * was discarded because the virtqueue is not ready, -1 on error
+ * Return: size of received data, -1 on error
*/
-static ssize_t udp_vu_sock_recv(const struct ctx *c, struct iovec *iov,
- size_t *cnt, struct vu_virtq *vq, int s,
- bool v6)
+static ssize_t udp_vu_sock_recv(struct iovec *iov, size_t *cnt, int s, bool v6)
{
- const struct vu_dev *vdev = c->vdev;
- struct msghdr msg = { 0 };
+ struct iovec msg_iov[*cnt];
+ struct msghdr msg = { 0 };
struct iov_tail payload;
size_t hdrlen;
ssize_t dlen;
- int iov_cnt;
-
- ASSERT(!c->no_udp);
-
- if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
- debug("Got UDP packet, but RX virtqueue not usable yet");
-
- if (recvmsg(s, &msg, MSG_DONTWAIT) < 0)
- debug_perror("Failed to discard datagram");
-
- *cnt = 0;
- return 0;
- }
/* compute L2 header length */
hdrlen = udp_vu_hdrlen(v6);
- vu_init_elem(elem, iov, *cnt);
-
- iov_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem),
- IP_MAX_MTU + ETH_HLEN + VNET_HLEN, NULL);
- if (iov_cnt == 0)
- return -1;
-
- payload = IOV_TAIL(iov, iov_cnt, hdrlen);
+ payload = IOV_TAIL(iov, *cnt, hdrlen);
- struct iovec msg_iov[payload.cnt];
msg.msg_iov = msg_iov;
msg.msg_iovlen = iov_tail_clone(msg.msg_iov, payload.cnt, &payload);
/* read data from the socket */
dlen = recvmsg(s, &msg, 0);
- if (dlen < 0) {
- vu_queue_rewind(vq, iov_cnt);
+ if (dlen < 0)
return -1;
- }
/* Pad short frames to ETH_ZLEN */
if (ETH_ZLEN + VNET_HLEN > dlen + hdrlen) {
- iov_memset(iov, iov_cnt, dlen + hdrlen, 0,
+ iov_memset(iov, *cnt, dlen + hdrlen, 0,
ETH_ZLEN + VNET_HLEN - (dlen + hdrlen));
}
- *cnt = iov_truncate(iov, iov_cnt, dlen + hdrlen);
-
- vu_set_vnethdr(iov[0].iov_base, *cnt);
-
- /* release unused buffers */
- vu_queue_rewind(vq, iov_cnt - *cnt);
+ *cnt = iov_truncate(iov, *cnt, dlen + hdrlen);
return dlen;
}
@@ -212,20 +179,48 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
struct vu_dev *vdev = c->vdev;
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
- struct iov_tail data;
int i;
+ ASSERT(!c->no_udp);
+
+ if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
+ struct msghdr msg = { 0 };
+
+ debug("Got UDP packet, but RX virtqueue not usable yet");
+
+ for (i = 0; i < n; i++) {
+ if (recvmsg(s, &msg, MSG_DONTWAIT) < 0)
+ debug_perror("Failed to discard datagram");
+ }
+
+ return;
+ }
+
for (i = 0; i < n; i++) {
size_t iov_cnt;
ssize_t dlen;
+ int elem_cnt;
- iov_cnt = VIRTQUEUE_MAX_SIZE;
- dlen = udp_vu_sock_recv(c, iov_vu, &iov_cnt, vq, s, v6);
- if (dlen < 0)
+ vu_init_elem(elem, iov_vu, ARRAY_SIZE(elem));
+
+ elem_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem),
+ IP_MAX_MTU + ETH_HLEN + VNET_HLEN, NULL);
+ if (elem_cnt == 0)
break;
+ iov_cnt = elem_cnt;
+ dlen = udp_vu_sock_recv(iov_vu, &iov_cnt, s, v6);
+ if (dlen < 0) {
+ vu_queue_rewind(vq, elem_cnt);
+ break;
+ }
+
+ /* release unused buffers */
+ vu_queue_rewind(vq, elem_cnt - iov_cnt);
+
if (iov_cnt > 0) {
- data = IOV_TAIL(iov_vu, iov_cnt, 0);
+ struct iov_tail data = IOV_TAIL(iov_vu, iov_cnt, 0);
+ vu_set_vnethdr(iov_vu[0].iov_base, iov_cnt);
udp_vu_prepare(c, &data, toside, dlen);
if (*c->pcap) {
udp_vu_csum(toside, &data);
--
2.53.0
^ permalink raw reply [flat|nested] 14+ messages in thread* [PATCH v2 06/13] iov: Add IOV_PUT_HEADER() to write header data back to iov_tail
2026-03-09 9:47 [PATCH v2 00/13] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
` (4 preceding siblings ...)
2026-03-09 9:47 ` [PATCH v2 05/13] udp_vu: Move virtqueue management from udp_vu_sock_recv() to its caller Laurent Vivier
@ 2026-03-09 9:47 ` Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 07/13] udp: Pass iov_tail to udp_update_hdr4()/udp_update_hdr6() Laurent Vivier
` (6 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Laurent Vivier @ 2026-03-09 9:47 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Add a counterpart to IOV_PEEK_HEADER() that writes header data back
to an iov_tail after modification. If the header pointer matches the
original iov buffer location, it only advances the offset. Otherwise,
it copies the data using iov_from_buf().
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
iov.c | 24 ++++++++++++++++++++++++
iov.h | 14 +++++++++++++-
2 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/iov.c b/iov.c
index cd48667226f3..296f24b61067 100644
--- a/iov.c
+++ b/iov.c
@@ -305,6 +305,30 @@ void *iov_peek_header_(struct iov_tail *tail, void *v, size_t len, size_t align)
return v;
}
+/**
+ * iov_put_header_() - Write header back to an IOV tail
+ * @tail: IOV tail to write header to
+ * @v: Pointer to header data to write
+ * @len: Length of header to write, in bytes
+ *
+ * Return: number of bytes written
+ */
+/* cppcheck-suppress unusedFunction */
+size_t iov_put_header_(struct iov_tail *tail, const void *v, size_t len)
+{
+ size_t l = len;
+
+ /* iov_peek_header_() already called iov_check_header() */
+ if ((char *)tail->iov[0].iov_base + tail->off != v)
+ l = iov_from_buf(tail->iov, tail->cnt, tail->off, v, len);
+
+ ASSERT(l == len);
+
+ tail->off += l;
+
+ return l;
+}
+
/**
* iov_remove_header_() - Remove a header from an IOV tail
* @tail: IOV tail to remove header from (modified)
diff --git a/iov.h b/iov.h
index d295d05b3bab..fe7c5163d2ab 100644
--- a/iov.h
+++ b/iov.h
@@ -90,6 +90,7 @@ bool iov_tail_prune(struct iov_tail *tail);
size_t iov_tail_size(struct iov_tail *tail);
bool iov_drop_header(struct iov_tail *tail, size_t len);
void *iov_peek_header_(struct iov_tail *tail, void *v, size_t len, size_t align);
+size_t iov_put_header_(struct iov_tail *tail, const void *v, size_t len);
void *iov_remove_header_(struct iov_tail *tail, void *v, size_t len, size_t align);
ssize_t iov_tail_clone(struct iovec *dst_iov, size_t dst_iov_cnt,
struct iov_tail *tail);
@@ -112,6 +113,16 @@ ssize_t iov_tail_clone(struct iovec *dst_iov, size_t dst_iov_cnt,
sizeof(var_), \
__alignof__(var_))))
+/**
+ * IOV_PUT_HEADER() - Write header back to an IOV tail
+ * @tail_: IOV tail to write header to
+ * @var_: Pointer to a variable containing the header data to write
+ *
+ * Return: number of bytes written
+ */
+#define IOV_PUT_HEADER(tail_, var_) \
+ (iov_put_header_((tail_), (var_), sizeof(*var_)))
+
/**
* IOV_REMOVE_HEADER() - Remove and return typed header from an IOV tail
* @tail_: IOV tail to remove header from (modified)
@@ -130,7 +141,8 @@ ssize_t iov_tail_clone(struct iovec *dst_iov, size_t dst_iov_cnt,
((__typeof__(var_) *)(iov_remove_header_((tail_), &(var_), \
sizeof(var_), __alignof__(var_))))
-/** IOV_DROP_HEADER() - Remove a typed header from an IOV tail
+/**
+ * IOV_DROP_HEADER() - Remove a typed header from an IOV tail
* @tail_: IOV tail to remove header from (modified)
* @type_: Data type of the header to remove
*
--
2.53.0
^ permalink raw reply [flat|nested] 14+ messages in thread* [PATCH v2 07/13] udp: Pass iov_tail to udp_update_hdr4()/udp_update_hdr6()
2026-03-09 9:47 [PATCH v2 00/13] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
` (5 preceding siblings ...)
2026-03-09 9:47 ` [PATCH v2 06/13] iov: Add IOV_PUT_HEADER() to write header data back to iov_tail Laurent Vivier
@ 2026-03-09 9:47 ` Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 08/13] udp_vu: Use iov_tail in udp_vu_prepare() Laurent Vivier
` (5 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Laurent Vivier @ 2026-03-09 9:47 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Change udp_update_hdr4() and udp_update_hdr6() to take a separate
struct udphdr pointer and an iov_tail for the payload, instead of a
struct udp_payload_t pointer and an explicit data length.
This decouples the header update functions from the udp_payload_t memory
layout, which assumes all headers and data sit in a single contiguous
buffer. The vhost-user path uses virtqueue-provided scatter-gather
buffers where this assumption does not hold; passing an iov_tail lets
both the tap path and the vhost-user path share the same functions
without casting through layout-specific helpers.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
udp.c | 72 +++++++++++++++++++++++---------------------------
udp_internal.h | 10 ++++---
udp_vu.c | 14 ++++++++--
3 files changed, 51 insertions(+), 45 deletions(-)
diff --git a/udp.c b/udp.c
index 464aa09365cc..6840e8843e68 100644
--- a/udp.c
+++ b/udp.c
@@ -255,20 +255,20 @@ static void udp_iov_init(const struct ctx *c)
/**
* udp_update_hdr4() - Update headers for one IPv4 datagram
* @ip4h: Pre-filled IPv4 header (except for tot_len and saddr)
- * @bp: Pointer to udp_payload_t to update
+ * @uh: UDP header to update
+ * @payload: UDP payload
* @toside: Flowside for destination side
- * @dlen: Length of UDP payload
* @no_udp_csum: Do not set UDP checksum
*
* Return: size of IPv4 payload (UDP header + data)
*/
-size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
- const struct flowside *toside, size_t dlen,
- bool no_udp_csum)
+size_t udp_update_hdr4(struct iphdr *ip4h, struct udphdr *uh,
+ struct iov_tail *payload,
+ const struct flowside *toside, bool no_udp_csum)
{
const struct in_addr *src = inany_v4(&toside->oaddr);
const struct in_addr *dst = inany_v4(&toside->eaddr);
- size_t l4len = dlen + sizeof(bp->uh);
+ size_t l4len = iov_tail_size(payload) + sizeof(*uh);
size_t l3len = l4len + sizeof(*ip4h);
ASSERT(src && dst);
@@ -278,19 +278,13 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
ip4h->saddr = src->s_addr;
ip4h->check = csum_ip4_header(l3len, IPPROTO_UDP, *src, *dst);
- bp->uh.source = htons(toside->oport);
- bp->uh.dest = htons(toside->eport);
- bp->uh.len = htons(l4len);
- if (no_udp_csum) {
- bp->uh.check = 0;
- } else {
- const struct iovec iov = {
- .iov_base = bp->data,
- .iov_len = dlen
- };
- struct iov_tail data = IOV_TAIL(&iov, 1, 0);
- csum_udp4(&bp->uh, *src, *dst, &data);
- }
+ uh->source = htons(toside->oport);
+ uh->dest = htons(toside->eport);
+ uh->len = htons(l4len);
+ if (no_udp_csum)
+ uh->check = 0;
+ else
+ csum_udp4(uh, *src, *dst, payload);
return l4len;
}
@@ -299,18 +293,18 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
* udp_update_hdr6() - Update headers for one IPv6 datagram
* @ip6h: Pre-filled IPv6 header (except for payload_len and
* addresses)
- * @bp: Pointer to udp_payload_t to update
+ * @uh: UDP header to update
+ * @payload: UDP payload
* @toside: Flowside for destination side
- * @dlen: Length of UDP payload
* @no_udp_csum: Do not set UDP checksum
*
* Return: size of IPv6 payload (UDP header + data)
*/
-size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
- const struct flowside *toside, size_t dlen,
- bool no_udp_csum)
+size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udphdr *uh,
+ struct iov_tail *payload,
+ const struct flowside *toside, bool no_udp_csum)
{
- uint16_t l4len = dlen + sizeof(bp->uh);
+ uint16_t l4len = iov_tail_size(payload) + sizeof(*uh);
ip6h->payload_len = htons(l4len);
ip6h->daddr = toside->eaddr.a6;
@@ -319,22 +313,17 @@ size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
ip6h->nexthdr = IPPROTO_UDP;
ip6h->hop_limit = 255;
- bp->uh.source = htons(toside->oport);
- bp->uh.dest = htons(toside->eport);
- bp->uh.len = ip6h->payload_len;
+ uh->source = htons(toside->oport);
+ uh->dest = htons(toside->eport);
+ uh->len = ip6h->payload_len;
if (no_udp_csum) {
/* 0 is an invalid checksum for UDP IPv6 and dropped by
* the kernel stack, even if the checksum is disabled by virtio
* flags. We need to put any non-zero value here.
*/
- bp->uh.check = 0xffff;
+ uh->check = 0xffff;
} else {
- const struct iovec iov = {
- .iov_base = bp->data,
- .iov_len = dlen
- };
- struct iov_tail data = IOV_TAIL(&iov, 1, 0);
- csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data);
+ csum_udp6(uh, &toside->oaddr.a6, &toside->eaddr.a6, payload);
}
return l4len;
@@ -374,12 +363,17 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
struct ethhdr *eh = (*tap_iov)[UDP_IOV_ETH].iov_base;
struct udp_payload_t *bp = &udp_payload[idx];
struct udp_meta_t *bm = &udp_meta[idx];
+ const struct iovec iov = {
+ .iov_base = bp->data,
+ .iov_len = mmh[idx].msg_len,
+ };
+ struct iov_tail payload = IOV_TAIL(&iov, 1, 0);
size_t l4len, l2len;
eth_update_mac(eh, NULL, tap_omac);
if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) {
- l4len = udp_update_hdr6(&bm->ip6h, bp, toside,
- mmh[idx].msg_len, no_udp_csum);
+ l4len = udp_update_hdr6(&bm->ip6h, &bp->uh, &payload, toside,
+ no_udp_csum);
l2len = MAX(l4len + sizeof(bm->ip6h) + ETH_HLEN, ETH_ZLEN);
tap_hdr_update(&bm->taph, l2len);
@@ -387,8 +381,8 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
eh->h_proto = htons_constant(ETH_P_IPV6);
(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
} else {
- l4len = udp_update_hdr4(&bm->ip4h, bp, toside,
- mmh[idx].msg_len, no_udp_csum);
+ l4len = udp_update_hdr4(&bm->ip4h, &bp->uh, &payload, toside,
+ no_udp_csum);
l2len = MAX(l4len + sizeof(bm->ip4h) + ETH_HLEN, ETH_ZLEN);
tap_hdr_update(&bm->taph, l2len);
diff --git a/udp_internal.h b/udp_internal.h
index 64e457748324..fba5ef33cf99 100644
--- a/udp_internal.h
+++ b/udp_internal.h
@@ -25,11 +25,13 @@ struct udp_payload_t {
} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
#endif
-size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
- const struct flowside *toside, size_t dlen,
+size_t udp_update_hdr4(struct iphdr *ip4h, struct udphdr *uh,
+ struct iov_tail *payload,
+ const struct flowside *toside,
bool no_udp_csum);
-size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
- const struct flowside *toside, size_t dlen,
+size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udphdr *uh,
+ struct iov_tail *payload,
+ const struct flowside *toside,
bool no_udp_csum);
void udp_sock_fwd(const struct ctx *c, int s, int rule_hint,
uint8_t frompif, in_port_t port, const struct timespec *now);
diff --git a/udp_vu.c b/udp_vu.c
index 27ae93de4420..2a5d3f822bf6 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -122,21 +122,31 @@ static size_t udp_vu_prepare(const struct ctx *c, const struct iov_tail *data,
if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
struct iphdr *iph = vu_ip(iov[0].iov_base);
struct udp_payload_t *bp = vu_payloadv4(iov[0].iov_base);
+ const struct iovec payload_iov = {
+ .iov_base = bp->data,
+ .iov_len = dlen,
+ };
+ struct iov_tail payload = IOV_TAIL(&payload_iov, 1, 0);
eh->h_proto = htons(ETH_P_IP);
*iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
- l4len = udp_update_hdr4(iph, bp, toside, dlen, true);
+ l4len = udp_update_hdr4(iph, &bp->uh, &payload, toside, true);
} else {
struct ipv6hdr *ip6h = vu_ip(iov[0].iov_base);
struct udp_payload_t *bp = vu_payloadv6(iov[0].iov_base);
+ const struct iovec payload_iov = {
+ .iov_base = bp->data,
+ .iov_len = dlen,
+ };
+ struct iov_tail payload = IOV_TAIL(&payload_iov, 1, 0);
eh->h_proto = htons(ETH_P_IPV6);
*ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
- l4len = udp_update_hdr6(ip6h, bp, toside, dlen, true);
+ l4len = udp_update_hdr6(ip6h, &bp->uh, &payload, toside, true);
}
return l4len;
--
2.53.0
^ permalink raw reply [flat|nested] 14+ messages in thread* [PATCH v2 08/13] udp_vu: Use iov_tail in udp_vu_prepare()
2026-03-09 9:47 [PATCH v2 00/13] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
` (6 preceding siblings ...)
2026-03-09 9:47 ` [PATCH v2 07/13] udp: Pass iov_tail to udp_update_hdr4()/udp_update_hdr6() Laurent Vivier
@ 2026-03-09 9:47 ` Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 09/13] vu_common: Pass iov_tail to vu_set_vnethdr() Laurent Vivier
` (4 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Laurent Vivier @ 2026-03-09 9:47 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Rework udp_vu_prepare() to use IOV_REMOVE_HEADER() and IOV_PUT_HEADER()
to walk through Ethernet, IP and UDP headers instead of the layout-specific
helpers (vu_eth(), vu_ip(), vu_payloadv4(), vu_payloadv6()) that assume a
contiguous buffer. The payload length is now implicit in the iov_tail, so
drop the dlen parameter.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
iov.c | 1 -
udp_vu.c | 64 ++++++++++++++++++++++++++++++--------------------------
2 files changed, 34 insertions(+), 31 deletions(-)
diff --git a/iov.c b/iov.c
index 296f24b61067..1f554f5ac297 100644
--- a/iov.c
+++ b/iov.c
@@ -313,7 +313,6 @@ void *iov_peek_header_(struct iov_tail *tail, void *v, size_t len, size_t align)
*
* Return: number of bytes written
*/
-/* cppcheck-suppress unusedFunction */
size_t iov_put_header_(struct iov_tail *tail, const void *v, size_t len)
{
size_t l = len;
diff --git a/udp_vu.c b/udp_vu.c
index 2a5d3f822bf6..a21a03dbf23e 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -101,52 +101,54 @@ static ssize_t udp_vu_sock_recv(struct iovec *iov, size_t *cnt, int s, bool v6)
* @c: Execution context
* @data: IO vector tail for the frame
* @toside: Address information for one side of the flow
- * @dlen: Packet data length
*
* Return: Layer-4 length
*/
static size_t udp_vu_prepare(const struct ctx *c, const struct iov_tail *data,
- const struct flowside *toside, ssize_t dlen)
+ const struct flowside *toside)
{
- const struct iovec *iov = data->iov;
- struct ethhdr *eh;
+ struct iov_tail current = *data;
+ struct ethhdr *eh, eh_storage;
+ struct udphdr *uh, uh_storage;
size_t l4len;
/* ethernet header */
- eh = vu_eth(iov[0].iov_base);
+ eh = IOV_REMOVE_HEADER(&current, eh_storage);
memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
/* initialize header */
if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
- struct iphdr *iph = vu_ip(iov[0].iov_base);
- struct udp_payload_t *bp = vu_payloadv4(iov[0].iov_base);
- const struct iovec payload_iov = {
- .iov_base = bp->data,
- .iov_len = dlen,
- };
- struct iov_tail payload = IOV_TAIL(&payload_iov, 1, 0);
+ struct iphdr *iph, iph_storage;
eh->h_proto = htons(ETH_P_IP);
+ iph = IOV_REMOVE_HEADER(&current, iph_storage);
+ *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
- l4len = udp_update_hdr4(iph, &bp->uh, &payload, toside, true);
+ uh = IOV_REMOVE_HEADER(&current, uh_storage);
+ l4len = udp_update_hdr4(iph, uh, &current, toside, true);
+
+ current = *data;
+ IOV_PUT_HEADER(&current, eh);
+ IOV_PUT_HEADER(&current, iph);
+ IOV_PUT_HEADER(&current, uh);
} else {
- struct ipv6hdr *ip6h = vu_ip(iov[0].iov_base);
- struct udp_payload_t *bp = vu_payloadv6(iov[0].iov_base);
- const struct iovec payload_iov = {
- .iov_base = bp->data,
- .iov_len = dlen,
- };
- struct iov_tail payload = IOV_TAIL(&payload_iov, 1, 0);
+ struct ipv6hdr *ip6h, ip6h_storage;
eh->h_proto = htons(ETH_P_IPV6);
+ ip6h = IOV_REMOVE_HEADER(&current, ip6h_storage);
+ *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
- l4len = udp_update_hdr6(ip6h, &bp->uh, &payload, toside, true);
+ uh = IOV_REMOVE_HEADER(&current, uh_storage);
+ l4len = udp_update_hdr6(ip6h, uh, &current, toside, true);
+
+ current = *data;
+ IOV_PUT_HEADER(&current, eh);
+ IOV_PUT_HEADER(&current, ip6h);
+ IOV_PUT_HEADER(&current, uh);
}
return l4len;
@@ -165,9 +167,10 @@ static void udp_vu_csum(const struct flowside *toside,
struct iov_tail payload = *data;
struct udphdr *uh, uh_storage;
bool ipv4 = src4 && dst4;
+ int hdrlen = sizeof(struct ethhdr) +
+ (ipv4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr));
- iov_drop_header(&payload,
- udp_vu_hdrlen(!ipv4) - sizeof(struct udphdr));
+ iov_drop_header(&payload, hdrlen);
uh = IOV_REMOVE_HEADER(&payload, uh_storage);
if (ipv4)
@@ -207,9 +210,9 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
}
for (i = 0; i < n; i++) {
+ int elem_cnt, elem_used;
size_t iov_cnt;
ssize_t dlen;
- int elem_cnt;
vu_init_elem(elem, iov_vu, ARRAY_SIZE(elem));
@@ -224,19 +227,20 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
vu_queue_rewind(vq, elem_cnt);
break;
}
+ elem_used = iov_cnt;
/* release unused buffers */
- vu_queue_rewind(vq, elem_cnt - iov_cnt);
-
+ vu_queue_rewind(vq, elem_cnt - elem_used);
if (iov_cnt > 0) {
struct iov_tail data = IOV_TAIL(iov_vu, iov_cnt, 0);
- vu_set_vnethdr(iov_vu[0].iov_base, iov_cnt);
- udp_vu_prepare(c, &data, toside, dlen);
+ vu_set_vnethdr(iov_vu[0].iov_base, elem_used);
+ iov_drop_header(&data, VNET_HLEN);
+ udp_vu_prepare(c, &data, toside);
if (*c->pcap) {
udp_vu_csum(toside, &data);
- pcap_iov(data.iov, data.cnt, VNET_HLEN);
+ pcap_iov(data.iov, data.cnt, data.off);
}
- vu_flush(vdev, vq, elem, data.cnt);
+ vu_flush(vdev, vq, elem, elem_used);
}
}
}
--
2.53.0
^ permalink raw reply [flat|nested] 14+ messages in thread* [PATCH v2 09/13] vu_common: Pass iov_tail to vu_set_vnethdr()
2026-03-09 9:47 [PATCH v2 00/13] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
` (7 preceding siblings ...)
2026-03-09 9:47 ` [PATCH v2 08/13] udp_vu: Use iov_tail in udp_vu_prepare() Laurent Vivier
@ 2026-03-09 9:47 ` Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 10/13] vu_common: Accept explicit iovec counts in vu_set_element() Laurent Vivier
` (3 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Laurent Vivier @ 2026-03-09 9:47 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Refactor vu_set_vnethdr() to take an iov_tail pointer instead of a
direct pointer to the virtio_net_hdr_mrg_rxbuf structure.
This makes the function use IOV_PEEK_HEADER() and IOV_PUT_HEADER()
to read and write the virtio-net header through the iov_tail abstraction.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
tcp_vu.c | 8 +++++---
udp_vu.c | 3 +--
vu_common.c | 21 +++++++++++++++------
vu_common.h | 2 +-
4 files changed, 22 insertions(+), 12 deletions(-)
diff --git a/tcp_vu.c b/tcp_vu.c
index 8da2dcfe78d0..688f48905d46 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -94,10 +94,11 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
if (elem_cnt != 1)
return -1;
- ASSERT(flags_elem[0].in_sg[0].iov_len >=
+ payload = IOV_TAIL(&flags_elem[0].in_sg[0], elem_cnt, 0);
+ ASSERT(iov_tail_size(&payload) >=
MAX(hdrlen + sizeof(*opts), ETH_ZLEN + VNET_HLEN));
- vu_set_vnethdr(flags_elem[0].in_sg[0].iov_base, 1);
+ vu_set_vnethdr(&payload, 1);
eh = vu_eth(flags_elem[0].in_sg[0].iov_base);
@@ -454,6 +455,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
for (i = 0, previous_dlen = -1, check = NULL; i < head_cnt; i++) {
struct iovec *iov = &elem[head[i]].in_sg[0];
int buf_cnt = head[i + 1] - head[i];
+ struct iov_tail data = IOV_TAIL(iov, buf_cnt, 0);
size_t frame_size = iov_size(iov, buf_cnt);
bool push = i == head_cnt - 1;
ssize_t dlen;
@@ -461,7 +463,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
ASSERT(frame_size >= hdrlen);
dlen = frame_size - hdrlen;
- vu_set_vnethdr(iov->iov_base, buf_cnt);
+ vu_set_vnethdr(&data, buf_cnt);
/* The IPv4 header checksum varies only with dlen */
if (previous_dlen != dlen)
diff --git a/udp_vu.c b/udp_vu.c
index a21a03dbf23e..414750ff742a 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -233,8 +233,7 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
vu_queue_rewind(vq, elem_cnt - elem_used);
if (iov_cnt > 0) {
struct iov_tail data = IOV_TAIL(iov_vu, iov_cnt, 0);
- vu_set_vnethdr(iov_vu[0].iov_base, elem_used);
- iov_drop_header(&data, VNET_HLEN);
+ vu_set_vnethdr(&data, elem_used);
udp_vu_prepare(c, &data, toside);
if (*c->pcap) {
udp_vu_csum(toside, &data);
diff --git a/vu_common.c b/vu_common.c
index 8afa5199908f..3538e59581b7 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -120,17 +120,24 @@ int vu_collect(const struct vu_dev *vdev, struct vu_virtq *vq,
}
/**
- * vu_set_vnethdr() - set virtio-net headers
- * @vnethdr: Address of the header to set
+ * vu_set_vnethdr() - set virtio-net header
+ * @data: IOV tail to write header to, updated to
+ * point after the virtio-net header
* @num_buffers: Number of guest buffers of the frame
*/
-void vu_set_vnethdr(struct virtio_net_hdr_mrg_rxbuf *vnethdr, int num_buffers)
+void vu_set_vnethdr(struct iov_tail *data, int num_buffers)
{
+ struct virtio_net_hdr_mrg_rxbuf vnethdr_storage, *vnethdr;
+
+ vnethdr = IOV_PEEK_HEADER(data, vnethdr_storage);
+
vnethdr->hdr = VU_HEADER;
/* Note: if VIRTIO_NET_F_MRG_RXBUF is not negotiated,
* num_buffers must be 1
*/
vnethdr->num_buffers = htole16(num_buffers);
+
+ IOV_PUT_HEADER(data, vnethdr);
}
/**
@@ -267,6 +274,7 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
+ struct iov_tail data;
size_t total;
int elem_cnt;
int i;
@@ -295,15 +303,16 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
}
elem_cnt = iov_truncate(in_sg, elem_cnt, size);
- vu_set_vnethdr(in_sg[0].iov_base, elem_cnt);
+ data = IOV_TAIL(&in_sg[0], elem_cnt, 0);
+ vu_set_vnethdr(&data, elem_cnt);
size -= VNET_HLEN;
/* copy data from the buffer to the iovec */
- iov_from_buf(in_sg, elem_cnt, VNET_HLEN, buf, size);
+ iov_from_buf(in_sg, elem_cnt, data.off, buf, size);
if (*c->pcap)
- pcap_iov(in_sg, elem_cnt, VNET_HLEN);
+ pcap_iov(data.iov, data.cnt, data.off);
vu_flush(vdev, vq, elem, elem_cnt);
diff --git a/vu_common.h b/vu_common.h
index 5de0c987b936..d068c61695f8 100644
--- a/vu_common.h
+++ b/vu_common.h
@@ -55,7 +55,7 @@ void vu_init_elem(struct vu_virtq_element *elem, struct iovec *iov,
int vu_collect(const struct vu_dev *vdev, struct vu_virtq *vq,
struct vu_virtq_element *elem, int max_elem, size_t size,
size_t *collected);
-void vu_set_vnethdr(struct virtio_net_hdr_mrg_rxbuf *vnethdr, int num_buffers);
+void vu_set_vnethdr(struct iov_tail *data, int num_buffers);
void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
struct vu_virtq_element *elem, int elem_cnt);
void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
--
2.53.0
^ permalink raw reply [flat|nested] 14+ messages in thread* [PATCH v2 10/13] vu_common: Accept explicit iovec counts in vu_set_element()
2026-03-09 9:47 [PATCH v2 00/13] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
` (8 preceding siblings ...)
2026-03-09 9:47 ` [PATCH v2 09/13] vu_common: Pass iov_tail to vu_set_vnethdr() Laurent Vivier
@ 2026-03-09 9:47 ` Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 11/13] vu_common: Accept explicit iovec count per element in vu_init_elem() Laurent Vivier
` (2 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Laurent Vivier @ 2026-03-09 9:47 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier, David Gibson
Previously, vu_set_element() derived the number of iovec entries from
whether the pointer was NULL or not (using !!out_sg and !!in_sg). This
implicitly limited each virtqueue element to at most one iovec per
direction.
Change the function signature to accept explicit out_num and in_num
parameters, allowing callers to specify multiple iovec entries per
element when needed. Update all existing call sites to pass the
equivalent values (0 for NULL pointers, 1 for valid pointers).
No functional change.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
tcp_vu.c | 4 ++--
vu_common.c | 5 +++--
vu_common.h | 13 ++++++++-----
3 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/tcp_vu.c b/tcp_vu.c
index 688f48905d46..1916c703a125 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -87,7 +87,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
hdrlen = tcp_vu_hdrlen(CONN_V6(conn));
- vu_set_element(&flags_elem[0], NULL, &flags_iov[0]);
+ vu_set_element(&flags_elem[0], 0, NULL, 1, &flags_iov[0]);
elem_cnt = vu_collect(vdev, vq, &flags_elem[0], 1,
hdrlen + sizeof(*opts), NULL);
@@ -151,7 +151,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
nb_ack = 1;
if (flags & DUP_ACK) {
- vu_set_element(&flags_elem[1], NULL, &flags_iov[1]);
+ vu_set_element(&flags_elem[1], 0, NULL, 1, &flags_iov[1]);
elem_cnt = vu_collect(vdev, vq, &flags_elem[1], 1,
hdrlen + optlen, NULL);
diff --git a/vu_common.c b/vu_common.c
index 3538e59581b7..12746db066c4 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -59,12 +59,13 @@ int vu_packet_check_range(struct vdev_memory *memory,
* @iov: Array of iovec to assign to virtqueue element
* @elem_cnt: Number of virtqueue element
*/
-void vu_init_elem(struct vu_virtq_element *elem, struct iovec *iov, int elem_cnt)
+void vu_init_elem(struct vu_virtq_element *elem, struct iovec *iov,
+ int elem_cnt)
{
int i;
for (i = 0; i < elem_cnt; i++)
- vu_set_element(&elem[i], NULL, &iov[i]);
+ vu_set_element(&elem[i], 0, NULL, 1, &iov[i]);
}
/**
diff --git a/vu_common.h b/vu_common.h
index d068c61695f8..0d4ccdceab69 100644
--- a/vu_common.h
+++ b/vu_common.h
@@ -38,15 +38,18 @@ static inline void *vu_payloadv6(void *base)
/**
* vu_set_element() - Initialize a vu_virtq_element
* @elem: Element to initialize
- * @out_sg: One out iovec entry to set in elem
- * @in_sg: One in iovec entry to set in elem
+ * @out_num: Number of outgoing iovec buffers
+ * @out_sg: Out iovec entry to set in elem
+ * @in_num: Number of incoming iovec buffers
+ * @in_sg: In iovec entry to set in elem
*/
static inline void vu_set_element(struct vu_virtq_element *elem,
- struct iovec *out_sg, struct iovec *in_sg)
+ unsigned int out_num, struct iovec *out_sg,
+ unsigned int in_num, struct iovec *in_sg)
{
- elem->out_num = !!out_sg;
+ elem->out_num = out_num;
elem->out_sg = out_sg;
- elem->in_num = !!in_sg;
+ elem->in_num = in_num;
elem->in_sg = in_sg;
}
--
2.53.0
^ permalink raw reply [flat|nested] 14+ messages in thread* [PATCH v2 11/13] vu_common: Accept explicit iovec count per element in vu_init_elem()
2026-03-09 9:47 [PATCH v2 00/13] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
` (9 preceding siblings ...)
2026-03-09 9:47 ` [PATCH v2 10/13] vu_common: Accept explicit iovec counts in vu_set_element() Laurent Vivier
@ 2026-03-09 9:47 ` Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 12/13] vu_common: Prepare to use multibuffer with guest RX Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 13/13] vhost-user,udp: Use 2 iovec entries per element Laurent Vivier
12 siblings, 0 replies; 14+ messages in thread
From: Laurent Vivier @ 2026-03-09 9:47 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier, David Gibson
Extend vu_init_elem() to accept an iov_per_elem parameter specifying
how many iovec entries to assign to each virtqueue element. The iov
array is now strided by iov_per_elem rather than 1.
Update all callers to pass 1, preserving existing behavior.
No functional change.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
tcp_vu.c | 2 +-
udp_vu.c | 2 +-
vu_common.c | 19 ++++++++++---------
vu_common.h | 2 +-
4 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/tcp_vu.c b/tcp_vu.c
index 1916c703a125..c0e4589f8193 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -211,7 +211,7 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
hdrlen = tcp_vu_hdrlen(v6);
- vu_init_elem(elem, &iov_vu[DISCARD_IOV_NUM], ARRAY_SIZE(elem));
+ vu_init_elem(elem, &iov_vu[DISCARD_IOV_NUM], ARRAY_SIZE(elem), 1);
elem_cnt = 0;
*head_cnt = 0;
diff --git a/udp_vu.c b/udp_vu.c
index 414750ff742a..cb5274aa1d81 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -214,7 +214,7 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
size_t iov_cnt;
ssize_t dlen;
- vu_init_elem(elem, iov_vu, ARRAY_SIZE(elem));
+ vu_init_elem(elem, iov_vu, ARRAY_SIZE(elem), 1);
elem_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem),
IP_MAX_MTU + ETH_HLEN + VNET_HLEN, NULL);
diff --git a/vu_common.c b/vu_common.c
index 12746db066c4..94bd8d789a91 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -54,18 +54,19 @@ int vu_packet_check_range(struct vdev_memory *memory,
}
/**
- * vu_init_elem() - initialize an array of virtqueue elements with 1 iov in each
- * @elem: Array of virtqueue elements to initialize
- * @iov: Array of iovec to assign to virtqueue element
- * @elem_cnt: Number of virtqueue element
+ * vu_init_elem() - Initialize an array of virtqueue elements
+ * @elem: Array of virtqueue elements to initialize
+ * @iov: Array of iovecs to assign to virtqueue elements
+ * @elem_cnt: Number of virtqueue elements
+ * @iov_per_elem: Number of iovecs per element
*/
void vu_init_elem(struct vu_virtq_element *elem, struct iovec *iov,
- int elem_cnt)
+ int elem_cnt, int iov_per_elem)
{
- int i;
+ int i, j;
- for (i = 0; i < elem_cnt; i++)
- vu_set_element(&elem[i], 0, NULL, 1, &iov[i]);
+ for (i = 0, j = 0; i < elem_cnt; i++, j += iov_per_elem)
+ vu_set_element(&elem[i], 0, NULL, iov_per_elem, &iov[j]);
}
/**
@@ -287,7 +288,7 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
return -1;
}
- vu_init_elem(elem, in_sg, ARRAY_SIZE(elem));
+ vu_init_elem(elem, in_sg, ARRAY_SIZE(elem), 1);
size += VNET_HLEN;
elem_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem), size, &total);
diff --git a/vu_common.h b/vu_common.h
index 0d4ccdceab69..c47e06885e3f 100644
--- a/vu_common.h
+++ b/vu_common.h
@@ -54,7 +54,7 @@ static inline void vu_set_element(struct vu_virtq_element *elem,
}
void vu_init_elem(struct vu_virtq_element *elem, struct iovec *iov,
- int elem_cnt);
+ int elem_cnt, int iov_per_elem);
int vu_collect(const struct vu_dev *vdev, struct vu_virtq *vq,
struct vu_virtq_element *elem, int max_elem, size_t size,
size_t *collected);
--
2.53.0
^ permalink raw reply [flat|nested] 14+ messages in thread* [PATCH v2 12/13] vu_common: Prepare to use multibuffer with guest RX
2026-03-09 9:47 [PATCH v2 00/13] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
` (10 preceding siblings ...)
2026-03-09 9:47 ` [PATCH v2 11/13] vu_common: Accept explicit iovec count per element in vu_init_elem() Laurent Vivier
@ 2026-03-09 9:47 ` Laurent Vivier
2026-03-09 9:47 ` [PATCH v2 13/13] vhost-user,udp: Use 2 iovec entries per element Laurent Vivier
12 siblings, 0 replies; 14+ messages in thread
From: Laurent Vivier @ 2026-03-09 9:47 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier, David Gibson
1b95bd6fa114 ("vhost_user: fix multibuffer from linux") introduced
multibuffer support for TX (from the guest), but with iPXE we also need
to handle multibuffer for RX (to the guest). This patch makes the
parameter generic and global.
No functional change.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
vu_common.c | 6 ++----
vu_common.h | 2 ++
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/vu_common.c b/vu_common.c
index 94bd8d789a91..3225aca53ea6 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -20,8 +20,6 @@
#include "migrate.h"
#include "epoll_ctl.h"
-#define VU_MAX_TX_BUFFER_NB 2
-
/**
* vu_packet_check_range() - Check if a given memory zone is contained in
* a mapped guest memory region
@@ -203,11 +201,11 @@ static void vu_handle_tx(struct vu_dev *vdev, int index,
count = 0;
out_sg_count = 0;
while (count < ARRAY_SIZE(elem) &&
- out_sg_count + VU_MAX_TX_BUFFER_NB <= VIRTQUEUE_MAX_SIZE) {
+ out_sg_count + IOV_PER_ELEM <= VIRTQUEUE_MAX_SIZE) {
int ret;
struct iov_tail data;
- elem[count].out_num = VU_MAX_TX_BUFFER_NB;
+ elem[count].out_num = IOV_PER_ELEM;
elem[count].out_sg = &out_sg[out_sg_count];
elem[count].in_num = 0;
elem[count].in_sg = NULL;
diff --git a/vu_common.h b/vu_common.h
index c47e06885e3f..47490d91a57d 100644
--- a/vu_common.h
+++ b/vu_common.h
@@ -15,6 +15,8 @@
#include "ip.h"
#include "virtio.h"
+#define IOV_PER_ELEM (2)
+
static inline void *vu_eth(void *base)
{
return ((char *)base + VNET_HLEN);
--
2.53.0
^ permalink raw reply [flat|nested] 14+ messages in thread* [PATCH v2 13/13] vhost-user,udp: Use 2 iovec entries per element
2026-03-09 9:47 [PATCH v2 00/13] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
` (11 preceding siblings ...)
2026-03-09 9:47 ` [PATCH v2 12/13] vu_common: Prepare to use multibuffer with guest RX Laurent Vivier
@ 2026-03-09 9:47 ` Laurent Vivier
12 siblings, 0 replies; 14+ messages in thread
From: Laurent Vivier @ 2026-03-09 9:47 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
iPXE places the vnet header in one virtqueue descriptor and the payload
in another. When passt maps these descriptors, it needs two iovecs per
virtqueue element to handle this layout.
Without this, passt crashes with:
ASSERTION FAILED in virtqueue_map_desc (virtio.c:403): num_sg < max_num_sg
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
udp_vu.c | 8 ++++----
vu_common.c | 15 +++++++++++----
2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/udp_vu.c b/udp_vu.c
index cb5274aa1d81..47659b0402fd 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -34,7 +34,7 @@
#include "vu_common.h"
static struct iovec iov_vu [VIRTQUEUE_MAX_SIZE];
-static struct vu_virtq_element elem [VIRTQUEUE_MAX_SIZE];
+static struct vu_virtq_element elem [VIRTQUEUE_MAX_SIZE / IOV_PER_ELEM];
/**
* udp_vu_hdrlen() - Sum size of all headers, from UDP to virtio-net
@@ -214,20 +214,20 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
size_t iov_cnt;
ssize_t dlen;
- vu_init_elem(elem, iov_vu, ARRAY_SIZE(elem), 1);
+ vu_init_elem(elem, iov_vu, ARRAY_SIZE(elem), IOV_PER_ELEM);
elem_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem),
IP_MAX_MTU + ETH_HLEN + VNET_HLEN, NULL);
if (elem_cnt == 0)
break;
- iov_cnt = elem_cnt;
+ iov_cnt = (size_t)elem_cnt * IOV_PER_ELEM;
dlen = udp_vu_sock_recv(iov_vu, &iov_cnt, s, v6);
if (dlen < 0) {
vu_queue_rewind(vq, elem_cnt);
break;
}
- elem_used = iov_cnt;
+ elem_used = DIV_ROUND_UP(iov_cnt, IOV_PER_ELEM);
/* release unused buffers */
vu_queue_rewind(vq, elem_cnt - elem_used);
diff --git a/vu_common.c b/vu_common.c
index 3225aca53ea6..a2867a293184 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -63,8 +63,15 @@ void vu_init_elem(struct vu_virtq_element *elem, struct iovec *iov,
{
int i, j;
- for (i = 0, j = 0; i < elem_cnt; i++, j += iov_per_elem)
+ for (i = 0, j = 0; i < elem_cnt; i++, j += iov_per_elem) {
+ int k;
+
+ for (k = 0; k < iov_per_elem; k++) {
+ iov[j + k].iov_base = NULL;
+ iov[j + k].iov_len = 0;
+ }
vu_set_element(&elem[i], 0, NULL, iov_per_elem, &iov[j]);
+ }
}
/**
@@ -272,7 +279,7 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
{
struct vu_dev *vdev = c->vdev;
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
- struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
+ struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE / IOV_PER_ELEM];
struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
struct iov_tail data;
size_t total;
@@ -286,7 +293,7 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
return -1;
}
- vu_init_elem(elem, in_sg, ARRAY_SIZE(elem), 1);
+ vu_init_elem(elem, in_sg, ARRAY_SIZE(elem), IOV_PER_ELEM);
size += VNET_HLEN;
elem_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem), size, &total);
@@ -303,7 +310,7 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
}
elem_cnt = iov_truncate(in_sg, elem_cnt, size);
- data = IOV_TAIL(&in_sg[0], elem_cnt, 0);
+ data = IOV_TAIL(&in_sg[0], (size_t)(elem_cnt * IOV_PER_ELEM), 0);
vu_set_vnethdr(&data, elem_cnt);
size -= VNET_HLEN;
--
2.53.0
^ permalink raw reply [flat|nested] 14+ messages in thread