* [PATCH v5 1/8] iov: Introduce iov_memset()
2026-03-27 17:58 [PATCH v5 0/8] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
@ 2026-03-27 17:58 ` Laurent Vivier
2026-03-27 17:58 ` [PATCH v5 2/8] vu_common: Move vnethdr setup into vu_flush() Laurent Vivier
` (6 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Laurent Vivier @ 2026-03-27 17:58 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Add a helper to set a range of bytes across an IO vector to a given
value, similar to memset() but operating over scatter-gather buffers.
It skips to the given offset and fills across iovec entries up to the
requested length.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
iov.c | 27 +++++++++++++++++++++++++++
iov.h | 2 ++
2 files changed, 29 insertions(+)
diff --git a/iov.c b/iov.c
index ae0743931d18..0188acdf5eba 100644
--- a/iov.c
+++ b/iov.c
@@ -170,6 +170,33 @@ size_t iov_truncate(struct iovec *iov, size_t iov_cnt, size_t size)
return i;
}
+/**
+ * iov_memset() - Set bytes of an IO vector to a given value
+ * @iov: IO vector
+ * @iov_cnt: Number of elements in @iov
+ * @offset: Byte offset in the iovec at which to start
+ * @c: Byte value to fill with
+ * @length: Number of bytes to set
+ * Will write less than @length bytes if it runs out of space in
+ * the iov
+ */
+/* cppcheck-suppress unusedFunction */
+void iov_memset(const struct iovec *iov, size_t iov_cnt, size_t offset, int c,
+ size_t length)
+{
+ size_t i;
+
+ i = iov_skip_bytes(iov, iov_cnt, offset, &offset);
+
+ for ( ; i < iov_cnt && length; i++) {
+ size_t n = MIN(iov[i].iov_len - offset, length);
+
+ memset((char *)iov[i].iov_base + offset, c, n);
+ offset = 0;
+ length -= n;
+ }
+}
+
/**
* iov_tail_prune() - Remove any unneeded buffers from an IOV tail
* @tail: IO vector tail (modified)
diff --git a/iov.h b/iov.h
index b4e50b0fca5a..d295d05b3bab 100644
--- a/iov.h
+++ b/iov.h
@@ -30,6 +30,8 @@ size_t iov_to_buf(const struct iovec *iov, size_t iov_cnt,
size_t offset, void *buf, size_t bytes);
size_t iov_size(const struct iovec *iov, size_t iov_cnt);
size_t iov_truncate(struct iovec *iov, size_t iov_cnt, size_t size);
+void iov_memset(const struct iovec *iov, size_t iov_cnt, size_t offset, int c,
+ size_t length);
/*
* DOC: Theory of Operation, struct iov_tail
--
2.53.0
^ permalink raw reply [flat|nested] 9+ messages in thread* [PATCH v5 2/8] vu_common: Move vnethdr setup into vu_flush()
2026-03-27 17:58 [PATCH v5 0/8] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
2026-03-27 17:58 ` [PATCH v5 1/8] iov: Introduce iov_memset() Laurent Vivier
@ 2026-03-27 17:58 ` Laurent Vivier
2026-03-27 17:58 ` [PATCH v5 3/8] vhost-user: Centralise Ethernet frame padding in vu_collect(), vu_pad() and vu_flush() Laurent Vivier
` (5 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Laurent Vivier @ 2026-03-27 17:58 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Every caller of vu_flush() was calling vu_set_vnethdr() beforehand with
the same pattern. Move it into vu_flush().
Remove vu_queue_notify() from vu_flush() and let callers invoke it
explicitly. This allows paths that perform multiple flushes, such as
tcp_vu_send_flag() and tcp_vu_data_from_sock(), to issue a single guest
notification at the end.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
tcp_vu.c | 19 ++++++++-----------
udp_vu.c | 3 +--
vu_common.c | 9 +++++----
vu_common.h | 1 -
4 files changed, 14 insertions(+), 18 deletions(-)
diff --git a/tcp_vu.c b/tcp_vu.c
index dc0e17c0f03f..0cd01190d612 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -82,7 +82,6 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
struct ethhdr *eh;
uint32_t seq;
int elem_cnt;
- int nb_ack;
int ret;
hdrlen = tcp_vu_hdrlen(CONN_V6(conn));
@@ -97,8 +96,6 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
assert(flags_elem[0].in_sg[0].iov_len >=
MAX(hdrlen + sizeof(*opts), ETH_ZLEN + VNET_HLEN));
- vu_set_vnethdr(flags_elem[0].in_sg[0].iov_base, 1);
-
eh = vu_eth(flags_elem[0].in_sg[0].iov_base);
memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
@@ -143,9 +140,10 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
l2len = optlen + hdrlen - VNET_HLEN;
vu_pad(&flags_elem[0].in_sg[0], l2len);
+ vu_flush(vdev, vq, flags_elem, 1);
+
if (*c->pcap)
pcap_iov(&flags_elem[0].in_sg[0], 1, VNET_HLEN);
- nb_ack = 1;
if (flags & DUP_ACK) {
elem_cnt = vu_collect(vdev, vq, &flags_elem[1], 1,
@@ -157,14 +155,14 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
memcpy(flags_elem[1].in_sg[0].iov_base,
flags_elem[0].in_sg[0].iov_base,
flags_elem[0].in_sg[0].iov_len);
- nb_ack++;
+
+ vu_flush(vdev, vq, &flags_elem[1], 1);
if (*c->pcap)
pcap_iov(&flags_elem[1].in_sg[0], 1, VNET_HLEN);
}
}
-
- vu_flush(vdev, vq, flags_elem, nb_ack);
+ vu_queue_notify(vdev, vq);
return 0;
}
@@ -451,7 +449,6 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
assert(frame_size >= hdrlen);
dlen = frame_size - hdrlen;
- vu_set_vnethdr(iov->iov_base, buf_cnt);
/* The IPv4 header checksum varies only with dlen */
if (previous_dlen != dlen)
@@ -464,14 +461,14 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
l2len = dlen + hdrlen - VNET_HLEN;
vu_pad(iov, l2len);
+ vu_flush(vdev, vq, &elem[head[i]], buf_cnt);
+
if (*c->pcap)
pcap_iov(iov, buf_cnt, VNET_HLEN);
conn->seq_to_tap += dlen;
}
-
- /* send packets */
- vu_flush(vdev, vq, elem, iov_cnt);
+ vu_queue_notify(vdev, vq);
conn_flag(c, conn, ACK_FROM_TAP_DUE);
diff --git a/udp_vu.c b/udp_vu.c
index cc69654398f0..f8629af58ab5 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -124,8 +124,6 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
l2len = *dlen + hdrlen - VNET_HLEN;
vu_pad(&iov_vu[0], l2len);
- vu_set_vnethdr(iov_vu[0].iov_base, elem_used);
-
/* release unused buffers */
vu_queue_rewind(vq, elem_cnt - elem_used);
@@ -230,6 +228,7 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
pcap_iov(iov_vu, iov_used, VNET_HLEN);
}
vu_flush(vdev, vq, elem, iov_used);
+ vu_queue_notify(vdev, vq);
}
}
}
diff --git a/vu_common.c b/vu_common.c
index 92381cd33c85..7627fad5976b 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -118,7 +118,8 @@ int vu_collect(const struct vu_dev *vdev, struct vu_virtq *vq,
* @vnethdr: Address of the header to set
* @num_buffers: Number of guest buffers of the frame
*/
-void vu_set_vnethdr(struct virtio_net_hdr_mrg_rxbuf *vnethdr, int num_buffers)
+static void vu_set_vnethdr(struct virtio_net_hdr_mrg_rxbuf *vnethdr,
+ int num_buffers)
{
vnethdr->hdr = VU_HEADER;
/* Note: if VIRTIO_NET_F_MRG_RXBUF is not negotiated,
@@ -139,6 +140,8 @@ void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
{
int i;
+ vu_set_vnethdr(elem[0].in_sg[0].iov_base, elem_cnt);
+
for (i = 0; i < elem_cnt; i++) {
size_t elem_size = iov_size(elem[i].in_sg, elem[i].in_num);
@@ -146,7 +149,6 @@ void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
}
vu_queue_flush(vdev, vq, elem_cnt);
- vu_queue_notify(vdev, vq);
}
/**
@@ -260,8 +262,6 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
goto err;
}
- vu_set_vnethdr(in_sg[0].iov_base, elem_cnt);
-
total -= VNET_HLEN;
/* copy data from the buffer to the iovec */
@@ -271,6 +271,7 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
pcap_iov(in_sg, in_total, VNET_HLEN);
vu_flush(vdev, vq, elem, elem_cnt);
+ vu_queue_notify(vdev, vq);
trace("vhost-user sent %zu", total);
diff --git a/vu_common.h b/vu_common.h
index 7b060eb6184f..4037ab765b7d 100644
--- a/vu_common.h
+++ b/vu_common.h
@@ -39,7 +39,6 @@ int vu_collect(const struct vu_dev *vdev, struct vu_virtq *vq,
struct vu_virtq_element *elem, int max_elem,
struct iovec *in_sg, size_t max_in_sg, size_t *in_total,
size_t size, size_t *collected);
-void vu_set_vnethdr(struct virtio_net_hdr_mrg_rxbuf *vnethdr, int num_buffers);
void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
struct vu_virtq_element *elem, int elem_cnt);
void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
--
2.53.0
^ permalink raw reply [flat|nested] 9+ messages in thread* [PATCH v5 3/8] vhost-user: Centralise Ethernet frame padding in vu_collect(), vu_pad() and vu_flush()
2026-03-27 17:58 [PATCH v5 0/8] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
2026-03-27 17:58 ` [PATCH v5 1/8] iov: Introduce iov_memset() Laurent Vivier
2026-03-27 17:58 ` [PATCH v5 2/8] vu_common: Move vnethdr setup into vu_flush() Laurent Vivier
@ 2026-03-27 17:58 ` Laurent Vivier
2026-03-27 17:58 ` [PATCH v5 4/8] udp_vu: Move virtqueue management from udp_vu_sock_recv() to its caller Laurent Vivier
` (4 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Laurent Vivier @ 2026-03-27 17:58 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
The per-protocol padding done by vu_pad() in tcp_vu.c and udp_vu.c was
only correct for single-buffer frames, and assumed the padding area always
fell within the first iov. It also relied on each caller computing the
right MAX(..., ETH_ZLEN + VNET_HLEN) size for vu_collect() and calling
vu_pad() at the right point.
Centralise padding logic into three shared vhost-user helpers instead:
- vu_collect() now ensures at least ETH_ZLEN + VNET_HLEN bytes of buffer
space are collected, so there is always room for a minimum-sized frame.
- vu_pad() replaces the old single-iov helper with a new implementation
that takes a full iovec array plus a 'skipped' byte count. It uses a
new iov_memset() helper in iov.c to zero-fill the padding area across
iovec boundaries, then calls iov_truncate() to set the logical frame
size.
- vu_flush() computes the actual frame length (accounting for
VIRTIO_NET_F_MRG_RXBUF multi-buffer frames) and passes the padded
length to vu_queue_fill().
Callers in tcp_vu.c, udp_vu.c and vu_send_single() now use the new
vu_pad() in place of the old pad-then-truncate sequences and the
MAX(..., ETH_ZLEN + VNET_HLEN) size calculations passed to vu_collect().
Centralising padding here will also ease the move to multi-iovec per
element support, since there will be a single place to update.
In vu_send_single(), fix padding, truncation and data copy to use the
requested frame size rather than the total available buffer space from
vu_collect(), which could be larger. Also add matching padding, truncation
and explicit size to vu_collect() for the DUP_ACK path in
tcp_vu_send_flag().
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
iov.c | 1 -
tcp_vu.c | 23 ++++++-------------
udp_vu.c | 9 ++------
vu_common.c | 63 ++++++++++++++++++++++++++++++++++-------------------
vu_common.h | 2 +-
5 files changed, 50 insertions(+), 48 deletions(-)
diff --git a/iov.c b/iov.c
index 0188acdf5eba..8134b8c9f988 100644
--- a/iov.c
+++ b/iov.c
@@ -180,7 +180,6 @@ size_t iov_truncate(struct iovec *iov, size_t iov_cnt, size_t size)
* Will write less than @length bytes if it runs out of space in
* the iov
*/
-/* cppcheck-suppress unusedFunction */
void iov_memset(const struct iovec *iov, size_t iov_cnt, size_t offset, int c,
size_t length)
{
diff --git a/tcp_vu.c b/tcp_vu.c
index 0cd01190d612..7d3285152ad9 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -72,12 +72,12 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
struct vu_dev *vdev = c->vdev;
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
struct vu_virtq_element flags_elem[2];
- size_t optlen, hdrlen, l2len;
struct ipv6hdr *ip6h = NULL;
struct iphdr *ip4h = NULL;
struct iovec flags_iov[2];
struct tcp_syn_opts *opts;
struct iov_tail payload;
+ size_t optlen, hdrlen;
struct tcphdr *th;
struct ethhdr *eh;
uint32_t seq;
@@ -88,7 +88,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
elem_cnt = vu_collect(vdev, vq, &flags_elem[0], 1,
&flags_iov[0], 1, NULL,
- MAX(hdrlen + sizeof(*opts), ETH_ZLEN + VNET_HLEN), NULL);
+ hdrlen + sizeof(*opts), NULL);
if (elem_cnt != 1)
return -1;
@@ -128,7 +128,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
return ret;
}
- iov_truncate(&flags_iov[0], 1, hdrlen + optlen);
+ vu_pad(&flags_iov[0], 1, 0, hdrlen + optlen);
payload = IOV_TAIL(flags_elem[0].in_sg, 1, hdrlen);
if (flags & KEEPALIVE)
@@ -137,9 +137,6 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload,
NULL, seq, !*c->pcap);
- l2len = optlen + hdrlen - VNET_HLEN;
- vu_pad(&flags_elem[0].in_sg[0], l2len);
-
vu_flush(vdev, vq, flags_elem, 1);
if (*c->pcap)
@@ -148,14 +145,14 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
if (flags & DUP_ACK) {
elem_cnt = vu_collect(vdev, vq, &flags_elem[1], 1,
&flags_iov[1], 1, NULL,
- flags_elem[0].in_sg[0].iov_len, NULL);
+ hdrlen + optlen, NULL);
if (elem_cnt == 1 &&
flags_elem[1].in_sg[0].iov_len >=
flags_elem[0].in_sg[0].iov_len) {
+ vu_pad(&flags_iov[1], 1, 0, hdrlen + optlen);
memcpy(flags_elem[1].in_sg[0].iov_base,
flags_elem[0].in_sg[0].iov_base,
flags_elem[0].in_sg[0].iov_len);
-
vu_flush(vdev, vq, &flags_elem[1], 1);
if (*c->pcap)
@@ -211,7 +208,7 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
ARRAY_SIZE(elem) - elem_cnt,
&iov_vu[DISCARD_IOV_NUM + iov_used],
VIRTQUEUE_MAX_SIZE - iov_used, &in_total,
- MAX(MIN(mss, fillsize) + hdrlen, ETH_ZLEN + VNET_HLEN),
+ MIN(mss, fillsize) + hdrlen,
&frame_size);
if (cnt == 0)
break;
@@ -247,8 +244,7 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
if (!peek_offset_cap)
ret -= already_sent;
- /* adjust iov number and length of the last iov */
- i = iov_truncate(&iov_vu[DISCARD_IOV_NUM], iov_used, ret);
+ i = vu_pad(&iov_vu[DISCARD_IOV_NUM], iov_used, hdrlen, ret);
/* adjust head count */
while (*head_cnt > 0 && head[*head_cnt - 1] >= i)
@@ -444,7 +440,6 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
size_t frame_size = iov_size(iov, buf_cnt);
bool push = i == head_cnt - 1;
ssize_t dlen;
- size_t l2len;
assert(frame_size >= hdrlen);
@@ -457,10 +452,6 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap, push);
- /* Pad first/single buffer only, it's at least ETH_ZLEN long */
- l2len = dlen + hdrlen - VNET_HLEN;
- vu_pad(iov, l2len);
-
vu_flush(vdev, vq, &elem[head[i]], buf_cnt);
if (*c->pcap)
diff --git a/udp_vu.c b/udp_vu.c
index f8629af58ab5..537e9c92cfa6 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -73,8 +73,7 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
const struct vu_dev *vdev = c->vdev;
int elem_cnt, elem_used, iov_used;
struct msghdr msg = { 0 };
- size_t hdrlen, l2len;
- size_t iov_cnt;
+ size_t iov_cnt, hdrlen;
assert(!c->no_udp);
@@ -117,13 +116,9 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
iov_vu[0].iov_base = (char *)iov_vu[0].iov_base - hdrlen;
iov_vu[0].iov_len += hdrlen;
- iov_used = iov_truncate(iov_vu, iov_cnt, *dlen + hdrlen);
+ iov_used = vu_pad(iov_vu, iov_cnt, 0, *dlen + hdrlen);
elem_used = iov_used; /* one iovec per element */
- /* pad frame to 60 bytes: first buffer is at least ETH_ZLEN long */
- l2len = *dlen + hdrlen - VNET_HLEN;
- vu_pad(&iov_vu[0], l2len);
-
/* release unused buffers */
vu_queue_rewind(vq, elem_cnt - elem_used);
diff --git a/vu_common.c b/vu_common.c
index 7627fad5976b..3bc6f1f42a8e 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -74,6 +74,7 @@ int vu_collect(const struct vu_dev *vdev, struct vu_virtq *vq,
size_t current_iov = 0;
int elem_cnt = 0;
+ size = MAX(size, ETH_ZLEN + VNET_HLEN); /* Ethernet minimum size */
while (current_size < size && elem_cnt < max_elem &&
current_iov < max_in_sg) {
int ret;
@@ -113,13 +114,31 @@ int vu_collect(const struct vu_dev *vdev, struct vu_virtq *vq,
return elem_cnt;
}
+/**
+ * vu_pad() - Pad short frames to minimum Ethernet length and truncate iovec
+ * @iov: Pointer to iovec array
+ * @cnt: Number of entries in @iov
+ * @skipped: Bytes already accounted for in the frame but not in @iov
+ * @size: Data length in @iov
+ *
+ * Return: number of iovec entries after truncation
+ */
+size_t vu_pad(struct iovec *iov, size_t cnt, size_t skipped, size_t size)
+{
+ if (skipped + size < ETH_ZLEN + VNET_HLEN) {
+ iov_memset(iov, cnt, size, 0,
+ ETH_ZLEN + VNET_HLEN - (skipped + size));
+ }
+
+ return iov_truncate(iov, cnt, size);
+}
+
/**
* vu_set_vnethdr() - set virtio-net headers
* @vnethdr: Address of the header to set
* @num_buffers: Number of guest buffers of the frame
*/
-static void vu_set_vnethdr(struct virtio_net_hdr_mrg_rxbuf *vnethdr,
- int num_buffers)
+static void vu_set_vnethdr(struct virtio_net_hdr_mrg_rxbuf *vnethdr, int num_buffers)
{
vnethdr->hdr = VU_HEADER;
/* Note: if VIRTIO_NET_F_MRG_RXBUF is not negotiated,
@@ -138,15 +157,25 @@ static void vu_set_vnethdr(struct virtio_net_hdr_mrg_rxbuf *vnethdr,
void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
struct vu_virtq_element *elem, int elem_cnt)
{
+ size_t len, padding, elem_size;
int i;
vu_set_vnethdr(elem[0].in_sg[0].iov_base, elem_cnt);
- for (i = 0; i < elem_cnt; i++) {
- size_t elem_size = iov_size(elem[i].in_sg, elem[i].in_num);
-
+ len = 0;
+ for (i = 0; i < elem_cnt - 1; i++) {
+ elem_size = iov_size(elem[i].in_sg, elem[i].in_num);
vu_queue_fill(vdev, vq, &elem[i], elem_size, i);
+ len += elem_size;
}
+ /* pad the last element to have an Ethernet minimum size */
+ elem_size = iov_size(elem[i].in_sg, elem[i].in_num);
+ if (ETH_ZLEN + VNET_HLEN > len + elem_size)
+ padding = ETH_ZLEN + VNET_HLEN - (len + elem_size);
+ else
+ padding = 0;
+
+ vu_queue_fill(vdev, vq, &elem[i], elem_size + padding, i);
vu_queue_flush(vdev, vq, elem_cnt);
}
@@ -262,10 +291,12 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
goto err;
}
- total -= VNET_HLEN;
+ in_total = vu_pad(in_sg, in_total, 0, size);
+
+ size -= VNET_HLEN;
/* copy data from the buffer to the iovec */
- iov_from_buf(in_sg, in_total, VNET_HLEN, buf, total);
+ iov_from_buf(in_sg, in_total, VNET_HLEN, buf, size);
if (*c->pcap)
pcap_iov(in_sg, in_total, VNET_HLEN);
@@ -273,26 +304,12 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
vu_flush(vdev, vq, elem, elem_cnt);
vu_queue_notify(vdev, vq);
- trace("vhost-user sent %zu", total);
+ trace("vhost-user sent %zu", size);
- return total;
+ return size;
err:
for (i = 0; i < elem_cnt; i++)
vu_queue_detach_element(vq);
return -1;
}
-
-/**
- * vu_pad() - Pad 802.3 frame to minimum length (60 bytes) if needed
- * @iov: Buffer in iovec array where end of 802.3 frame is stored
- * @l2len: Layer-2 length already filled in frame
- */
-void vu_pad(struct iovec *iov, size_t l2len)
-{
- if (l2len >= ETH_ZLEN)
- return;
-
- memset((char *)iov->iov_base + iov->iov_len, 0, ETH_ZLEN - l2len);
- iov->iov_len += ETH_ZLEN - l2len;
-}
diff --git a/vu_common.h b/vu_common.h
index 4037ab765b7d..13e0126fb16c 100644
--- a/vu_common.h
+++ b/vu_common.h
@@ -39,11 +39,11 @@ int vu_collect(const struct vu_dev *vdev, struct vu_virtq *vq,
struct vu_virtq_element *elem, int max_elem,
struct iovec *in_sg, size_t max_in_sg, size_t *in_total,
size_t size, size_t *collected);
+size_t vu_pad(struct iovec *iov, size_t cnt, size_t skipped, size_t size);
void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
struct vu_virtq_element *elem, int elem_cnt);
void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
const struct timespec *now);
int vu_send_single(const struct ctx *c, const void *buf, size_t size);
-void vu_pad(struct iovec *iov, size_t l2len);
#endif /* VU_COMMON_H */
--
2.53.0
^ permalink raw reply [flat|nested] 9+ messages in thread* [PATCH v5 4/8] udp_vu: Move virtqueue management from udp_vu_sock_recv() to its caller
2026-03-27 17:58 [PATCH v5 0/8] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
` (2 preceding siblings ...)
2026-03-27 17:58 ` [PATCH v5 3/8] vhost-user: Centralise Ethernet frame padding in vu_collect(), vu_pad() and vu_flush() Laurent Vivier
@ 2026-03-27 17:58 ` Laurent Vivier
2026-03-27 17:58 ` [PATCH v5 5/8] udp_vu: Pass iov explicitly to helpers instead of using file-scoped array Laurent Vivier
` (3 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Laurent Vivier @ 2026-03-27 17:58 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
udp_vu_sock_recv() currently mixes two concerns: receiving data from the
socket and managing virtqueue buffers (collecting, rewinding, releasing).
This makes the function harder to reason about and couples socket I/O
with virtqueue state.
Move all virtqueue operations, vu_collect(), vu_init_elem(),
vu_queue_rewind(), vu_set_vnethdr(), and the queue-readiness check, into
udp_vu_sock_to_tap(), which is the only caller. This turns
udp_vu_sock_recv() into a pure socket receive function that simply reads
into the provided iov array and adjusts its length.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
udp_vu.c | 96 +++++++++++++++++++++++++++++---------------------------
1 file changed, 49 insertions(+), 47 deletions(-)
diff --git a/udp_vu.c b/udp_vu.c
index 537e9c92cfa6..90d5cd71bab2 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -58,45 +58,22 @@ static size_t udp_vu_hdrlen(bool v6)
/**
* udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers
- * @c: Execution context
- * @vq: virtqueue to use to receive data
* @s: Socket to receive from
* @v6: Set for IPv6 connections
- * @dlen: Size of received data (output)
+ * @iov_cnt: Number of collected iov in iov_vu (input)
+ * Number of iov entries used to store the datagram (output)
*
- * Return: number of iov entries used to store the datagram, 0 if the datagram
- * was discarded because the virtqueue is not ready, -1 on error
+ * Return: size of received data, -1 on error
*/
-static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
- bool v6, ssize_t *dlen)
+static ssize_t udp_vu_sock_recv(int s, bool v6, size_t *iov_cnt)
{
- const struct vu_dev *vdev = c->vdev;
- int elem_cnt, elem_used, iov_used;
struct msghdr msg = { 0 };
- size_t iov_cnt, hdrlen;
-
- assert(!c->no_udp);
-
- if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
- debug("Got UDP packet, but RX virtqueue not usable yet");
-
- if (recvmsg(s, &msg, MSG_DONTWAIT) < 0)
- debug_perror("Failed to discard datagram");
-
- return 0;
- }
+ size_t hdrlen;
+ ssize_t dlen;
/* compute L2 header length */
hdrlen = udp_vu_hdrlen(v6);
- elem_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem),
- iov_vu, ARRAY_SIZE(iov_vu), &iov_cnt,
- IP_MAX_MTU + ETH_HLEN + VNET_HLEN, NULL);
- if (elem_cnt == 0)
- return -1;
-
- assert((size_t)elem_cnt == iov_cnt); /* one iovec per element */
-
/* reserve space for the headers */
assert(iov_vu[0].iov_len >= MAX(hdrlen, ETH_ZLEN + VNET_HLEN));
iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen;
@@ -104,25 +81,19 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
/* read data from the socket */
msg.msg_iov = iov_vu;
- msg.msg_iovlen = iov_cnt;
+ msg.msg_iovlen = *iov_cnt;
- *dlen = recvmsg(s, &msg, 0);
- if (*dlen < 0) {
- vu_queue_rewind(vq, elem_cnt);
+ dlen = recvmsg(s, &msg, 0);
+ if (dlen < 0)
return -1;
- }
/* restore the pointer to the headers address */
iov_vu[0].iov_base = (char *)iov_vu[0].iov_base - hdrlen;
iov_vu[0].iov_len += hdrlen;
- iov_used = vu_pad(iov_vu, iov_cnt, 0, *dlen + hdrlen);
- elem_used = iov_used; /* one iovec per element */
+ *iov_cnt = vu_pad(iov_vu, *iov_cnt, 0, dlen + hdrlen);
- /* release unused buffers */
- vu_queue_rewind(vq, elem_cnt - elem_used);
-
- return iov_used;
+ return dlen;
}
/**
@@ -208,21 +179,52 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
int i;
+ assert(!c->no_udp);
+
+ if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
+ struct msghdr msg = { 0 };
+
+ debug("Got UDP packet, but RX virtqueue not usable yet");
+
+ for (i = 0; i < n; i++) {
+ if (recvmsg(s, &msg, MSG_DONTWAIT) < 0)
+ debug_perror("Failed to discard datagram");
+ }
+
+ return;
+ }
+
for (i = 0; i < n; i++) {
+ unsigned elem_cnt, elem_used;
+ size_t iov_cnt;
ssize_t dlen;
- int iov_used;
- iov_used = udp_vu_sock_recv(c, vq, s, v6, &dlen);
- if (iov_used < 0)
+ elem_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem),
+ iov_vu, ARRAY_SIZE(iov_vu), &iov_cnt,
+ IP_MAX_MTU + ETH_HLEN + VNET_HLEN, NULL);
+ if (elem_cnt == 0)
+ break;
+
+ assert((size_t)elem_cnt == iov_cnt); /* one iovec per element */
+
+ dlen = udp_vu_sock_recv(s, v6, &iov_cnt);
+ if (dlen < 0) {
+ vu_queue_rewind(vq, iov_cnt);
break;
+ }
+
+ elem_used = iov_cnt; /* one iovec per element */
+
+ /* release unused buffers */
+ vu_queue_rewind(vq, elem_cnt - elem_used);
- if (iov_used > 0) {
+ if (iov_cnt > 0) {
udp_vu_prepare(c, toside, dlen);
if (*c->pcap) {
- udp_vu_csum(toside, iov_used);
- pcap_iov(iov_vu, iov_used, VNET_HLEN);
+ udp_vu_csum(toside, iov_cnt);
+ pcap_iov(iov_vu, iov_cnt, VNET_HLEN);
}
- vu_flush(vdev, vq, elem, iov_used);
+ vu_flush(vdev, vq, elem, iov_cnt);
vu_queue_notify(vdev, vq);
}
}
--
2.53.0
^ permalink raw reply [flat|nested] 9+ messages in thread* [PATCH v5 5/8] udp_vu: Pass iov explicitly to helpers instead of using file-scoped array
2026-03-27 17:58 [PATCH v5 0/8] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
` (3 preceding siblings ...)
2026-03-27 17:58 ` [PATCH v5 4/8] udp_vu: Move virtqueue management from udp_vu_sock_recv() to its caller Laurent Vivier
@ 2026-03-27 17:58 ` Laurent Vivier
2026-03-27 17:58 ` [PATCH v5 6/8] udp_vu: Allow virtqueue elements with multiple iovec entries Laurent Vivier
` (2 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Laurent Vivier @ 2026-03-27 17:58 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
udp_vu_sock_recv(), udp_vu_prepare(), and udp_vu_csum() all operated on
the file-scoped iov_vu[] array directly. Pass iov and count as explicit
parameters instead, and move iov_vu[] and elem[] to function-local
statics in udp_vu_sock_to_tap(), the only function that needs them.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
udp_vu.c | 62 +++++++++++++++++++++++++++++---------------------------
1 file changed, 32 insertions(+), 30 deletions(-)
diff --git a/udp_vu.c b/udp_vu.c
index 90d5cd71bab2..d9de97ac5e13 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -33,9 +33,6 @@
#include "udp_vu.h"
#include "vu_common.h"
-static struct iovec iov_vu [VIRTQUEUE_MAX_SIZE];
-static struct vu_virtq_element elem [VIRTQUEUE_MAX_SIZE];
-
/**
* udp_vu_hdrlen() - Sum size of all headers, from UDP to virtio-net
* @v6: Set for IPv6 packet
@@ -58,14 +55,14 @@ static size_t udp_vu_hdrlen(bool v6)
/**
* udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers
+ * @iov: IO vector for the frame (in/out)
+ * @cnt: Number of IO vector entries (in/out)
* @s: Socket to receive from
* @v6: Set for IPv6 connections
- * @iov_cnt: Number of collected iov in iov_vu (input)
- * Number of iov entries used to store the datagram (output)
*
* Return: size of received data, -1 on error
*/
-static ssize_t udp_vu_sock_recv(int s, bool v6, size_t *iov_cnt)
+static ssize_t udp_vu_sock_recv(struct iovec *iov, size_t *cnt, int s, bool v6)
{
struct msghdr msg = { 0 };
size_t hdrlen;
@@ -75,23 +72,23 @@ static ssize_t udp_vu_sock_recv(int s, bool v6, size_t *iov_cnt)
hdrlen = udp_vu_hdrlen(v6);
/* reserve space for the headers */
- assert(iov_vu[0].iov_len >= MAX(hdrlen, ETH_ZLEN + VNET_HLEN));
- iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen;
- iov_vu[0].iov_len -= hdrlen;
+ assert(iov[0].iov_len >= MAX(hdrlen, ETH_ZLEN + VNET_HLEN));
+ iov[0].iov_base = (char *)iov[0].iov_base + hdrlen;
+ iov[0].iov_len -= hdrlen;
/* read data from the socket */
- msg.msg_iov = iov_vu;
- msg.msg_iovlen = *iov_cnt;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = *cnt;
dlen = recvmsg(s, &msg, 0);
if (dlen < 0)
return -1;
/* restore the pointer to the headers address */
- iov_vu[0].iov_base = (char *)iov_vu[0].iov_base - hdrlen;
- iov_vu[0].iov_len += hdrlen;
+ iov[0].iov_base = (char *)iov[0].iov_base - hdrlen;
+ iov[0].iov_len += hdrlen;
- *iov_cnt = vu_pad(iov_vu, *iov_cnt, 0, dlen + hdrlen);
+ *cnt = vu_pad(iov, *cnt, 0, dlen + hdrlen);
return dlen;
}
@@ -99,27 +96,28 @@ static ssize_t udp_vu_sock_recv(int s, bool v6, size_t *iov_cnt)
/**
* udp_vu_prepare() - Prepare the packet header
* @c: Execution context
+ * @iov: IO vector for the frame (including vnet header)
* @toside: Address information for one side of the flow
* @dlen: Packet data length
*
* Return: Layer-4 length
*/
-static size_t udp_vu_prepare(const struct ctx *c,
+static size_t udp_vu_prepare(const struct ctx *c, const struct iovec *iov,
const struct flowside *toside, ssize_t dlen)
{
struct ethhdr *eh;
size_t l4len;
/* ethernet header */
- eh = vu_eth(iov_vu[0].iov_base);
+ eh = vu_eth(iov[0].iov_base);
memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
/* initialize header */
if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
- struct iphdr *iph = vu_ip(iov_vu[0].iov_base);
- struct udp_payload_t *bp = vu_payloadv4(iov_vu[0].iov_base);
+ struct iphdr *iph = vu_ip(iov[0].iov_base);
+ struct udp_payload_t *bp = vu_payloadv4(iov[0].iov_base);
eh->h_proto = htons(ETH_P_IP);
@@ -127,8 +125,8 @@ static size_t udp_vu_prepare(const struct ctx *c,
l4len = udp_update_hdr4(iph, bp, toside, dlen, true);
} else {
- struct ipv6hdr *ip6h = vu_ip(iov_vu[0].iov_base);
- struct udp_payload_t *bp = vu_payloadv6(iov_vu[0].iov_base);
+ struct ipv6hdr *ip6h = vu_ip(iov[0].iov_base);
+ struct udp_payload_t *bp = vu_payloadv6(iov[0].iov_base);
eh->h_proto = htons(ETH_P_IPV6);
@@ -143,23 +141,25 @@ static size_t udp_vu_prepare(const struct ctx *c,
/**
* udp_vu_csum() - Calculate and set checksum for a UDP packet
* @toside: Address information for one side of the flow
- * @iov_used: Number of used iov_vu items
+ * @iov: IO vector for the frame
+ * @cnt: Number of IO vector entries
*/
-static void udp_vu_csum(const struct flowside *toside, int iov_used)
+static void udp_vu_csum(const struct flowside *toside, const struct iovec *iov,
+ size_t cnt)
{
const struct in_addr *src4 = inany_v4(&toside->oaddr);
const struct in_addr *dst4 = inany_v4(&toside->eaddr);
- char *base = iov_vu[0].iov_base;
+ char *base = iov[0].iov_base;
struct udp_payload_t *bp;
struct iov_tail data;
if (src4 && dst4) {
bp = vu_payloadv4(base);
- data = IOV_TAIL(iov_vu, iov_used, (char *)&bp->data - base);
+ data = IOV_TAIL(iov, cnt, (char *)&bp->data - base);
csum_udp4(&bp->uh, *src4, *dst4, &data);
} else {
bp = vu_payloadv6(base);
- data = IOV_TAIL(iov_vu, iov_used, (char *)&bp->data - base);
+ data = IOV_TAIL(iov, cnt, (char *)&bp->data - base);
csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data);
}
}
@@ -174,7 +174,9 @@ static void udp_vu_csum(const struct flowside *toside, int iov_used)
void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
{
const struct flowside *toside = flowside_at_sidx(tosidx);
+ static struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
+ static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE];
struct vu_dev *vdev = c->vdev;
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
int i;
@@ -207,9 +209,9 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
assert((size_t)elem_cnt == iov_cnt); /* one iovec per element */
- dlen = udp_vu_sock_recv(s, v6, &iov_cnt);
+ dlen = udp_vu_sock_recv(iov_vu, &iov_cnt, s, v6);
if (dlen < 0) {
- vu_queue_rewind(vq, iov_cnt);
+ vu_queue_rewind(vq, elem_cnt);
break;
}
@@ -219,12 +221,12 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
vu_queue_rewind(vq, elem_cnt - elem_used);
if (iov_cnt > 0) {
- udp_vu_prepare(c, toside, dlen);
+ udp_vu_prepare(c, iov_vu, toside, dlen);
if (*c->pcap) {
- udp_vu_csum(toside, iov_cnt);
+ udp_vu_csum(toside, iov_vu, iov_cnt);
pcap_iov(iov_vu, iov_cnt, VNET_HLEN);
}
- vu_flush(vdev, vq, elem, iov_cnt);
+ vu_flush(vdev, vq, elem, elem_used);
vu_queue_notify(vdev, vq);
}
}
--
2.53.0
^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v5 6/8] udp_vu: Allow virtqueue elements with multiple iovec entries
2026-03-27 17:58 [PATCH v5 0/8] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
` (4 preceding siblings ...)
2026-03-27 17:58 ` [PATCH v5 5/8] udp_vu: Pass iov explicitly to helpers instead of using file-scoped array Laurent Vivier
@ 2026-03-27 17:58 ` Laurent Vivier
2026-03-27 17:58 ` [PATCH v5 7/8] iov: Introduce IOV_PUSH_HEADER() macro Laurent Vivier
2026-03-27 17:58 ` [PATCH v5 8/8] udp: Pass iov_tail to udp_update_hdr4()/udp_update_hdr6() Laurent Vivier
7 siblings, 0 replies; 9+ messages in thread
From: Laurent Vivier @ 2026-03-27 17:58 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
The previous code assumed a 1:1 mapping between virtqueue elements and
iovec entries (enforced by an assert). Drop that assumption to allow
elements that span multiple iovecs: track elem_used separately by
walking the element list against the iov count returned after padding.
This also fixes vu_queue_rewind() and vu_flush() to use the element
count rather than the iov count.
Use iov_tail_clone() in udp_vu_sock_recv() to handle header offset,
replacing the manual base/len adjustment and restore pattern.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
udp_vu.c | 29 ++++++++++++++---------------
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/udp_vu.c b/udp_vu.c
index d9de97ac5e13..ea536e2ad240 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -64,30 +64,25 @@ static size_t udp_vu_hdrlen(bool v6)
*/
static ssize_t udp_vu_sock_recv(struct iovec *iov, size_t *cnt, int s, bool v6)
{
+ struct iovec msg_iov[*cnt];
struct msghdr msg = { 0 };
+ struct iov_tail payload;
size_t hdrlen;
ssize_t dlen;
/* compute L2 header length */
hdrlen = udp_vu_hdrlen(v6);
- /* reserve space for the headers */
- assert(iov[0].iov_len >= MAX(hdrlen, ETH_ZLEN + VNET_HLEN));
- iov[0].iov_base = (char *)iov[0].iov_base + hdrlen;
- iov[0].iov_len -= hdrlen;
+ payload = IOV_TAIL(iov, *cnt, hdrlen);
- /* read data from the socket */
- msg.msg_iov = iov;
- msg.msg_iovlen = *cnt;
+ msg.msg_iov = msg_iov;
+ msg.msg_iovlen = iov_tail_clone(msg.msg_iov, payload.cnt, &payload);
+ /* read data from the socket */
dlen = recvmsg(s, &msg, 0);
if (dlen < 0)
return -1;
- /* restore the pointer to the headers address */
- iov[0].iov_base = (char *)iov[0].iov_base - hdrlen;
- iov[0].iov_len += hdrlen;
-
*cnt = vu_pad(iov, *cnt, 0, dlen + hdrlen);
return dlen;
@@ -197,7 +192,7 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
}
for (i = 0; i < n; i++) {
- unsigned elem_cnt, elem_used;
+ unsigned elem_cnt, elem_used, j, k;
size_t iov_cnt;
ssize_t dlen;
@@ -207,15 +202,19 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
if (elem_cnt == 0)
break;
- assert((size_t)elem_cnt == iov_cnt); /* one iovec per element */
-
dlen = udp_vu_sock_recv(iov_vu, &iov_cnt, s, v6);
if (dlen < 0) {
vu_queue_rewind(vq, elem_cnt);
break;
}
- elem_used = iov_cnt; /* one iovec per element */
+ elem_used = 0;
+ for (j = 0, k = 0; k < iov_cnt && j < elem_cnt; j++) {
+ if (k + elem[j].in_num > iov_cnt)
+ elem[j].in_num = iov_cnt - k;
+ k += elem[j].in_num;
+ elem_used++;
+ }
/* release unused buffers */
vu_queue_rewind(vq, elem_cnt - elem_used);
--
2.53.0
^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v5 7/8] iov: Introduce IOV_PUSH_HEADER() macro
2026-03-27 17:58 [PATCH v5 0/8] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
` (5 preceding siblings ...)
2026-03-27 17:58 ` [PATCH v5 6/8] udp_vu: Allow virtqueue elements with multiple iovec entries Laurent Vivier
@ 2026-03-27 17:58 ` Laurent Vivier
2026-03-27 17:58 ` [PATCH v5 8/8] udp: Pass iov_tail to udp_update_hdr4()/udp_update_hdr6() Laurent Vivier
7 siblings, 0 replies; 9+ messages in thread
From: Laurent Vivier @ 2026-03-27 17:58 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Add iov_push_header_() and its typed wrapper IOV_PUSH_HEADER() to write
a header into an iov_tail at the current offset and advance past it.
This is the write counterpart to IOV_PEEK_HEADER() / IOV_REMOVE_HEADER(),
using iov_from_buf() to copy the header data across iovec boundaries.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
iov.c | 23 +++++++++++++++++++++++
iov.h | 11 +++++++++++
2 files changed, 34 insertions(+)
diff --git a/iov.c b/iov.c
index 8134b8c9f988..3b7174db3352 100644
--- a/iov.c
+++ b/iov.c
@@ -308,6 +308,29 @@ void *iov_peek_header_(struct iov_tail *tail, void *v, size_t len, size_t align)
return v;
}
+/**
+ * iov_push_header_() - Write a new header to an IOV tail
+ * @tail: IOV tail to write header to
+ * @v: Pointer to header data to write
+ * @len: Length of header to write, in bytes
+ *
+ * Return: number of bytes written
+ */
+/* cppcheck-suppress unusedFunction */
+size_t iov_push_header_(struct iov_tail *tail, const void *v, size_t len)
+{
+ size_t l;
+
+ if (!iov_tail_prune(tail))
+ return 0; /* No space */
+
+ l = iov_from_buf(tail->iov, tail->cnt, tail->off, v, len);
+
+ tail->off = tail->off + l;
+
+ return l;
+}
+
/**
* iov_remove_header_() - Remove a header from an IOV tail
* @tail: IOV tail to remove header from (modified)
diff --git a/iov.h b/iov.h
index d295d05b3bab..7f0eba4ed71c 100644
--- a/iov.h
+++ b/iov.h
@@ -90,6 +90,7 @@ bool iov_tail_prune(struct iov_tail *tail);
size_t iov_tail_size(struct iov_tail *tail);
bool iov_drop_header(struct iov_tail *tail, size_t len);
void *iov_peek_header_(struct iov_tail *tail, void *v, size_t len, size_t align);
+size_t iov_push_header_(struct iov_tail *tail, const void *v, size_t len);
void *iov_remove_header_(struct iov_tail *tail, void *v, size_t len, size_t align);
ssize_t iov_tail_clone(struct iovec *dst_iov, size_t dst_iov_cnt,
struct iov_tail *tail);
@@ -112,6 +113,16 @@ ssize_t iov_tail_clone(struct iovec *dst_iov, size_t dst_iov_cnt,
sizeof(var_), \
__alignof__(var_))))
+/**
+ * IOV_PUSH_HEADER() - Write a new header to an IOV tail
+ * @tail_: IOV tail to write header to
+ * @var_: A variable containing the header data to write
+ *
+ * Return: number of bytes written
+ */
+#define IOV_PUSH_HEADER(tail_, var_) \
+ (iov_push_header_((tail_), &(var_), sizeof(var_)))
+
/**
* IOV_REMOVE_HEADER() - Remove and return typed header from an IOV tail
* @tail_: IOV tail to remove header from (modified)
--
2.53.0
^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v5 8/8] udp: Pass iov_tail to udp_update_hdr4()/udp_update_hdr6()
2026-03-27 17:58 [PATCH v5 0/8] vhost-user,udp: Handle multiple iovec entries per virtqueue element Laurent Vivier
` (6 preceding siblings ...)
2026-03-27 17:58 ` [PATCH v5 7/8] iov: Introduce IOV_PUSH_HEADER() macro Laurent Vivier
@ 2026-03-27 17:58 ` Laurent Vivier
7 siblings, 0 replies; 9+ messages in thread
From: Laurent Vivier @ 2026-03-27 17:58 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Change udp_update_hdr4() and udp_update_hdr6() to take an iov_tail
pointing at the UDP frame instead of a contiguous udp_payload_t buffer
and explicit data length. This lets vhost-user pass scatter-gather
virtqueue buffers directly without an intermediate copy.
The UDP header is built into a local struct udphdr and written back with
IOV_PUSH_HEADER(). On the tap side, udp_tap_prepare() wraps the
existing udp_payload_t in a two-element iov to match the new interface.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
iov.c | 1 -
udp.c | 81 +++++++++++++++++++++++++++-----------------------
udp_internal.h | 10 +++----
udp_vu.c | 80 ++++++++++++++++++++++++++-----------------------
4 files changed, 89 insertions(+), 83 deletions(-)
diff --git a/iov.c b/iov.c
index 3b7174db3352..23cc247fd6f2 100644
--- a/iov.c
+++ b/iov.c
@@ -316,7 +316,6 @@ void *iov_peek_header_(struct iov_tail *tail, void *v, size_t len, size_t align)
*
* Return: number of bytes written
*/
-/* cppcheck-suppress unusedFunction */
size_t iov_push_header_(struct iov_tail *tail, const void *v, size_t len)
{
size_t l;
diff --git a/udp.c b/udp.c
index 1fc5a42c5ca7..86b745cecb42 100644
--- a/udp.c
+++ b/udp.c
@@ -255,21 +255,20 @@ static void udp_iov_init(const struct ctx *c)
/**
* udp_update_hdr4() - Update headers for one IPv4 datagram
* @ip4h: Pre-filled IPv4 header (except for tot_len and saddr)
- * @bp: Pointer to udp_payload_t to update
+ * @payload: iov_tail with UDP payload to update
* @toside: Flowside for destination side
- * @dlen: Length of UDP payload
* @no_udp_csum: Do not set UDP checksum
*
* Return: size of IPv4 payload (UDP header + data)
*/
-size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
- const struct flowside *toside, size_t dlen,
- bool no_udp_csum)
+size_t udp_update_hdr4(struct iphdr *ip4h, struct iov_tail *payload,
+ const struct flowside *toside, bool no_udp_csum)
{
const struct in_addr *src = inany_v4(&toside->oaddr);
const struct in_addr *dst = inany_v4(&toside->eaddr);
- size_t l4len = dlen + sizeof(bp->uh);
+ size_t l4len = iov_tail_size(payload);
size_t l3len = l4len + sizeof(*ip4h);
+ struct udphdr uh;
assert(src && dst);
@@ -278,19 +277,18 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
ip4h->saddr = src->s_addr;
ip4h->check = csum_ip4_header(l3len, IPPROTO_UDP, *src, *dst);
- bp->uh.source = htons(toside->oport);
- bp->uh.dest = htons(toside->eport);
- bp->uh.len = htons(l4len);
+ uh.source = htons(toside->oport);
+ uh.dest = htons(toside->eport);
+ uh.len = htons(l4len);
if (no_udp_csum) {
- bp->uh.check = 0;
+ uh.check = 0;
} else {
- const struct iovec iov = {
- .iov_base = bp->data,
- .iov_len = dlen
- };
- struct iov_tail data = IOV_TAIL(&iov, 1, 0);
- csum_udp4(&bp->uh, *src, *dst, &data);
+ struct iov_tail data = *payload;
+
+ IOV_DROP_HEADER(&data, struct udphdr);
+ csum_udp4(&uh, *src, *dst, &data);
}
+ IOV_PUSH_HEADER(payload, uh);
return l4len;
}
@@ -299,18 +297,17 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
* udp_update_hdr6() - Update headers for one IPv6 datagram
* @ip6h: Pre-filled IPv6 header (except for payload_len and
* addresses)
- * @bp: Pointer to udp_payload_t to update
+ * @payload: iov_tail with UDP payload to update
* @toside: Flowside for destination side
- * @dlen: Length of UDP payload
* @no_udp_csum: Do not set UDP checksum
*
* Return: size of IPv6 payload (UDP header + data)
*/
-size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
- const struct flowside *toside, size_t dlen,
- bool no_udp_csum)
+size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct iov_tail *payload,
+ const struct flowside *toside, bool no_udp_csum)
{
- uint16_t l4len = dlen + sizeof(bp->uh);
+ uint16_t l4len = iov_tail_size(payload);
+ struct udphdr uh;
ip6h->payload_len = htons(l4len);
ip6h->daddr = toside->eaddr.a6;
@@ -319,23 +316,24 @@ size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
ip6h->nexthdr = IPPROTO_UDP;
ip6h->hop_limit = 255;
- bp->uh.source = htons(toside->oport);
- bp->uh.dest = htons(toside->eport);
- bp->uh.len = ip6h->payload_len;
+ uh.source = htons(toside->oport);
+ uh.dest = htons(toside->eport);
+ uh.len = htons(l4len);
if (no_udp_csum) {
/* 0 is an invalid checksum for UDP IPv6 and dropped by
- * the kernel stack, even if the checksum is disabled by virtio
- * flags. We need to put any non-zero value here.
+ * the kernel stack, even if the checksum is disabled
+ * by virtio flags. We need to put any non-zero value
+ * here.
*/
- bp->uh.check = 0xffff;
+ uh.check = 0xffff;
} else {
- const struct iovec iov = {
- .iov_base = bp->data,
- .iov_len = dlen
- };
- struct iov_tail data = IOV_TAIL(&iov, 1, 0);
- csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data);
+ struct iov_tail data = *payload;
+
+ IOV_DROP_HEADER(&data, struct udphdr);
+ csum_udp6(&uh, &toside->oaddr.a6, &toside->eaddr.a6,
+ &data);
}
+ IOV_PUSH_HEADER(payload, uh);
return l4len;
}
@@ -374,12 +372,19 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
struct ethhdr *eh = (*tap_iov)[UDP_IOV_ETH].iov_base;
struct udp_payload_t *bp = &udp_payload[idx];
struct udp_meta_t *bm = &udp_meta[idx];
+ struct iovec iov[2];
+ struct iov_tail payload = IOV_TAIL(iov, ARRAY_SIZE(iov), 0);
size_t l4len, l2len;
+ iov[0].iov_base = &bp->uh;
+ iov[0].iov_len = sizeof(bp->uh);
+ iov[1].iov_base = bp->data;
+ iov[1].iov_len = mmh[idx].msg_len;
+
eth_update_mac(eh, NULL, tap_omac);
if (!inany_v4(&toside->eaddr) || !inany_v4(&toside->oaddr)) {
- l4len = udp_update_hdr6(&bm->ip6h, bp, toside,
- mmh[idx].msg_len, no_udp_csum);
+ l4len = udp_update_hdr6(&bm->ip6h, &payload, toside,
+ no_udp_csum);
l2len = MAX(l4len + sizeof(bm->ip6h) + ETH_HLEN, ETH_ZLEN);
tap_hdr_update(&bm->taph, l2len);
@@ -387,8 +392,8 @@ static void udp_tap_prepare(const struct mmsghdr *mmh,
eh->h_proto = htons_constant(ETH_P_IPV6);
(*tap_iov)[UDP_IOV_IP] = IOV_OF_LVALUE(bm->ip6h);
} else {
- l4len = udp_update_hdr4(&bm->ip4h, bp, toside,
- mmh[idx].msg_len, no_udp_csum);
+ l4len = udp_update_hdr4(&bm->ip4h, &payload, toside,
+ no_udp_csum);
l2len = MAX(l4len + sizeof(bm->ip4h) + ETH_HLEN, ETH_ZLEN);
tap_hdr_update(&bm->taph, l2len);
diff --git a/udp_internal.h b/udp_internal.h
index 64e457748324..d235746aa3e5 100644
--- a/udp_internal.h
+++ b/udp_internal.h
@@ -25,12 +25,10 @@ struct udp_payload_t {
} __attribute__ ((packed, aligned(__alignof__(unsigned int))));
#endif
-size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
- const struct flowside *toside, size_t dlen,
- bool no_udp_csum);
-size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
- const struct flowside *toside, size_t dlen,
- bool no_udp_csum);
+size_t udp_update_hdr4(struct iphdr *ip4h, struct iov_tail *payload,
+ const struct flowside *toside, bool no_udp_csum);
+size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct iov_tail *payload,
+ const struct flowside *toside, bool no_udp_csum);
void udp_sock_fwd(const struct ctx *c, int s, int rule_hint,
uint8_t frompif, in_port_t port, const struct timespec *now);
diff --git a/udp_vu.c b/udp_vu.c
index ea536e2ad240..cc09fe0fa511 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -91,43 +91,52 @@ static ssize_t udp_vu_sock_recv(struct iovec *iov, size_t *cnt, int s, bool v6)
/**
* udp_vu_prepare() - Prepare the packet header
* @c: Execution context
- * @iov: IO vector for the frame (including vnet header)
+ * @data: IO vector tail for the frame,
+ * on return, points to the L3 frame
* @toside: Address information for one side of the flow
- * @dlen: Packet data length
*
* Return: Layer-4 length
*/
-static size_t udp_vu_prepare(const struct ctx *c, const struct iovec *iov,
- const struct flowside *toside, ssize_t dlen)
+static size_t udp_vu_prepare(const struct ctx *c, struct iov_tail *data,
+ const struct flowside *toside)
{
- struct ethhdr *eh;
+ bool ipv4 = inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr);
+ struct ethhdr eh;
size_t l4len;
/* ethernet header */
- eh = vu_eth(iov[0].iov_base);
+ memcpy(eh.h_dest, c->guest_mac, sizeof(eh.h_dest));
+ memcpy(eh.h_source, c->our_tap_mac, sizeof(eh.h_source));
- memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
- memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
+ if (ipv4)
+ eh.h_proto = htons(ETH_P_IP);
+ else
+ eh.h_proto = htons(ETH_P_IPV6);
+ IOV_PUSH_HEADER(data, eh);
/* initialize header */
- if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
- struct iphdr *iph = vu_ip(iov[0].iov_base);
- struct udp_payload_t *bp = vu_payloadv4(iov[0].iov_base);
+ if (ipv4) {
+ struct iov_tail udp_frame;
+ struct iphdr iph;
- eh->h_proto = htons(ETH_P_IP);
+ iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
- *iph = (struct iphdr)L2_BUF_IP4_INIT(IPPROTO_UDP);
+ udp_frame = *data;
+ IOV_DROP_HEADER(&udp_frame, struct iphdr);
+ l4len = udp_update_hdr4(&iph, &udp_frame, toside, true);
- l4len = udp_update_hdr4(iph, bp, toside, dlen, true);
+ IOV_PUSH_HEADER(data, iph);
} else {
- struct ipv6hdr *ip6h = vu_ip(iov[0].iov_base);
- struct udp_payload_t *bp = vu_payloadv6(iov[0].iov_base);
+ struct iov_tail udp_frame;
+ struct ipv6hdr ip6h;
- eh->h_proto = htons(ETH_P_IPV6);
+ ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
- *ip6h = (struct ipv6hdr)L2_BUF_IP6_INIT(IPPROTO_UDP);
+ udp_frame = *data;
+ IOV_DROP_HEADER(&udp_frame, struct ipv6hdr);
+ l4len = udp_update_hdr6(&ip6h, &udp_frame, toside, true);
- l4len = udp_update_hdr6(ip6h, bp, toside, dlen, true);
+ IOV_PUSH_HEADER(data, ip6h);
}
return l4len;
@@ -136,27 +145,21 @@ static size_t udp_vu_prepare(const struct ctx *c, const struct iovec *iov,
/**
* udp_vu_csum() - Calculate and set checksum for a UDP packet
* @toside: Address information for one side of the flow
- * @iov: IO vector for the frame
- * @cnt: Number of IO vector entries
+ * @data: IO vector tail for the L3 frame
*/
-static void udp_vu_csum(const struct flowside *toside, const struct iovec *iov,
- size_t cnt)
+static void udp_vu_csum(const struct flowside *toside, struct iov_tail *data)
{
const struct in_addr *src4 = inany_v4(&toside->oaddr);
const struct in_addr *dst4 = inany_v4(&toside->eaddr);
- char *base = iov[0].iov_base;
- struct udp_payload_t *bp;
- struct iov_tail data;
-
- if (src4 && dst4) {
- bp = vu_payloadv4(base);
- data = IOV_TAIL(iov, cnt, (char *)&bp->data - base);
- csum_udp4(&bp->uh, *src4, *dst4, &data);
- } else {
- bp = vu_payloadv6(base);
- data = IOV_TAIL(iov, cnt, (char *)&bp->data - base);
- csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data);
- }
+ struct udphdr *uh, uh_storage;
+ bool ipv4 = src4 && dst4;
+
+ uh = IOV_REMOVE_HEADER(data, uh_storage);
+
+ if (ipv4)
+ csum_udp4(uh, *src4, *dst4, data);
+ else
+ csum_udp6(uh, &toside->oaddr.a6, &toside->eaddr.a6, data);
}
/**
@@ -220,9 +223,10 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
vu_queue_rewind(vq, elem_cnt - elem_used);
if (iov_cnt > 0) {
- udp_vu_prepare(c, iov_vu, toside, dlen);
+ struct iov_tail data = IOV_TAIL(iov_vu, iov_cnt, VNET_HLEN);
+ udp_vu_prepare(c, &data, toside);
if (*c->pcap) {
- udp_vu_csum(toside, iov_vu, iov_cnt);
+ udp_vu_csum(toside, &data);
pcap_iov(iov_vu, iov_cnt, VNET_HLEN);
}
vu_flush(vdev, vq, elem, elem_used);
--
2.53.0
^ permalink raw reply [flat|nested] 9+ messages in thread