* [PATCH 01/10] iov: Introduce iov_memset()
2026-04-01 19:18 [PATCH 00/10] vhost-user: Preparatory series for multiple iovec entries per virtqueue element Laurent Vivier
@ 2026-04-01 19:18 ` Laurent Vivier
2026-04-01 19:18 ` [PATCH 02/10] iov: Add iov_memcopy() to copy data between iovec arrays Laurent Vivier
` (8 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Laurent Vivier @ 2026-04-01 19:18 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Add a helper to set a range of bytes across an IO vector to a given
value, similar to memset() but operating over scatter-gather buffers.
It skips to the given offset and fills across iovec entries up to the
requested length.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
iov.c | 27 +++++++++++++++++++++++++++
iov.h | 2 ++
2 files changed, 29 insertions(+)
diff --git a/iov.c b/iov.c
index ae0743931d18..0188acdf5eba 100644
--- a/iov.c
+++ b/iov.c
@@ -170,6 +170,33 @@ size_t iov_truncate(struct iovec *iov, size_t iov_cnt, size_t size)
return i;
}
+/**
+ * iov_memset() - Set bytes of an IO vector to a given value
+ * @iov: IO vector
+ * @iov_cnt: Number of elements in @iov
+ * @offset: Byte offset in the iovec at which to start
+ * @c: Byte value to fill with
+ * @length: Number of bytes to set
+ * Will write fewer than @length bytes if it runs out of space in
+ * the iov
+ */
+/* cppcheck-suppress unusedFunction */
+void iov_memset(const struct iovec *iov, size_t iov_cnt, size_t offset, int c,
+ size_t length)
+{
+ size_t i;
+
+ i = iov_skip_bytes(iov, iov_cnt, offset, &offset);
+
+ for ( ; i < iov_cnt && length; i++) {
+ size_t n = MIN(iov[i].iov_len - offset, length);
+
+ memset((char *)iov[i].iov_base + offset, c, n);
+ offset = 0;
+ length -= n;
+ }
+}
+
/**
* iov_tail_prune() - Remove any unneeded buffers from an IOV tail
* @tail: IO vector tail (modified)
diff --git a/iov.h b/iov.h
index b4e50b0fca5a..d295d05b3bab 100644
--- a/iov.h
+++ b/iov.h
@@ -30,6 +30,8 @@ size_t iov_to_buf(const struct iovec *iov, size_t iov_cnt,
size_t offset, void *buf, size_t bytes);
size_t iov_size(const struct iovec *iov, size_t iov_cnt);
size_t iov_truncate(struct iovec *iov, size_t iov_cnt, size_t size);
+void iov_memset(const struct iovec *iov, size_t iov_cnt, size_t offset, int c,
+ size_t length);
/*
* DOC: Theory of Operation, struct iov_tail
--
2.53.0
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 02/10] iov: Add iov_memcopy() to copy data between iovec arrays
2026-04-01 19:18 [PATCH 00/10] vhost-user: Preparatory series for multiple iovec entries per virtqueue element Laurent Vivier
2026-04-01 19:18 ` [PATCH 01/10] iov: Introduce iov_memset() Laurent Vivier
@ 2026-04-01 19:18 ` Laurent Vivier
2026-04-01 19:18 ` [PATCH 03/10] vu_common: Move vnethdr setup into vu_flush() Laurent Vivier
` (7 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Laurent Vivier @ 2026-04-01 19:18 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Add a helper to copy data from a source iovec array to a destination
iovec array, each starting at an arbitrary byte offset, iterating
through both arrays simultaneously and copying in chunks matching the
smaller of the two current segments.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
iov.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
iov.h | 3 +++
2 files changed, 55 insertions(+)
diff --git a/iov.c b/iov.c
index 0188acdf5eba..83b683f3976a 100644
--- a/iov.c
+++ b/iov.c
@@ -197,6 +197,58 @@ void iov_memset(const struct iovec *iov, size_t iov_cnt, size_t offset, int c,
}
}
+/**
+ * iov_memcopy() - Copy data between two iovec arrays
+ * @dst_iov: Destination iovec array
+ * @dst_iov_cnt: Number of elements in destination iovec array
+ * @dst_offs: Byte offset in the destination iovec at which to start
+ * @iov: Source iovec array
+ * @iov_cnt: Number of elements in source iovec array
+ * @offs: Byte offset in the source iovec at which to start
+ * @length: Number of bytes to copy
+ *
+ * Return: total number of bytes copied
+ */
+/* cppcheck-suppress unusedFunction */
+size_t iov_memcopy(struct iovec *dst_iov, size_t dst_iov_cnt, size_t dst_offs,
+ const struct iovec *iov, size_t iov_cnt, size_t offs,
+ size_t length)
+{
+ unsigned int i, j;
+ size_t total = 0;
+
+ i = iov_skip_bytes(iov, iov_cnt, offs, &offs);
+ j = iov_skip_bytes(dst_iov, dst_iov_cnt, dst_offs, &dst_offs);
+
+ /* copying data */
+ while (length && i < iov_cnt && j < dst_iov_cnt) {
+ size_t n = MIN(dst_iov[j].iov_len - dst_offs,
+ iov[i].iov_len - offs);
+
+ if (n > length)
+ n = length;
+
+ memcpy((char *)dst_iov[j].iov_base + dst_offs,
+ (const char *)iov[i].iov_base + offs, n);
+
+ dst_offs += n;
+ offs += n;
+ total += n;
+ length -= n;
+
+ if (dst_offs == dst_iov[j].iov_len) {
+ dst_offs = 0;
+ j++;
+ }
+ if (offs == iov[i].iov_len) {
+ offs = 0;
+ i++;
+ }
+ }
+
+ return total;
+}
+
/**
* iov_tail_prune() - Remove any unneeded buffers from an IOV tail
* @tail: IO vector tail (modified)
diff --git a/iov.h b/iov.h
index d295d05b3bab..074266e127ef 100644
--- a/iov.h
+++ b/iov.h
@@ -32,6 +32,9 @@ size_t iov_size(const struct iovec *iov, size_t iov_cnt);
size_t iov_truncate(struct iovec *iov, size_t iov_cnt, size_t size);
void iov_memset(const struct iovec *iov, size_t iov_cnt, size_t offset, int c,
size_t length);
+size_t iov_memcopy(struct iovec *dst_iov, size_t dst_iov_cnt, size_t dst_offs,
+ const struct iovec *iov, size_t iov_cnt, size_t offs,
+ size_t length);
/*
* DOC: Theory of Operation, struct iov_tail
--
2.53.0
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 03/10] vu_common: Move vnethdr setup into vu_flush()
2026-04-01 19:18 [PATCH 00/10] vhost-user: Preparatory series for multiple iovec entries per virtqueue element Laurent Vivier
2026-04-01 19:18 ` [PATCH 01/10] iov: Introduce iov_memset() Laurent Vivier
2026-04-01 19:18 ` [PATCH 02/10] iov: Add iov_memcopy() to copy data between iovec arrays Laurent Vivier
@ 2026-04-01 19:18 ` Laurent Vivier
2026-04-01 19:18 ` [PATCH 04/10] udp_vu: Move virtqueue management from udp_vu_sock_recv() to its caller Laurent Vivier
` (6 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Laurent Vivier @ 2026-04-01 19:18 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Every caller of vu_flush() was calling vu_set_vnethdr() beforehand with
the same pattern. Move it into vu_flush().
Remove vu_queue_notify() from vu_flush() and let callers invoke it
explicitly. This allows paths that perform multiple flushes, such as
tcp_vu_send_flag() and tcp_vu_data_from_sock(), to issue a single guest
notification at the end.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
tcp_vu.c | 19 ++++++++-----------
udp_vu.c | 3 +--
vu_common.c | 9 +++++----
vu_common.h | 1 -
4 files changed, 14 insertions(+), 18 deletions(-)
diff --git a/tcp_vu.c b/tcp_vu.c
index dc0e17c0f03f..0cd01190d612 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -82,7 +82,6 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
struct ethhdr *eh;
uint32_t seq;
int elem_cnt;
- int nb_ack;
int ret;
hdrlen = tcp_vu_hdrlen(CONN_V6(conn));
@@ -97,8 +96,6 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
assert(flags_elem[0].in_sg[0].iov_len >=
MAX(hdrlen + sizeof(*opts), ETH_ZLEN + VNET_HLEN));
- vu_set_vnethdr(flags_elem[0].in_sg[0].iov_base, 1);
-
eh = vu_eth(flags_elem[0].in_sg[0].iov_base);
memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
@@ -143,9 +140,10 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
l2len = optlen + hdrlen - VNET_HLEN;
vu_pad(&flags_elem[0].in_sg[0], l2len);
+ vu_flush(vdev, vq, flags_elem, 1);
+
if (*c->pcap)
pcap_iov(&flags_elem[0].in_sg[0], 1, VNET_HLEN);
- nb_ack = 1;
if (flags & DUP_ACK) {
elem_cnt = vu_collect(vdev, vq, &flags_elem[1], 1,
@@ -157,14 +155,14 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
memcpy(flags_elem[1].in_sg[0].iov_base,
flags_elem[0].in_sg[0].iov_base,
flags_elem[0].in_sg[0].iov_len);
- nb_ack++;
+
+ vu_flush(vdev, vq, &flags_elem[1], 1);
if (*c->pcap)
pcap_iov(&flags_elem[1].in_sg[0], 1, VNET_HLEN);
}
}
-
- vu_flush(vdev, vq, flags_elem, nb_ack);
+ vu_queue_notify(vdev, vq);
return 0;
}
@@ -451,7 +449,6 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
assert(frame_size >= hdrlen);
dlen = frame_size - hdrlen;
- vu_set_vnethdr(iov->iov_base, buf_cnt);
/* The IPv4 header checksum varies only with dlen */
if (previous_dlen != dlen)
@@ -464,14 +461,14 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
l2len = dlen + hdrlen - VNET_HLEN;
vu_pad(iov, l2len);
+ vu_flush(vdev, vq, &elem[head[i]], buf_cnt);
+
if (*c->pcap)
pcap_iov(iov, buf_cnt, VNET_HLEN);
conn->seq_to_tap += dlen;
}
-
- /* send packets */
- vu_flush(vdev, vq, elem, iov_cnt);
+ vu_queue_notify(vdev, vq);
conn_flag(c, conn, ACK_FROM_TAP_DUE);
diff --git a/udp_vu.c b/udp_vu.c
index cc69654398f0..f8629af58ab5 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -124,8 +124,6 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
l2len = *dlen + hdrlen - VNET_HLEN;
vu_pad(&iov_vu[0], l2len);
- vu_set_vnethdr(iov_vu[0].iov_base, elem_used);
-
/* release unused buffers */
vu_queue_rewind(vq, elem_cnt - elem_used);
@@ -230,6 +228,7 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
pcap_iov(iov_vu, iov_used, VNET_HLEN);
}
vu_flush(vdev, vq, elem, iov_used);
+ vu_queue_notify(vdev, vq);
}
}
}
diff --git a/vu_common.c b/vu_common.c
index 13b1e51001d4..57949ca32309 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -118,7 +118,8 @@ int vu_collect(const struct vu_dev *vdev, struct vu_virtq *vq,
* @vnethdr: Address of the header to set
* @num_buffers: Number of guest buffers of the frame
*/
-void vu_set_vnethdr(struct virtio_net_hdr_mrg_rxbuf *vnethdr, int num_buffers)
+static void vu_set_vnethdr(struct virtio_net_hdr_mrg_rxbuf *vnethdr,
+ int num_buffers)
{
vnethdr->hdr = VU_HEADER;
/* Note: if VIRTIO_NET_F_MRG_RXBUF is not negotiated,
@@ -139,6 +140,8 @@ void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
{
int i;
+ vu_set_vnethdr(elem[0].in_sg[0].iov_base, elem_cnt);
+
for (i = 0; i < elem_cnt; i++) {
size_t elem_size = iov_size(elem[i].in_sg, elem[i].in_num);
@@ -146,7 +149,6 @@ void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
}
vu_queue_flush(vdev, vq, elem_cnt);
- vu_queue_notify(vdev, vq);
}
/**
@@ -260,8 +262,6 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
goto err;
}
- vu_set_vnethdr(in_sg[0].iov_base, elem_cnt);
-
total -= VNET_HLEN;
/* copy data from the buffer to the iovec */
@@ -271,6 +271,7 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
pcap_iov(in_sg, in_total, VNET_HLEN);
vu_flush(vdev, vq, elem, elem_cnt);
+ vu_queue_notify(vdev, vq);
trace("vhost-user sent %zu", total);
diff --git a/vu_common.h b/vu_common.h
index 7b060eb6184f..4037ab765b7d 100644
--- a/vu_common.h
+++ b/vu_common.h
@@ -39,7 +39,6 @@ int vu_collect(const struct vu_dev *vdev, struct vu_virtq *vq,
struct vu_virtq_element *elem, int max_elem,
struct iovec *in_sg, size_t max_in_sg, size_t *in_total,
size_t size, size_t *collected);
-void vu_set_vnethdr(struct virtio_net_hdr_mrg_rxbuf *vnethdr, int num_buffers);
void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
struct vu_virtq_element *elem, int elem_cnt);
void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
--
2.53.0
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 04/10] udp_vu: Move virtqueue management from udp_vu_sock_recv() to its caller
2026-04-01 19:18 [PATCH 00/10] vhost-user: Preparatory series for multiple iovec entries per virtqueue element Laurent Vivier
` (2 preceding siblings ...)
2026-04-01 19:18 ` [PATCH 03/10] vu_common: Move vnethdr setup into vu_flush() Laurent Vivier
@ 2026-04-01 19:18 ` Laurent Vivier
2026-04-01 19:18 ` [PATCH 05/10] udp_vu: Pass iov explicitly to helpers instead of using file-scoped array Laurent Vivier
` (5 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Laurent Vivier @ 2026-04-01 19:18 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
udp_vu_sock_recv() currently mixes two concerns: receiving data from the
socket and managing virtqueue buffers (collecting, rewinding, releasing).
This makes the function harder to reason about and couples socket I/O
with virtqueue state.
Move all virtqueue operations, vu_collect(), vu_init_elem(),
vu_queue_rewind(), vu_set_vnethdr(), and the queue-readiness check, into
udp_vu_sock_to_tap(), which is the only caller. This turns
udp_vu_sock_recv() into a pure socket receive function that simply reads
into the provided iov array and adjusts its length.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
udp_vu.c | 97 ++++++++++++++++++++++++++++----------------------------
1 file changed, 49 insertions(+), 48 deletions(-)
diff --git a/udp_vu.c b/udp_vu.c
index f8629af58ab5..34f39e1256f8 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -58,46 +58,22 @@ static size_t udp_vu_hdrlen(bool v6)
/**
* udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers
- * @c: Execution context
- * @vq: virtqueue to use to receive data
* @s: Socket to receive from
* @v6: Set for IPv6 connections
- * @dlen: Size of received data (output)
+ * @iov_cnt: Number of iov entries collected in iov_vu (input)
+ * Number of iov entries used to store the datagram (output)
*
- * Return: number of iov entries used to store the datagram, 0 if the datagram
- * was discarded because the virtqueue is not ready, -1 on error
+ * Return: size of received data, or -1 on error
*/
-static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
- bool v6, ssize_t *dlen)
+static ssize_t udp_vu_sock_recv(int s, bool v6, size_t *iov_cnt)
{
- const struct vu_dev *vdev = c->vdev;
- int elem_cnt, elem_used, iov_used;
struct msghdr msg = { 0 };
size_t hdrlen, l2len;
- size_t iov_cnt;
-
- assert(!c->no_udp);
-
- if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
- debug("Got UDP packet, but RX virtqueue not usable yet");
-
- if (recvmsg(s, &msg, MSG_DONTWAIT) < 0)
- debug_perror("Failed to discard datagram");
-
- return 0;
- }
+ ssize_t dlen;
/* compute L2 header length */
hdrlen = udp_vu_hdrlen(v6);
- elem_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem),
- iov_vu, ARRAY_SIZE(iov_vu), &iov_cnt,
- IP_MAX_MTU + ETH_HLEN + VNET_HLEN, NULL);
- if (elem_cnt == 0)
- return -1;
-
- assert((size_t)elem_cnt == iov_cnt); /* one iovec per element */
-
/* reserve space for the headers */
assert(iov_vu[0].iov_len >= MAX(hdrlen, ETH_ZLEN + VNET_HLEN));
iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen;
@@ -105,29 +81,23 @@ static int udp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, int s,
/* read data from the socket */
msg.msg_iov = iov_vu;
- msg.msg_iovlen = iov_cnt;
+ msg.msg_iovlen = *iov_cnt;
- *dlen = recvmsg(s, &msg, 0);
- if (*dlen < 0) {
- vu_queue_rewind(vq, elem_cnt);
+ dlen = recvmsg(s, &msg, 0);
+ if (dlen < 0)
return -1;
- }
/* restore the pointer to the headers address */
iov_vu[0].iov_base = (char *)iov_vu[0].iov_base - hdrlen;
iov_vu[0].iov_len += hdrlen;
- iov_used = iov_truncate(iov_vu, iov_cnt, *dlen + hdrlen);
- elem_used = iov_used; /* one iovec per element */
+ *iov_cnt = iov_truncate(iov_vu, *iov_cnt, dlen + hdrlen);
/* pad frame to 60 bytes: first buffer is at least ETH_ZLEN long */
- l2len = *dlen + hdrlen - VNET_HLEN;
+ l2len = dlen + hdrlen - VNET_HLEN;
vu_pad(&iov_vu[0], l2len);
- /* release unused buffers */
- vu_queue_rewind(vq, elem_cnt - elem_used);
-
- return iov_used;
+ return dlen;
}
/**
@@ -213,21 +183,52 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
int i;
+ assert(!c->no_udp);
+
+ if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
+ struct msghdr msg = { 0 };
+
+ debug("Got UDP packet, but RX virtqueue not usable yet");
+
+ for (i = 0; i < n; i++) {
+ if (recvmsg(s, &msg, MSG_DONTWAIT) < 0)
+ debug_perror("Failed to discard datagram");
+ }
+
+ return;
+ }
+
for (i = 0; i < n; i++) {
+ unsigned elem_cnt, elem_used;
+ size_t iov_cnt;
ssize_t dlen;
- int iov_used;
- iov_used = udp_vu_sock_recv(c, vq, s, v6, &dlen);
- if (iov_used < 0)
+ elem_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem),
+ iov_vu, ARRAY_SIZE(iov_vu), &iov_cnt,
+ IP_MAX_MTU + ETH_HLEN + VNET_HLEN, NULL);
+ if (elem_cnt == 0)
+ break;
+
+ assert((size_t)elem_cnt == iov_cnt); /* one iovec per element */
+
+ dlen = udp_vu_sock_recv(s, v6, &iov_cnt);
+ if (dlen < 0) {
+ vu_queue_rewind(vq, iov_cnt);
break;
+ }
+
+ elem_used = iov_cnt; /* one iovec per element */
+
+ /* release unused buffers */
+ vu_queue_rewind(vq, elem_cnt - elem_used);
- if (iov_used > 0) {
+ if (iov_cnt > 0) {
udp_vu_prepare(c, toside, dlen);
if (*c->pcap) {
- udp_vu_csum(toside, iov_used);
- pcap_iov(iov_vu, iov_used, VNET_HLEN);
+ udp_vu_csum(toside, iov_cnt);
+ pcap_iov(iov_vu, iov_cnt, VNET_HLEN);
}
- vu_flush(vdev, vq, elem, iov_used);
+ vu_flush(vdev, vq, elem, iov_cnt);
vu_queue_notify(vdev, vq);
}
}
--
2.53.0
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 05/10] udp_vu: Pass iov explicitly to helpers instead of using file-scoped array
2026-04-01 19:18 [PATCH 00/10] vhost-user: Preparatory series for multiple iovec entries per virtqueue element Laurent Vivier
` (3 preceding siblings ...)
2026-04-01 19:18 ` [PATCH 04/10] udp_vu: Move virtqueue management from udp_vu_sock_recv() to its caller Laurent Vivier
@ 2026-04-01 19:18 ` Laurent Vivier
2026-04-01 19:18 ` [PATCH 06/10] checksum: Pass explicit L4 length to checksum functions Laurent Vivier
` (4 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Laurent Vivier @ 2026-04-01 19:18 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
udp_vu_sock_recv(), udp_vu_prepare(), and udp_vu_csum() all operated on
the file-scoped iov_vu[] array directly. Pass iov and count as explicit
parameters instead, and move iov_vu[] and elem[] to function-local
statics in udp_vu_sock_to_tap(), the only function that needs them.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
udp_vu.c | 64 +++++++++++++++++++++++++++++---------------------------
1 file changed, 33 insertions(+), 31 deletions(-)
diff --git a/udp_vu.c b/udp_vu.c
index 34f39e1256f8..9688fe1fdc5c 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -33,9 +33,6 @@
#include "udp_vu.h"
#include "vu_common.h"
-static struct iovec iov_vu [VIRTQUEUE_MAX_SIZE];
-static struct vu_virtq_element elem [VIRTQUEUE_MAX_SIZE];
-
/**
* udp_vu_hdrlen() - Sum size of all headers, from UDP to virtio-net
* @v6: Set for IPv6 packet
@@ -58,14 +55,14 @@ static size_t udp_vu_hdrlen(bool v6)
/**
* udp_vu_sock_recv() - Receive datagrams from socket into vhost-user buffers
+ * @iov: IO vector for the frame (in/out)
+ * @cnt: Number of IO vector entries (in/out)
* @s: Socket to receive from
* @v6: Set for IPv6 connections
- * @iov_cnt: Number of collected iov in iov_vu (input)
- * Number of iov entries used to store the datagram (output)
*
* Return: size of received data, -1 on error
*/
-static ssize_t udp_vu_sock_recv(int s, bool v6, size_t *iov_cnt)
+static ssize_t udp_vu_sock_recv(struct iovec *iov, size_t *cnt, int s, bool v6)
{
struct msghdr msg = { 0 };
size_t hdrlen, l2len;
@@ -75,27 +72,27 @@ static ssize_t udp_vu_sock_recv(int s, bool v6, size_t *iov_cnt)
hdrlen = udp_vu_hdrlen(v6);
/* reserve space for the headers */
- assert(iov_vu[0].iov_len >= MAX(hdrlen, ETH_ZLEN + VNET_HLEN));
- iov_vu[0].iov_base = (char *)iov_vu[0].iov_base + hdrlen;
- iov_vu[0].iov_len -= hdrlen;
+ assert(iov[0].iov_len >= MAX(hdrlen, ETH_ZLEN + VNET_HLEN));
+ iov[0].iov_base = (char *)iov[0].iov_base + hdrlen;
+ iov[0].iov_len -= hdrlen;
/* read data from the socket */
- msg.msg_iov = iov_vu;
- msg.msg_iovlen = *iov_cnt;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = *cnt;
dlen = recvmsg(s, &msg, 0);
if (dlen < 0)
return -1;
/* restore the pointer to the headers address */
- iov_vu[0].iov_base = (char *)iov_vu[0].iov_base - hdrlen;
- iov_vu[0].iov_len += hdrlen;
+ iov[0].iov_base = (char *)iov[0].iov_base - hdrlen;
+ iov[0].iov_len += hdrlen;
- *iov_cnt = iov_truncate(iov_vu, *iov_cnt, dlen + hdrlen);
+ *cnt = iov_truncate(iov, *cnt, dlen + hdrlen);
/* pad frame to 60 bytes: first buffer is at least ETH_ZLEN long */
l2len = dlen + hdrlen - VNET_HLEN;
- vu_pad(&iov_vu[0], l2len);
+ vu_pad(&iov[0], l2len);
return dlen;
}
@@ -103,27 +100,28 @@ static ssize_t udp_vu_sock_recv(int s, bool v6, size_t *iov_cnt)
/**
* udp_vu_prepare() - Prepare the packet header
* @c: Execution context
+ * @iov: IO vector for the frame (including vnet header)
* @toside: Address information for one side of the flow
* @dlen: Packet data length
*
* Return: Layer-4 length
*/
-static size_t udp_vu_prepare(const struct ctx *c,
+static size_t udp_vu_prepare(const struct ctx *c, const struct iovec *iov,
const struct flowside *toside, ssize_t dlen)
{
struct ethhdr *eh;
size_t l4len;
/* ethernet header */
- eh = vu_eth(iov_vu[0].iov_base);
+ eh = vu_eth(iov[0].iov_base);
memcpy(eh->h_dest, c->guest_mac, sizeof(eh->h_dest));
memcpy(eh->h_source, c->our_tap_mac, sizeof(eh->h_source));
/* initialize header */
if (inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr)) {
- struct iphdr *iph = vu_ip(iov_vu[0].iov_base);
- struct udp_payload_t *bp = vu_payloadv4(iov_vu[0].iov_base);
+ struct iphdr *iph = vu_ip(iov[0].iov_base);
+ struct udp_payload_t *bp = vu_payloadv4(iov[0].iov_base);
eh->h_proto = htons(ETH_P_IP);
@@ -131,8 +129,8 @@ static size_t udp_vu_prepare(const struct ctx *c,
l4len = udp_update_hdr4(iph, bp, toside, dlen, true);
} else {
- struct ipv6hdr *ip6h = vu_ip(iov_vu[0].iov_base);
- struct udp_payload_t *bp = vu_payloadv6(iov_vu[0].iov_base);
+ struct ipv6hdr *ip6h = vu_ip(iov[0].iov_base);
+ struct udp_payload_t *bp = vu_payloadv6(iov[0].iov_base);
eh->h_proto = htons(ETH_P_IPV6);
@@ -147,23 +145,25 @@ static size_t udp_vu_prepare(const struct ctx *c,
/**
* udp_vu_csum() - Calculate and set checksum for a UDP packet
* @toside: Address information for one side of the flow
- * @iov_used: Number of used iov_vu items
+ * @iov: IO vector for the frame
+ * @cnt: Number of IO vector entries
*/
-static void udp_vu_csum(const struct flowside *toside, int iov_used)
+static void udp_vu_csum(const struct flowside *toside, const struct iovec *iov,
+ size_t cnt)
{
const struct in_addr *src4 = inany_v4(&toside->oaddr);
const struct in_addr *dst4 = inany_v4(&toside->eaddr);
- char *base = iov_vu[0].iov_base;
+ char *base = iov[0].iov_base;
struct udp_payload_t *bp;
struct iov_tail data;
if (src4 && dst4) {
bp = vu_payloadv4(base);
- data = IOV_TAIL(iov_vu, iov_used, (char *)&bp->data - base);
+ data = IOV_TAIL(iov, cnt, (char *)&bp->data - base);
csum_udp4(&bp->uh, *src4, *dst4, &data);
} else {
bp = vu_payloadv6(base);
- data = IOV_TAIL(iov_vu, iov_used, (char *)&bp->data - base);
+ data = IOV_TAIL(iov, cnt, (char *)&bp->data - base);
csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data);
}
}
@@ -178,7 +178,9 @@ static void udp_vu_csum(const struct flowside *toside, int iov_used)
void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
{
const struct flowside *toside = flowside_at_sidx(tosidx);
+ static struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE];
bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
+ static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE];
struct vu_dev *vdev = c->vdev;
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
int i;
@@ -211,9 +213,9 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
assert((size_t)elem_cnt == iov_cnt); /* one iovec per element */
- dlen = udp_vu_sock_recv(s, v6, &iov_cnt);
+ dlen = udp_vu_sock_recv(iov_vu, &iov_cnt, s, v6);
if (dlen < 0) {
- vu_queue_rewind(vq, iov_cnt);
+ vu_queue_rewind(vq, elem_cnt);
break;
}
@@ -223,12 +225,12 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
vu_queue_rewind(vq, elem_cnt - elem_used);
if (iov_cnt > 0) {
- udp_vu_prepare(c, toside, dlen);
+ udp_vu_prepare(c, iov_vu, toside, dlen);
if (*c->pcap) {
- udp_vu_csum(toside, iov_cnt);
+ udp_vu_csum(toside, iov_vu, iov_cnt);
pcap_iov(iov_vu, iov_cnt, VNET_HLEN);
}
- vu_flush(vdev, vq, elem, iov_cnt);
+ vu_flush(vdev, vq, elem, elem_used);
vu_queue_notify(vdev, vq);
}
}
--
2.53.0
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 06/10] checksum: Pass explicit L4 length to checksum functions
2026-04-01 19:18 [PATCH 00/10] vhost-user: Preparatory series for multiple iovec entries per virtqueue element Laurent Vivier
` (4 preceding siblings ...)
2026-04-01 19:18 ` [PATCH 05/10] udp_vu: Pass iov explicitly to helpers instead of using file-scoped array Laurent Vivier
@ 2026-04-01 19:18 ` Laurent Vivier
2026-04-01 19:18 ` [PATCH 07/10] pcap: Pass explicit L2 length to pcap_iov() Laurent Vivier
` (3 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Laurent Vivier @ 2026-04-01 19:18 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
The iov_tail passed to csum_iov_tail() may contain padding or trailing
data beyond the actual L4 payload. Rather than relying on
iov_tail_size() to determine how many bytes to checksum, pass the
length explicitly so that only the relevant payload bytes are included
in the checksum computation.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
checksum.c | 35 +++++++++++++++++++++--------------
checksum.h | 6 +++---
tap.c | 4 ++--
tcp.c | 9 +++++----
udp.c | 5 +++--
udp_vu.c | 12 +++++++-----
6 files changed, 41 insertions(+), 30 deletions(-)
diff --git a/checksum.c b/checksum.c
index 828f9ecc9c02..a8cf80ba7470 100644
--- a/checksum.c
+++ b/checksum.c
@@ -182,21 +182,22 @@ static uint16_t csum(const void *buf, size_t len, uint32_t init)
* @saddr: IPv4 source address
* @daddr: IPv4 destination address
* @data: UDP payload (as IO vector tail)
+ * @l4len: UDP packet length including header
*/
void csum_udp4(struct udphdr *udp4hr,
struct in_addr saddr, struct in_addr daddr,
- struct iov_tail *data)
+ struct iov_tail *data, size_t l4len)
{
/* UDP checksums are optional, so don't bother */
udp4hr->check = 0;
if (UDP4_REAL_CHECKSUMS) {
- uint16_t l4len = iov_tail_size(data) + sizeof(struct udphdr);
uint32_t psum = proto_ipv4_header_psum(l4len, IPPROTO_UDP,
saddr, daddr);
- psum = csum_unfolded(udp4hr, sizeof(struct udphdr), psum);
- udp4hr->check = csum_iov_tail(data, psum);
+ psum = csum_unfolded(udp4hr, sizeof(*udp4hr), psum);
+ udp4hr->check = csum_iov_tail(data, psum,
+ l4len - sizeof(*udp4hr));
}
}
@@ -245,19 +246,19 @@ uint32_t proto_ipv6_header_psum(uint16_t payload_len, uint8_t protocol,
* @saddr: Source address
* @daddr: Destination address
* @data: UDP payload (as IO vector tail)
+ * @l4len: UDP packet length including header
*/
void csum_udp6(struct udphdr *udp6hr,
const struct in6_addr *saddr, const struct in6_addr *daddr,
- struct iov_tail *data)
+ struct iov_tail *data, size_t l4len)
{
- uint16_t l4len = iov_tail_size(data) + sizeof(struct udphdr);
uint32_t psum = proto_ipv6_header_psum(l4len, IPPROTO_UDP,
saddr, daddr);
udp6hr->check = 0;
- psum = csum_unfolded(udp6hr, sizeof(struct udphdr), psum);
- udp6hr->check = csum_iov_tail(data, psum);
+ psum = csum_unfolded(udp6hr, sizeof(*udp6hr), psum);
+ udp6hr->check = csum_iov_tail(data, psum, l4len - sizeof(*udp6hr));
}
/**
@@ -604,20 +605,26 @@ uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init)
/**
* csum_iov_tail() - Calculate unfolded checksum for the tail of an IO vector
* @tail: IO vector tail to checksum
- * @init Initial 32-bit checksum, 0 for no pre-computed checksum
+ * @init: Initial 32-bit checksum, 0 for no pre-computed checksum
+ * @len: Number of bytes to checksum from @tail
*
* Return: 16-bit folded, complemented checksum
*/
-uint16_t csum_iov_tail(struct iov_tail *tail, uint32_t init)
+uint16_t csum_iov_tail(struct iov_tail *tail, uint32_t init, size_t len)
{
if (iov_tail_prune(tail)) {
- size_t i;
+ size_t i, n;
+ n = MIN(len, tail->iov[0].iov_len - tail->off);
init = csum_unfolded((char *)tail->iov[0].iov_base + tail->off,
- tail->iov[0].iov_len - tail->off, init);
- for (i = 1; i < tail->cnt; i++) {
+ n, init);
+ len -= n;
+
+ for (i = 1; len && i < tail->cnt; i++) {
const struct iovec *iov = &tail->iov[i];
- init = csum_unfolded(iov->iov_base, iov->iov_len, init);
+ n = MIN(len, iov->iov_len);
+ init = csum_unfolded(iov->iov_base, n, init);
+ len -= n;
}
}
return (uint16_t)~csum_fold(init);
diff --git a/checksum.h b/checksum.h
index 4e3b098db072..65834bf9eaaf 100644
--- a/checksum.h
+++ b/checksum.h
@@ -21,18 +21,18 @@ uint32_t proto_ipv4_header_psum(uint16_t l4len, uint8_t protocol,
struct in_addr saddr, struct in_addr daddr);
void csum_udp4(struct udphdr *udp4hr,
struct in_addr saddr, struct in_addr daddr,
- struct iov_tail *data);
+ struct iov_tail *data, size_t l4len);
void csum_icmp4(struct icmphdr *icmp4hr, const void *payload, size_t dlen);
uint32_t proto_ipv6_header_psum(uint16_t payload_len, uint8_t protocol,
const struct in6_addr *saddr,
const struct in6_addr *daddr);
void csum_udp6(struct udphdr *udp6hr,
const struct in6_addr *saddr, const struct in6_addr *daddr,
- struct iov_tail *data);
+ struct iov_tail *data, size_t l4len);
void csum_icmp6(struct icmp6hdr *icmp6hr,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const void *payload, size_t dlen);
uint32_t csum_unfolded(const void *buf, size_t len, uint32_t init);
-uint16_t csum_iov_tail(struct iov_tail *tail, uint32_t init);
+uint16_t csum_iov_tail(struct iov_tail *tail, uint32_t init, size_t len);
#endif /* CHECKSUM_H */
diff --git a/tap.c b/tap.c
index 1049e023bcd2..b61199dd699d 100644
--- a/tap.c
+++ b/tap.c
@@ -252,7 +252,7 @@ void *tap_push_uh4(struct udphdr *uh, struct in_addr src, in_port_t sport,
uh->source = htons(sport);
uh->dest = htons(dport);
uh->len = htons(l4len);
- csum_udp4(uh, src, dst, &payload);
+ csum_udp4(uh, src, dst, &payload, l4len);
return (char *)uh + sizeof(*uh);
}
@@ -357,7 +357,7 @@ void *tap_push_uh6(struct udphdr *uh,
uh->source = htons(sport);
uh->dest = htons(dport);
uh->len = htons(l4len);
- csum_udp6(uh, src, dst, &payload);
+ csum_udp6(uh, src, dst, &payload, l4len);
return (char *)uh + sizeof(*uh);
}
diff --git a/tcp.c b/tcp.c
index 8ea9be84a9f3..49c6fb57ce16 100644
--- a/tcp.c
+++ b/tcp.c
@@ -815,13 +815,14 @@ static void tcp_sock_set_nodelay(int s)
* @psum: Unfolded partial checksum of the IPv4 or IPv6 pseudo-header
* @th: TCP header (updated)
* @payload: TCP payload
+ * @l4len: TCP packet length, including TCP header
*/
static void tcp_update_csum(uint32_t psum, struct tcphdr *th,
- struct iov_tail *payload)
+ struct iov_tail *payload, size_t l4len)
{
th->check = 0;
psum = csum_unfolded(th, sizeof(*th), psum);
- th->check = csum_iov_tail(payload, psum);
+ th->check = csum_iov_tail(payload, psum, l4len - sizeof(*th));
}
/**
@@ -1019,7 +1020,7 @@ size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
if (no_tcp_csum)
th->check = 0;
else
- tcp_update_csum(psum, th, payload);
+ tcp_update_csum(psum, th, payload, l4len);
return MAX(l3len + sizeof(struct ethhdr), ETH_ZLEN);
}
@@ -2196,7 +2197,7 @@ static void tcp_rst_no_conn(const struct ctx *c, int af,
rsth->ack = 1;
}
- tcp_update_csum(psum, rsth, &payload);
+ tcp_update_csum(psum, rsth, &payload, sizeof(*rsth));
rst_l2len = ((char *)rsth - buf) + sizeof(*rsth);
tap_send_single(c, buf, rst_l2len);
}
diff --git a/udp.c b/udp.c
index 1fc5a42c5ca7..e113b26bc726 100644
--- a/udp.c
+++ b/udp.c
@@ -289,7 +289,7 @@ size_t udp_update_hdr4(struct iphdr *ip4h, struct udp_payload_t *bp,
.iov_len = dlen
};
struct iov_tail data = IOV_TAIL(&iov, 1, 0);
- csum_udp4(&bp->uh, *src, *dst, &data);
+ csum_udp4(&bp->uh, *src, *dst, &data, l4len);
}
return l4len;
@@ -334,7 +334,8 @@ size_t udp_update_hdr6(struct ipv6hdr *ip6h, struct udp_payload_t *bp,
.iov_len = dlen
};
struct iov_tail data = IOV_TAIL(&iov, 1, 0);
- csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data);
+ csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data,
+ l4len);
}
return l4len;
diff --git a/udp_vu.c b/udp_vu.c
index 9688fe1fdc5c..5421a7d71a19 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -147,9 +147,10 @@ static size_t udp_vu_prepare(const struct ctx *c, const struct iovec *iov,
* @toside: Address information for one side of the flow
* @iov: IO vector for the frame
* @cnt: Number of IO vector entries
+ * @l4len: UDP packet length, including UDP header
*/
static void udp_vu_csum(const struct flowside *toside, const struct iovec *iov,
- size_t cnt)
+ size_t cnt, size_t l4len)
{
const struct in_addr *src4 = inany_v4(&toside->oaddr);
const struct in_addr *dst4 = inany_v4(&toside->eaddr);
@@ -160,11 +161,12 @@ static void udp_vu_csum(const struct flowside *toside, const struct iovec *iov,
if (src4 && dst4) {
bp = vu_payloadv4(base);
data = IOV_TAIL(iov, cnt, (char *)&bp->data - base);
- csum_udp4(&bp->uh, *src4, *dst4, &data);
+ csum_udp4(&bp->uh, *src4, *dst4, &data, l4len);
} else {
bp = vu_payloadv6(base);
data = IOV_TAIL(iov, cnt, (char *)&bp->data - base);
- csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data);
+ csum_udp6(&bp->uh, &toside->oaddr.a6, &toside->eaddr.a6, &data,
+ l4len);
}
}
@@ -225,9 +227,9 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
vu_queue_rewind(vq, elem_cnt - elem_used);
if (iov_cnt > 0) {
- udp_vu_prepare(c, iov_vu, toside, dlen);
+ size_t l4len = udp_vu_prepare(c, iov_vu, toside, dlen);
if (*c->pcap) {
- udp_vu_csum(toside, iov_vu, iov_cnt);
+ udp_vu_csum(toside, iov_vu, iov_cnt, l4len);
pcap_iov(iov_vu, iov_cnt, VNET_HLEN);
}
vu_flush(vdev, vq, elem, elem_used);
--
2.53.0
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 07/10] pcap: Pass explicit L2 length to pcap_iov()
2026-04-01 19:18 [PATCH 00/10] vhost-user: Preparatory series for multiple iovec entries per virtqueue element Laurent Vivier
` (5 preceding siblings ...)
2026-04-01 19:18 ` [PATCH 06/10] checksum: Pass explicit L4 length to checksum functions Laurent Vivier
@ 2026-04-01 19:18 ` Laurent Vivier
2026-04-01 19:18 ` [PATCH 08/10] vu_common: Pass explicit frame length to vu_flush() Laurent Vivier
` (2 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Laurent Vivier @ 2026-04-01 19:18 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
With vhost-user multibuffer frames, the iov can be larger than the
actual L2 frame. The previous approach of computing L2 length as
iov_size() - offset would overcount and write extra bytes into the
pcap file.
Pass the L2 frame length explicitly to pcap_frame() and pcap_iov(),
and write exactly that many bytes instead of the full iov remainder.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
pcap.c | 37 ++++++++++++++++++++++++++++---------
pcap.h | 2 +-
tap.c | 2 +-
tcp_vu.c | 9 ++++++---
udp_vu.c | 4 +++-
vu_common.c | 2 +-
6 files changed, 40 insertions(+), 16 deletions(-)
diff --git a/pcap.c b/pcap.c
index a026f17e7974..dfe1c61add9a 100644
--- a/pcap.c
+++ b/pcap.c
@@ -52,22 +52,38 @@ struct pcap_pkthdr {
* @iov: IO vector containing frame (with L2 headers and tap headers)
* @iovcnt: Number of buffers (@iov entries) in frame
* @offset: Byte offset of the L2 headers within @iov
+ * @l2len: Length of L2 frame data to capture
* @now: Timestamp
*/
static void pcap_frame(const struct iovec *iov, size_t iovcnt,
- size_t offset, const struct timespec *now)
+ size_t offset, size_t l2len, const struct timespec *now)
{
- size_t l2len = iov_size(iov, iovcnt) - offset;
struct pcap_pkthdr h = {
.tv_sec = now->tv_sec,
.tv_usec = DIV_ROUND_CLOSEST(now->tv_nsec, 1000),
.caplen = l2len,
.len = l2len
};
+ size_t i;
- if (write_all_buf(pcap_fd, &h, sizeof(h)) < 0 ||
- write_remainder(pcap_fd, iov, iovcnt, offset) < 0)
- debug_perror("Cannot log packet, length %zu", l2len);
+ if (write_all_buf(pcap_fd, &h, sizeof(h)) < 0) {
+ debug_perror("Cannot log packet, packet header error");
+ return;
+ }
+
+ for (i = iov_skip_bytes(iov, iovcnt, offset, &offset);
+ i < iovcnt && l2len; i++) {
+ size_t n = MIN(l2len, iov[i].iov_len - offset);
+
+ if (write_all_buf(pcap_fd, (char *)iov[i].iov_base + offset,
+ n) < 0) {
+ debug_perror("Cannot log packet");
+ return;
+ }
+
+ offset = 0;
+ l2len -= n;
+ }
}
/**
@@ -87,7 +103,7 @@ void pcap(const char *pkt, size_t l2len)
if (clock_gettime(CLOCK_REALTIME, &now))
err_perror("Failed to get CLOCK_REALTIME time");
- pcap_frame(&iov, 1, 0, &now);
+ pcap_frame(&iov, 1, 0, l2len, &now);
}
/**
@@ -110,7 +126,9 @@ void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n,
err_perror("Failed to get CLOCK_REALTIME time");
for (i = 0; i < n; i++)
- pcap_frame(iov + i * frame_parts, frame_parts, offset, &now);
+ pcap_frame(iov + i * frame_parts, frame_parts, offset,
+ iov_size(iov + i * frame_parts, frame_parts) - offset,
+ &now);
}
/**
@@ -120,8 +138,9 @@ void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n,
* containing packet data to write, including L2 header
* @iovcnt: Number of buffers (@iov entries)
* @offset: Offset of the L2 frame within the full data length
+ * @l2len: Length of L2 frame data to capture
*/
-void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset)
+void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset, size_t l2len)
{
struct timespec now = { 0 };
@@ -131,7 +150,7 @@ void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset)
if (clock_gettime(CLOCK_REALTIME, &now))
err_perror("Failed to get CLOCK_REALTIME time");
- pcap_frame(iov, iovcnt, offset, &now);
+ pcap_frame(iov, iovcnt, offset, l2len, &now);
}
/**
diff --git a/pcap.h b/pcap.h
index dface5df4ee6..c171257cbd73 100644
--- a/pcap.h
+++ b/pcap.h
@@ -13,7 +13,7 @@ extern int pcap_fd;
void pcap(const char *pkt, size_t l2len);
void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n,
size_t offset);
-void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset);
+void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset, size_t l2len);
void pcap_init(struct ctx *c);
#endif /* PCAP_H */
diff --git a/tap.c b/tap.c
index b61199dd699d..007c91864b4e 100644
--- a/tap.c
+++ b/tap.c
@@ -1105,7 +1105,7 @@ void tap_add_packet(struct ctx *c, struct iov_tail *data,
struct ethhdr eh_storage;
const struct ethhdr *eh;
- pcap_iov(data->iov, data->cnt, data->off);
+ pcap_iov(data->iov, data->cnt, data->off, iov_tail_size(data));
eh = IOV_PEEK_HEADER(data, eh_storage);
if (!eh)
diff --git a/tcp_vu.c b/tcp_vu.c
index 0cd01190d612..329fa969fca1 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -143,7 +143,8 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
vu_flush(vdev, vq, flags_elem, 1);
if (*c->pcap)
- pcap_iov(&flags_elem[0].in_sg[0], 1, VNET_HLEN);
+ pcap_iov(&flags_elem[0].in_sg[0], 1, VNET_HLEN,
+ hdrlen + optlen - VNET_HLEN);
if (flags & DUP_ACK) {
elem_cnt = vu_collect(vdev, vq, &flags_elem[1], 1,
@@ -159,7 +160,8 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
vu_flush(vdev, vq, &flags_elem[1], 1);
if (*c->pcap)
- pcap_iov(&flags_elem[1].in_sg[0], 1, VNET_HLEN);
+ pcap_iov(&flags_elem[1].in_sg[0], 1, VNET_HLEN,
+ hdrlen + optlen - VNET_HLEN);
}
}
vu_queue_notify(vdev, vq);
@@ -464,7 +466,8 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
vu_flush(vdev, vq, &elem[head[i]], buf_cnt);
if (*c->pcap)
- pcap_iov(iov, buf_cnt, VNET_HLEN);
+ pcap_iov(iov, buf_cnt, VNET_HLEN,
+ dlen + hdrlen - VNET_HLEN);
conn->seq_to_tap += dlen;
}
diff --git a/udp_vu.c b/udp_vu.c
index 5421a7d71a19..81491afa7e6a 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -185,6 +185,7 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE];
struct vu_dev *vdev = c->vdev;
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+ size_t hdrlen = udp_vu_hdrlen(v6);
int i;
assert(!c->no_udp);
@@ -230,7 +231,8 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
size_t l4len = udp_vu_prepare(c, iov_vu, toside, dlen);
if (*c->pcap) {
udp_vu_csum(toside, iov_vu, iov_cnt, l4len);
- pcap_iov(iov_vu, iov_cnt, VNET_HLEN);
+ pcap_iov(iov_vu, iov_cnt, VNET_HLEN,
+ hdrlen + dlen - VNET_HLEN);
}
vu_flush(vdev, vq, elem, elem_used);
vu_queue_notify(vdev, vq);
diff --git a/vu_common.c b/vu_common.c
index 57949ca32309..f254cb67ec78 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -268,7 +268,7 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
iov_from_buf(in_sg, in_total, VNET_HLEN, buf, total);
if (*c->pcap)
- pcap_iov(in_sg, in_total, VNET_HLEN);
+ pcap_iov(in_sg, in_total, VNET_HLEN, size);
vu_flush(vdev, vq, elem, elem_cnt);
vu_queue_notify(vdev, vq);
--
2.53.0
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 08/10] vu_common: Pass explicit frame length to vu_flush()
2026-04-01 19:18 [PATCH 00/10] vhost-user: Preparatory series for multiple iovec entries per virtqueue element Laurent Vivier
` (6 preceding siblings ...)
2026-04-01 19:18 ` [PATCH 07/10] pcap: Pass explicit L2 length to pcap_iov() Laurent Vivier
@ 2026-04-01 19:18 ` Laurent Vivier
2026-04-01 19:18 ` [PATCH 09/10] tcp: Pass explicit data length to tcp_fill_headers() Laurent Vivier
2026-04-01 19:18 ` [PATCH 10/10] vhost-user: Centralise Ethernet frame padding in vu_collect() and vu_pad() Laurent Vivier
9 siblings, 0 replies; 11+ messages in thread
From: Laurent Vivier @ 2026-04-01 19:18 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
Currently vu_flush() derives the frame size from the iov, but in
preparation for iov arrays that may be larger than the actual frame,
pass the total length (including vnet header) explicitly so that only
the relevant portion is reported to the virtqueue.
Ensure a minimum frame size of ETH_ZLEN + VNET_HLEN to handle short
frames. All elements are still flushed to avoid descriptor leaks,
but trailing elements beyond frame_len will report a zero length.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
tcp_vu.c | 6 +++---
udp_vu.c | 2 +-
vu_common.c | 15 ++++++++++++---
vu_common.h | 2 +-
4 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/tcp_vu.c b/tcp_vu.c
index 329fa969fca1..105bca41c6de 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -140,7 +140,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
l2len = optlen + hdrlen - VNET_HLEN;
vu_pad(&flags_elem[0].in_sg[0], l2len);
- vu_flush(vdev, vq, flags_elem, 1);
+ vu_flush(vdev, vq, flags_elem, 1, hdrlen + optlen);
if (*c->pcap)
pcap_iov(&flags_elem[0].in_sg[0], 1, VNET_HLEN,
@@ -157,7 +157,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
flags_elem[0].in_sg[0].iov_base,
flags_elem[0].in_sg[0].iov_len);
- vu_flush(vdev, vq, &flags_elem[1], 1);
+ vu_flush(vdev, vq, &flags_elem[1], 1, hdrlen + optlen);
if (*c->pcap)
pcap_iov(&flags_elem[1].in_sg[0], 1, VNET_HLEN,
@@ -463,7 +463,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
l2len = dlen + hdrlen - VNET_HLEN;
vu_pad(iov, l2len);
- vu_flush(vdev, vq, &elem[head[i]], buf_cnt);
+ vu_flush(vdev, vq, &elem[head[i]], buf_cnt, dlen + hdrlen);
if (*c->pcap)
pcap_iov(iov, buf_cnt, VNET_HLEN,
diff --git a/udp_vu.c b/udp_vu.c
index 81491afa7e6a..4641f42eb5c4 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -234,7 +234,7 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
pcap_iov(iov_vu, iov_cnt, VNET_HLEN,
hdrlen + dlen - VNET_HLEN);
}
- vu_flush(vdev, vq, elem, elem_used);
+ vu_flush(vdev, vq, elem, elem_used, hdrlen + dlen);
vu_queue_notify(vdev, vq);
}
}
diff --git a/vu_common.c b/vu_common.c
index f254cb67ec78..d371a59a1813 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -134,18 +134,27 @@ static void vu_set_vnethdr(struct virtio_net_hdr_mrg_rxbuf *vnethdr,
* @vq: vhost-user virtqueue
* @elem: virtqueue elements array to send back to the virtqueue
* @elem_cnt: Length of the array
+ * @frame_len: Total frame length including vnet header
*/
void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
- struct vu_virtq_element *elem, int elem_cnt)
+ struct vu_virtq_element *elem, int elem_cnt, size_t frame_len)
{
+ size_t len;
int i;
vu_set_vnethdr(elem[0].in_sg[0].iov_base, elem_cnt);
+ len = MAX(ETH_ZLEN + VNET_HLEN, frame_len);
for (i = 0; i < elem_cnt; i++) {
- size_t elem_size = iov_size(elem[i].in_sg, elem[i].in_num);
+ size_t elem_size;
+
+ elem_size = iov_size(elem[i].in_sg, elem[i].in_num);
+ if (elem_size > len)
+ elem_size = len;
vu_queue_fill(vdev, vq, &elem[i], elem_size, i);
+
+ len -= elem_size;
}
vu_queue_flush(vdev, vq, elem_cnt);
@@ -270,7 +279,7 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
if (*c->pcap)
pcap_iov(in_sg, in_total, VNET_HLEN, size);
- vu_flush(vdev, vq, elem, elem_cnt);
+ vu_flush(vdev, vq, elem, elem_cnt, VNET_HLEN + size);
vu_queue_notify(vdev, vq);
trace("vhost-user sent %zu", total);
diff --git a/vu_common.h b/vu_common.h
index 4037ab765b7d..77d1849e6115 100644
--- a/vu_common.h
+++ b/vu_common.h
@@ -40,7 +40,7 @@ int vu_collect(const struct vu_dev *vdev, struct vu_virtq *vq,
struct iovec *in_sg, size_t max_in_sg, size_t *in_total,
size_t size, size_t *collected);
void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
- struct vu_virtq_element *elem, int elem_cnt);
+ struct vu_virtq_element *elem, int elem_cnt, size_t frame_len);
void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
const struct timespec *now);
int vu_send_single(const struct ctx *c, const void *buf, size_t size);
--
2.53.0
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 09/10] tcp: Pass explicit data length to tcp_fill_headers()
2026-04-01 19:18 [PATCH 00/10] vhost-user: Preparatory series for multiple iovec entries per virtqueue element Laurent Vivier
` (7 preceding siblings ...)
2026-04-01 19:18 ` [PATCH 08/10] vu_common: Pass explicit frame length to vu_flush() Laurent Vivier
@ 2026-04-01 19:18 ` Laurent Vivier
2026-04-01 19:18 ` [PATCH 10/10] vhost-user: Centralise Ethernet frame padding in vu_collect() and vu_pad() Laurent Vivier
9 siblings, 0 replies; 11+ messages in thread
From: Laurent Vivier @ 2026-04-01 19:18 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
tcp_fill_headers() computed the TCP payload length from iov_tail_size(),
but with vhost-user multibuffer frames, the iov_tail will be larger than
the actual data. Pass the data length explicitly so that IP total
length, pseudo-header, and checksum computations use the correct value.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
tcp.c | 5 +++--
tcp_buf.c | 3 ++-
tcp_internal.h | 2 +-
tcp_vu.c | 9 +++++----
4 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/tcp.c b/tcp.c
index 49c6fb57ce16..6b0e25f33bf1 100644
--- a/tcp.c
+++ b/tcp.c
@@ -945,6 +945,7 @@ static void tcp_fill_header(struct tcphdr *th,
* @ip6h: Pointer to IPv6 header, or NULL
* @th: Pointer to TCP header
* @payload: TCP payload
+ * @dlen: TCP payload length
* @ip4_check: IPv4 checksum, if already known
* @seq: Sequence number for this segment
* @no_tcp_csum: Do not set TCP checksum
@@ -955,11 +956,11 @@ size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
struct ethhdr *eh,
struct iphdr *ip4h, struct ipv6hdr *ip6h,
struct tcphdr *th, struct iov_tail *payload,
- const uint16_t *ip4_check, uint32_t seq,
+ size_t dlen, const uint16_t *ip4_check, uint32_t seq,
bool no_tcp_csum)
{
const struct flowside *tapside = TAPFLOW(conn);
- size_t l4len = iov_tail_size(payload) + sizeof(*th);
+ size_t l4len = dlen + sizeof(*th);
uint8_t *omac = conn->f.tap_omac;
size_t l3len = l4len;
uint32_t psum = 0;
diff --git a/tcp_buf.c b/tcp_buf.c
index 41965b107567..27151854033c 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -190,7 +190,8 @@ static void tcp_l2_buf_fill_headers(const struct ctx *c,
else
ip6h = iov[TCP_IOV_IP].iov_base;
- l2len = tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &tail, check, seq,
+ l2len = tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &tail,
+ iov_tail_size(&tail), check, seq,
no_tcp_csum);
tap_hdr_update(taph, l2len);
}
diff --git a/tcp_internal.h b/tcp_internal.h
index d9408852571f..a0fa19f4ed11 100644
--- a/tcp_internal.h
+++ b/tcp_internal.h
@@ -187,7 +187,7 @@ size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
struct ethhdr *eh,
struct iphdr *ip4h, struct ipv6hdr *ip6h,
struct tcphdr *th, struct iov_tail *payload,
- const uint16_t *ip4_check, uint32_t seq,
+ size_t dlen, const uint16_t *ip4_check, uint32_t seq,
bool no_tcp_csum);
int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
diff --git a/tcp_vu.c b/tcp_vu.c
index 105bca41c6de..ae79a6d856b0 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -135,7 +135,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
seq--;
tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload,
- NULL, seq, !*c->pcap);
+ optlen, NULL, seq, !*c->pcap);
l2len = optlen + hdrlen - VNET_HLEN;
vu_pad(&flags_elem[0].in_sg[0], l2len);
@@ -280,12 +280,13 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
* @conn: Connection pointer
* @iov: Pointer to the array of IO vectors
* @iov_cnt: Number of entries in @iov
+ * @dlen: TCP payload length
* @check: Checksum, if already known
* @no_tcp_csum: Do not set TCP checksum
* @push: Set PSH flag, last segment in a batch
*/
static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
- struct iovec *iov, size_t iov_cnt,
+ struct iovec *iov, size_t iov_cnt, size_t dlen,
const uint16_t **check, bool no_tcp_csum, bool push)
{
const struct flowside *toside = TAPFLOW(conn);
@@ -329,7 +330,7 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
th->ack = 1;
th->psh = push;
- tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload,
+ tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload, dlen,
*check, conn->seq_to_tap, no_tcp_csum);
if (ip4h)
*check = &ip4h->check;
@@ -457,7 +458,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
check = NULL;
previous_dlen = dlen;
- tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap, push);
+ tcp_vu_prepare(c, conn, iov, buf_cnt, dlen, &check, !*c->pcap, push);
/* Pad first/single buffer only, it's at least ETH_ZLEN long */
l2len = dlen + hdrlen - VNET_HLEN;
--
2.53.0
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 10/10] vhost-user: Centralise Ethernet frame padding in vu_collect() and vu_pad()
2026-04-01 19:18 [PATCH 00/10] vhost-user: Preparatory series for multiple iovec entries per virtqueue element Laurent Vivier
` (8 preceding siblings ...)
2026-04-01 19:18 ` [PATCH 09/10] tcp: Pass explicit data length to tcp_fill_headers() Laurent Vivier
@ 2026-04-01 19:18 ` Laurent Vivier
9 siblings, 0 replies; 11+ messages in thread
From: Laurent Vivier @ 2026-04-01 19:18 UTC (permalink / raw)
To: passt-dev; +Cc: Laurent Vivier
The previous per-protocol padding done by vu_pad() in tcp_vu.c and
udp_vu.c was only correct for single-buffer frames: it assumed the
padding area always fell within the first iov, writing past its end
with a plain memset().
It also required each caller to compute MAX(..., ETH_ZLEN + VNET_HLEN)
for vu_collect() and to call vu_pad() at the right point, duplicating
the minimum-size logic across protocols.
Move the Ethernet minimum size enforcement into vu_collect() itself, so
that enough buffer space is always reserved for padding regardless of
the requested frame size.
Rewrite vu_pad() to take a full iovec array and use iov_memset(),
making it safe for multi-buffer (mergeable rx buffer) frames.
In tcp_vu_sock_recv(), replace iov_truncate() with iov_skip_bytes():
now that all consumers receive explicit data lengths, truncating the
iovecs is no longer needed. In tcp_vu_data_from_sock(), cap each
frame's data length against the remaining bytes actually received from
the socket, so that the last partial frame gets correct headers and
sequence number advancement.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
iov.c | 1 -
tcp_vu.c | 29 ++++++++++++++---------------
udp_vu.c | 14 ++++++++------
vu_common.c | 32 +++++++++++++++-----------------
vu_common.h | 2 +-
5 files changed, 38 insertions(+), 40 deletions(-)
diff --git a/iov.c b/iov.c
index 83b683f3976a..2289b425529e 100644
--- a/iov.c
+++ b/iov.c
@@ -180,7 +180,6 @@ size_t iov_truncate(struct iovec *iov, size_t iov_cnt, size_t size)
* Will write less than @length bytes if it runs out of space in
* the iov
*/
-/* cppcheck-suppress unusedFunction */
void iov_memset(const struct iovec *iov, size_t iov_cnt, size_t offset, int c,
size_t length)
{
diff --git a/tcp_vu.c b/tcp_vu.c
index ae79a6d856b0..cae6926334b9 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -72,12 +72,12 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
struct vu_dev *vdev = c->vdev;
struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
struct vu_virtq_element flags_elem[2];
- size_t optlen, hdrlen, l2len;
struct ipv6hdr *ip6h = NULL;
struct iphdr *ip4h = NULL;
struct iovec flags_iov[2];
struct tcp_syn_opts *opts;
struct iov_tail payload;
+ size_t optlen, hdrlen;
struct tcphdr *th;
struct ethhdr *eh;
uint32_t seq;
@@ -88,7 +88,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
elem_cnt = vu_collect(vdev, vq, &flags_elem[0], 1,
&flags_iov[0], 1, NULL,
- MAX(hdrlen + sizeof(*opts), ETH_ZLEN + VNET_HLEN), NULL);
+ hdrlen + sizeof(*opts), NULL);
if (elem_cnt != 1)
return -1;
@@ -128,7 +128,6 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
return ret;
}
- iov_truncate(&flags_iov[0], 1, hdrlen + optlen);
payload = IOV_TAIL(flags_elem[0].in_sg, 1, hdrlen);
if (flags & KEEPALIVE)
@@ -137,9 +136,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload,
optlen, NULL, seq, !*c->pcap);
- l2len = optlen + hdrlen - VNET_HLEN;
- vu_pad(&flags_elem[0].in_sg[0], l2len);
-
+ vu_pad(flags_elem[0].in_sg, 1, hdrlen + optlen);
vu_flush(vdev, vq, flags_elem, 1, hdrlen + optlen);
if (*c->pcap)
@@ -149,7 +146,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
if (flags & DUP_ACK) {
elem_cnt = vu_collect(vdev, vq, &flags_elem[1], 1,
&flags_iov[1], 1, NULL,
- flags_elem[0].in_sg[0].iov_len, NULL);
+ hdrlen + optlen, NULL);
if (elem_cnt == 1 &&
flags_elem[1].in_sg[0].iov_len >=
flags_elem[0].in_sg[0].iov_len) {
@@ -213,7 +210,7 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
ARRAY_SIZE(elem) - elem_cnt,
&iov_vu[DISCARD_IOV_NUM + iov_used],
VIRTQUEUE_MAX_SIZE - iov_used, &in_total,
- MAX(MIN(mss, fillsize) + hdrlen, ETH_ZLEN + VNET_HLEN),
+ MIN(mss, fillsize) + hdrlen,
&frame_size);
if (cnt == 0)
break;
@@ -249,8 +246,11 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
if (!peek_offset_cap)
ret -= already_sent;
- /* adjust iov number and length of the last iov */
- i = iov_truncate(&iov_vu[DISCARD_IOV_NUM], iov_used, ret);
+ i = iov_skip_bytes(&iov_vu[DISCARD_IOV_NUM], iov_used,
+ MAX(hdrlen + ret, VNET_HLEN + ETH_ZLEN),
+ NULL);
+ if ((size_t)i < iov_used)
+ i++;
/* adjust head count */
while (*head_cnt > 0 && head[*head_cnt - 1] >= i)
@@ -447,11 +447,13 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
size_t frame_size = iov_size(iov, buf_cnt);
bool push = i == head_cnt - 1;
ssize_t dlen;
- size_t l2len;
assert(frame_size >= hdrlen);
dlen = frame_size - hdrlen;
+ if (dlen > len)
+ dlen = len;
+ len -= dlen;
/* The IPv4 header checksum varies only with dlen */
if (previous_dlen != dlen)
@@ -460,10 +462,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
tcp_vu_prepare(c, conn, iov, buf_cnt, dlen, &check, !*c->pcap, push);
- /* Pad first/single buffer only, it's at least ETH_ZLEN long */
- l2len = dlen + hdrlen - VNET_HLEN;
- vu_pad(iov, l2len);
-
+ vu_pad(elem[head[i]].in_sg, buf_cnt, dlen + hdrlen);
vu_flush(vdev, vq, &elem[head[i]], buf_cnt, dlen + hdrlen);
if (*c->pcap)
diff --git a/udp_vu.c b/udp_vu.c
index 4641f42eb5c4..30af64034516 100644
--- a/udp_vu.c
+++ b/udp_vu.c
@@ -65,7 +65,7 @@ static size_t udp_vu_hdrlen(bool v6)
static ssize_t udp_vu_sock_recv(struct iovec *iov, size_t *cnt, int s, bool v6)
{
struct msghdr msg = { 0 };
- size_t hdrlen, l2len;
+ size_t hdrlen, iov_used;
ssize_t dlen;
/* compute L2 header length */
@@ -88,11 +88,12 @@ static ssize_t udp_vu_sock_recv(struct iovec *iov, size_t *cnt, int s, bool v6)
iov[0].iov_base = (char *)iov[0].iov_base - hdrlen;
iov[0].iov_len += hdrlen;
- *cnt = iov_truncate(iov, *cnt, dlen + hdrlen);
-
- /* pad frame to 60 bytes: first buffer is at least ETH_ZLEN long */
- l2len = dlen + hdrlen - VNET_HLEN;
- vu_pad(&iov[0], l2len);
+ iov_used = iov_skip_bytes(iov, *cnt,
+ MAX(dlen + hdrlen, VNET_HLEN + ETH_ZLEN),
+ NULL);
+ if (iov_used < *cnt)
+ iov_used++;
+ *cnt = iov_used; /* one iovec per element */
return dlen;
}
@@ -234,6 +235,7 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
pcap_iov(iov_vu, iov_cnt, VNET_HLEN,
hdrlen + dlen - VNET_HLEN);
}
+ vu_pad(iov_vu, iov_cnt, hdrlen + dlen);
vu_flush(vdev, vq, elem, elem_used, hdrlen + dlen);
vu_queue_notify(vdev, vq);
}
diff --git a/vu_common.c b/vu_common.c
index d371a59a1813..ca0aab369d3c 100644
--- a/vu_common.c
+++ b/vu_common.c
@@ -74,6 +74,7 @@ int vu_collect(const struct vu_dev *vdev, struct vu_virtq *vq,
size_t current_iov = 0;
int elem_cnt = 0;
+ size = MAX(size, ETH_ZLEN + VNET_HLEN); /* Ethernet minimum size */
while (current_size < size && elem_cnt < max_elem &&
current_iov < max_in_sg) {
int ret;
@@ -262,29 +263,27 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size)
return -1;
}
- size += VNET_HLEN;
elem_cnt = vu_collect(vdev, vq, elem, ARRAY_SIZE(elem), in_sg,
- ARRAY_SIZE(in_sg), &in_total, size, &total);
- if (elem_cnt == 0 || total < size) {
+ ARRAY_SIZE(in_sg), &in_total, VNET_HLEN + size, &total);
+ if (elem_cnt == 0 || total < VNET_HLEN + size) {
debug("vu_send_single: no space to send the data "
"elem_cnt %d size %zu", elem_cnt, total);
goto err;
}
- total -= VNET_HLEN;
-
/* copy data from the buffer to the iovec */
- iov_from_buf(in_sg, in_total, VNET_HLEN, buf, total);
+ iov_from_buf(in_sg, in_total, VNET_HLEN, buf, size);
if (*c->pcap)
pcap_iov(in_sg, in_total, VNET_HLEN, size);
+ vu_pad(in_sg, in_total, VNET_HLEN + size);
vu_flush(vdev, vq, elem, elem_cnt, VNET_HLEN + size);
vu_queue_notify(vdev, vq);
- trace("vhost-user sent %zu", total);
+ trace("vhost-user sent %zu", size);
- return total;
+ return size;
err:
for (i = 0; i < elem_cnt; i++)
vu_queue_detach_element(vq);
@@ -293,15 +292,14 @@ err:
}
/**
- * vu_pad() - Pad 802.3 frame to minimum length (60 bytes) if needed
- * @iov: Buffer in iovec array where end of 802.3 frame is stored
- * @l2len: Layer-2 length already filled in frame
+ * vu_pad() - Pad short frames to minimum Ethernet length and truncate iovec
+ * @iov: Pointer to iovec array
+ * @cnt: Number of entries in @iov
+ * @frame_len: Data length in @iov (including virtio-net header)
*/
-void vu_pad(struct iovec *iov, size_t l2len)
+void vu_pad(const struct iovec *iov, size_t cnt, size_t frame_len)
{
- if (l2len >= ETH_ZLEN)
- return;
-
- memset((char *)iov->iov_base + iov->iov_len, 0, ETH_ZLEN - l2len);
- iov->iov_len += ETH_ZLEN - l2len;
+ if (frame_len < ETH_ZLEN + VNET_HLEN)
+ iov_memset(iov, cnt, frame_len, 0,
+ ETH_ZLEN + VNET_HLEN - frame_len);
}
diff --git a/vu_common.h b/vu_common.h
index 77d1849e6115..51f70084a7cb 100644
--- a/vu_common.h
+++ b/vu_common.h
@@ -44,6 +44,6 @@ void vu_flush(const struct vu_dev *vdev, struct vu_virtq *vq,
void vu_kick_cb(struct vu_dev *vdev, union epoll_ref ref,
const struct timespec *now);
int vu_send_single(const struct ctx *c, const void *buf, size_t size);
-void vu_pad(struct iovec *iov, size_t l2len);
+void vu_pad(const struct iovec *iov, size_t cnt, size_t frame_len);
#endif /* VU_COMMON_H */
--
2.53.0
^ permalink raw reply [flat|nested] 11+ messages in thread