On Wed, May 20, 2026 at 03:46:48PM +0200, Laurent Vivier wrote: > On 5/11/26 10:24, David Gibson wrote: > > On Thu, Apr 16, 2026 at 06:16:17PM +0200, Laurent Vivier wrote: > > > Previously, tcp_vu_sock_recv() assumed a 1:1 mapping between virtqueue > > > elements and iovecs (one iovec per element), enforced by an ASSERT. > > > This prevented the use of virtqueue elements with multiple buffers > > > (e.g. when mergeable rx buffers are not negotiated and headers are > > > provided in a separate buffer). > > > > > > Introduce a struct vu_frame to track per-frame metadata: the range of > > > elements and iovecs that make up each frame, and the frame's total size. > > > This replaces the head[] array which only tracked element indices. > > > > > > A separate iov_msg[] array is built for recvmsg() by cloning the data > > > portions (after stripping headers) using iov_tail helpers. > > > > > > Then a frame truncation after recvmsg() properly walks the frame and > > > element arrays to adjust iovec counts and element counts. > > > > > > Signed-off-by: Laurent Vivier > > > > LGTM apart from a couple of minor points noted below. > > > > > --- > > > tcp_vu.c | 174 ++++++++++++++++++++++++++++++++++++------------------- > > > 1 file changed, 113 insertions(+), 61 deletions(-) > > > > > > diff --git a/tcp_vu.c b/tcp_vu.c > > > index 2017aec90342..96b16007701d 100644 > > > --- a/tcp_vu.c > > > +++ b/tcp_vu.c > > > @@ -35,9 +35,24 @@ > > > #include "vu_common.h" > > > #include > > > -static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE + DISCARD_IOV_NUM]; > > > +static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE]; > > > static struct vu_virtq_element elem[VIRTQUEUE_MAX_SIZE]; > > > -static int head[VIRTQUEUE_MAX_SIZE + 1]; > > > + > > > +/** > > > + * struct vu_frame - Descriptor for a TCP frame mapped to virtqueue elements > > > + * @idx_element: Index of first element in elem[] for this frame > > > + * @num_element: Number of virtqueue elements used by this frame > > > + * @idx_iovec: Index of first iovec in iov_vu[] for this frame > > > + * @num_iovec: Number of iovecs covering this frame's buffers > > > + * @size: Total frame size including all headers > > > + */ > > > +static struct vu_frame { > > > + int idx_element; > > > + int num_element; > > > + int idx_iovec; > > > + int num_iovec; > > > + size_t size; > > > +} frame[VIRTQUEUE_MAX_SIZE]; > > > /** > > > * tcp_vu_hdrlen() - Sum size of all headers, from TCP to virtio-net > > > @@ -174,8 +189,8 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) > > > * @v6: Set for IPv6 connections > > > * @already_sent: Number of bytes already sent > > > * @fillsize: Maximum bytes to fill in guest-side receiving window > > > - * @iov_cnt: number of iov (output) > > > - * @head_cnt: Pointer to store the count of head iov entries (output) > > > + * @elem_used: number of element (output) > > > + * @frame_cnt: Pointer to store the number of frames (output) > > > * > > > * Return: number of bytes received from the socket, or a negative error code > > > * on failure. > > > @@ -183,57 +198,77 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) > > > static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq, > > > const struct tcp_tap_conn *conn, bool v6, > > > uint32_t already_sent, size_t fillsize, > > > - int *iov_cnt, int *head_cnt) > > > + int *elem_used, int *frame_cnt) > > > { > > > + static struct iovec iov_msg[VIRTQUEUE_MAX_SIZE + DISCARD_IOV_NUM]; > > > const struct vu_dev *vdev = c->vdev; > > > struct msghdr mh_sock = { 0 }; > > > uint16_t mss = MSS_GET(conn); > > > size_t hdrlen, iov_used; > > > int s = conn->sock; > > > + ssize_t ret, dlen; > > > int elem_cnt; > > > - ssize_t ret; > > > - int i; > > > - > > > - *iov_cnt = 0; > > > + int i, j; > > > hdrlen = tcp_vu_hdrlen(v6); > > > + *elem_used = 0; > > > + > > > iov_used = 0; > > > elem_cnt = 0; > > > - *head_cnt = 0; > > > + *frame_cnt = 0; > > > while (fillsize > 0 && elem_cnt < ARRAY_SIZE(elem) && > > > - iov_used < VIRTQUEUE_MAX_SIZE) { > > > - size_t frame_size, dlen, in_total; > > > - struct iovec *iov; > > > + iov_used < ARRAY_SIZE(iov_vu) && > > > + *frame_cnt < ARRAY_SIZE(frame)) { > > > + size_t frame_size, in_total; > > > int cnt; > > > cnt = vu_collect(vdev, vq, &elem[elem_cnt], > > > ARRAY_SIZE(elem) - elem_cnt, > > > - &iov_vu[DISCARD_IOV_NUM + iov_used], > > > - VIRTQUEUE_MAX_SIZE - iov_used, &in_total, > > > + &iov_vu[iov_used], > > > + ARRAY_SIZE(iov_vu) - iov_used, &in_total, > > > MIN(mss, fillsize) + hdrlen, > > > &frame_size); > > > if (cnt == 0) > > > break; > > > - assert((size_t)cnt == in_total); /* one iovec per element */ > > > + > > > + frame[*frame_cnt].idx_element = elem_cnt; > > > + frame[*frame_cnt].num_element = cnt; > > > + frame[*frame_cnt].idx_iovec = iov_used; > > > + frame[*frame_cnt].num_iovec = in_total; > > > + frame[*frame_cnt].size = frame_size; > > > + (*frame_cnt)++; > > > iov_used += in_total; > > > - dlen = frame_size - hdrlen; > > > + elem_cnt += cnt; > > > - /* reserve space for headers in iov */ > > > - iov = &elem[elem_cnt].in_sg[0]; > > > - assert(iov->iov_len >= hdrlen); > > > - iov->iov_base = (char *)iov->iov_base + hdrlen; > > > - iov->iov_len -= hdrlen; > > > - head[(*head_cnt)++] = elem_cnt; > > > + fillsize -= frame_size - hdrlen; > > > + } > > > - fillsize -= dlen; > > > - elem_cnt += cnt; > > > + /* build an iov array without headers */ > > > + for (i = 0, j = DISCARD_IOV_NUM; i < *frame_cnt && > > > + j < ARRAY_SIZE(iov_msg); i++) { > > > + struct iov_tail data; > > > + ssize_t cnt; > > > + > > > + data = IOV_TAIL(&iov_vu[frame[i].idx_iovec], > > > + frame[i].num_iovec, 0); > > > + iov_drop_header(&data, hdrlen); > > > + > > > + cnt = iov_tail_clone(&iov_msg[j], ARRAY_SIZE(iov_msg) - j, > > > + &data); > > > + if (cnt == -1) > > > + die("Missing entries in iov_msg"); > > > > Is a fatal error really what we want here? > > As we are copying iov_vu into iov_msg and the sizes match (if we ignore the > discard part), there is always enough room. An assert() would be cleaner but > coverity doesn't manage it correctly. Huh, weird. Okay then. -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson