On Fri, Apr 03, 2026 at 06:38:08PM +0200, Laurent Vivier wrote: > With vhost-user multibuffer frames, the iov can be larger than the > actual L2 frame. The previous approach of computing L2 length as > iov_size() - offset would overcount and write extra bytes into the > pcap file. > > Pass the L2 frame length explicitly to pcap_frame() and pcap_iov(), > and write exactly that many bytes instead of the full iov remainder. > > Signed-off-by: Laurent Vivier > --- > pcap.c | 41 +++++++++++++++++++++++++++++++---------- > pcap.h | 2 +- > tap.c | 3 ++- > tcp_vu.c | 14 ++++++++------ > udp_vu.c | 4 +++- > vu_common.c | 2 +- > 6 files changed, 46 insertions(+), 20 deletions(-) > > diff --git a/pcap.c b/pcap.c > index a026f17e7974..c2a6910bec02 100644 > --- a/pcap.c > +++ b/pcap.c > @@ -52,22 +52,39 @@ struct pcap_pkthdr { > * @iov: IO vector containing frame (with L2 headers and tap headers) > * @iovcnt: Number of buffers (@iov entries) in frame > * @offset: Byte offset of the L2 headers within @iov > + * @l2len: Length of L2 frame data to capture > * @now: Timestamp > */ > static void pcap_frame(const struct iovec *iov, size_t iovcnt, > - size_t offset, const struct timespec *now) > + size_t offset, size_t l2len, const struct timespec *now) > { > - size_t l2len = iov_size(iov, iovcnt) - offset; > struct pcap_pkthdr h = { > .tv_sec = now->tv_sec, > .tv_usec = DIV_ROUND_CLOSEST(now->tv_nsec, 1000), > .caplen = l2len, > .len = l2len > }; > + size_t i; > > - if (write_all_buf(pcap_fd, &h, sizeof(h)) < 0 || > - write_remainder(pcap_fd, iov, iovcnt, offset) < 0) > - debug_perror("Cannot log packet, length %zu", l2len); > + if (write_all_buf(pcap_fd, &h, sizeof(h)) < 0) { > + debug_perror("Cannot log packet, packet header error"); > + return; > + } > + Hm, would it be cleaner to push a length parameter into write_remainder(), rather than open coding the loop here? We might be able to make at least partial use of writev() in that case. > + for (i = iov_skip_bytes(iov, iovcnt, offset, &offset); > + i < iovcnt && l2len; i++) { > + size_t l = MIN(l2len, iov[i].iov_len - offset); > + int n; > + > + n = write_all_buf(pcap_fd, (char *)iov[i].iov_base + offset, l); > + if (n < 0) { > + debug_perror("Cannot log packet, length %zu", l); > + return; > + } > + > + offset = 0; > + l2len -= l; > + } > } > > /** > @@ -87,7 +104,7 @@ void pcap(const char *pkt, size_t l2len) > if (clock_gettime(CLOCK_REALTIME, &now)) > err_perror("Failed to get CLOCK_REALTIME time"); > > - pcap_frame(&iov, 1, 0, &now); > + pcap_frame(&iov, 1, 0, l2len, &now); > } > > /** > @@ -109,8 +126,11 @@ void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n, > if (clock_gettime(CLOCK_REALTIME, &now)) > err_perror("Failed to get CLOCK_REALTIME time"); > > - for (i = 0; i < n; i++) > - pcap_frame(iov + i * frame_parts, frame_parts, offset, &now); > + for (i = 0; i < n; i++) { > + pcap_frame(iov + i * frame_parts, frame_parts, offset, > + iov_size(iov + i * frame_parts, frame_parts) - offset, > + &now); > + } > } > > /** > @@ -120,8 +140,9 @@ void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n, > * containing packet data to write, including L2 header > * @iovcnt: Number of buffers (@iov entries) > * @offset: Offset of the L2 frame within the full data length > + * @l2len: Length of L2 frame data to capture > */ > -void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset) > +void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset, size_t l2len) > { > struct timespec now = { 0 }; > > @@ -131,7 +152,7 @@ void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset) > if (clock_gettime(CLOCK_REALTIME, &now)) > err_perror("Failed to get CLOCK_REALTIME time"); > > - pcap_frame(iov, iovcnt, offset, &now); > + pcap_frame(iov, iovcnt, offset, l2len, &now); > } > > /** > diff --git a/pcap.h b/pcap.h > index dface5df4ee6..c171257cbd73 100644 > --- a/pcap.h > +++ b/pcap.h > @@ -13,7 +13,7 @@ extern int pcap_fd; > void pcap(const char *pkt, size_t l2len); > void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n, > size_t offset); > -void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset); > +void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset, size_t l2len); > void pcap_init(struct ctx *c); > > #endif /* PCAP_H */ > diff --git a/tap.c b/tap.c > index b61199dd699d..a4cc00b93d82 100644 > --- a/tap.c > +++ b/tap.c > @@ -1102,10 +1102,11 @@ void tap_handler(struct ctx *c, const struct timespec *now) > void tap_add_packet(struct ctx *c, struct iov_tail *data, > const struct timespec *now) > { > + size_t l2len = iov_tail_size(data); > struct ethhdr eh_storage; > const struct ethhdr *eh; > > - pcap_iov(data->iov, data->cnt, data->off); > + pcap_iov(data->iov, data->cnt, data->off, l2len); > > eh = IOV_PEEK_HEADER(data, eh_storage); > if (!eh) > diff --git a/tcp_vu.c b/tcp_vu.c > index 0cd01190d612..7b7ea9c789b1 100644 > --- a/tcp_vu.c > +++ b/tcp_vu.c > @@ -128,7 +128,8 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) > return ret; > } > > - iov_truncate(&flags_iov[0], 1, hdrlen + optlen); > + l2len = hdrlen + optlen - VNET_HLEN; > + iov_truncate(&flags_iov[0], 1, l2len + VNET_HLEN); > payload = IOV_TAIL(flags_elem[0].in_sg, 1, hdrlen); > > if (flags & KEEPALIVE) > @@ -137,13 +138,12 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) > tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload, > NULL, seq, !*c->pcap); > > - l2len = optlen + hdrlen - VNET_HLEN; > vu_pad(&flags_elem[0].in_sg[0], l2len); > > vu_flush(vdev, vq, flags_elem, 1); > > if (*c->pcap) > - pcap_iov(&flags_elem[0].in_sg[0], 1, VNET_HLEN); > + pcap_iov(&flags_elem[0].in_sg[0], 1, VNET_HLEN, l2len); > > if (flags & DUP_ACK) { > elem_cnt = vu_collect(vdev, vq, &flags_elem[1], 1, > @@ -158,8 +158,10 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) > > vu_flush(vdev, vq, &flags_elem[1], 1); > > - if (*c->pcap) > - pcap_iov(&flags_elem[1].in_sg[0], 1, VNET_HLEN); > + if (*c->pcap) { > + pcap_iov(&flags_elem[1].in_sg[0], 1, VNET_HLEN, > + l2len); > + } > } > } > vu_queue_notify(vdev, vq); > @@ -464,7 +466,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) > vu_flush(vdev, vq, &elem[head[i]], buf_cnt); > > if (*c->pcap) > - pcap_iov(iov, buf_cnt, VNET_HLEN); > + pcap_iov(iov, buf_cnt, VNET_HLEN, l2len); > > conn->seq_to_tap += dlen; > } > diff --git a/udp_vu.c b/udp_vu.c > index 5421a7d71a19..81491afa7e6a 100644 > --- a/udp_vu.c > +++ b/udp_vu.c > @@ -185,6 +185,7 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx) > static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE]; > struct vu_dev *vdev = c->vdev; > struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; > + size_t hdrlen = udp_vu_hdrlen(v6); > int i; > > assert(!c->no_udp); > @@ -230,7 +231,8 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx) > size_t l4len = udp_vu_prepare(c, iov_vu, toside, dlen); > if (*c->pcap) { > udp_vu_csum(toside, iov_vu, iov_cnt, l4len); > - pcap_iov(iov_vu, iov_cnt, VNET_HLEN); > + pcap_iov(iov_vu, iov_cnt, VNET_HLEN, > + hdrlen + dlen - VNET_HLEN); > } > vu_flush(vdev, vq, elem, elem_used); > vu_queue_notify(vdev, vq); > diff --git a/vu_common.c b/vu_common.c > index 57949ca32309..f254cb67ec78 100644 > --- a/vu_common.c > +++ b/vu_common.c > @@ -268,7 +268,7 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size) > iov_from_buf(in_sg, in_total, VNET_HLEN, buf, total); > > if (*c->pcap) > - pcap_iov(in_sg, in_total, VNET_HLEN); > + pcap_iov(in_sg, in_total, VNET_HLEN, size); > > vu_flush(vdev, vq, elem, elem_cnt); > vu_queue_notify(vdev, vq); > -- > 2.53.0 > -- David Gibson (he or they) | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you, not the other way | around. http://www.ozlabs.org/~dgibson