From mboxrd@z Thu Jan 1 00:00:00 1970 Authentication-Results: passt.top; dmarc=pass (p=quarantine dis=none) header.from=redhat.com Authentication-Results: passt.top; dkim=pass (1024-bit key; unprotected) header.d=redhat.com header.i=@redhat.com header.a=rsa-sha256 header.s=mimecast20190719 header.b=XV6RQWQ8; dkim-atps=neutral Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) by passt.top (Postfix) with ESMTPS id 55D925A0275 for ; Thu, 16 Apr 2026 17:57:38 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1776355057; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=IzniziOrkgdFoaiGG5yk1Xuc9Iv/5PgFsiwOhZ5rcWA=; b=XV6RQWQ8ShDoU1Td+adIxqFaG1qR8hbj21d+5Drmi0AxAfAQ2SsGJwQGog+S+ov2lZ+bEl iqtcsAMvJQ0EL2kZCIyYhXRgHF3YMfcpDHs0Fgznao7K8Fzm9gV63C36BHE0sPmrrLwM3C uin+xbFakrOH7z9jsjLs9taAdF1G6cE= Received: from mx-prod-mc-05.mail-002.prod.us-west-2.aws.redhat.com (ec2-54-186-198-63.us-west-2.compute.amazonaws.com [54.186.198.63]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3, cipher=TLS_AES_256_GCM_SHA384) id us-mta-77-wTDc1gbeO3Cs2DBlx-paxA-1; Thu, 16 Apr 2026 11:57:35 -0400 X-MC-Unique: wTDc1gbeO3Cs2DBlx-paxA-1 X-Mimecast-MFC-AGG-ID: wTDc1gbeO3Cs2DBlx-paxA_1776355055 Received: from mx-prod-int-05.mail-002.prod.us-west-2.aws.redhat.com (mx-prod-int-05.mail-002.prod.us-west-2.aws.redhat.com [10.30.177.17]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by mx-prod-mc-05.mail-002.prod.us-west-2.aws.redhat.com (Postfix) with ESMTPS id 1FAC91955F02 for ; Thu, 16 Apr 2026 15:57:35 +0000 (UTC) Received: from lenovo-t14s.redhat.corp (headnet01.pony-001.prod.iad2.dc.redhat.com [10.2.32.101]) by mx-prod-int-05.mail-002.prod.us-west-2.aws.redhat.com (Postfix) with ESMTP id 34FE1195608E; Thu, 16 Apr 2026 15:57:34 +0000 (UTC) From: Laurent Vivier To: passt-dev@passt.top Subject: [PATCH v3 07/10] pcap: Pass explicit L2 length to pcap_iov() Date: Thu, 16 Apr 2026 17:57:18 +0200 Message-ID: <20260416155721.3807225-8-lvivier@redhat.com> In-Reply-To: <20260416155721.3807225-1-lvivier@redhat.com> References: <20260416155721.3807225-1-lvivier@redhat.com> MIME-Version: 1.0 X-Scanned-By: MIMEDefang 3.0 on 10.30.177.17 X-Mimecast-Spam-Score: 0 X-Mimecast-MFC-PROC-ID: y7lcwiJI2JsROYSHh-7YZ4aDk1cLe0KjqrwLhlfSXhY_1776355055 X-Mimecast-Originator: redhat.com Content-Transfer-Encoding: 8bit content-type: text/plain; charset="US-ASCII"; x-default=true Message-ID-Hash: FKQ5SLYCSPKXFWKNKM4UB4XNJRPC7LYE X-Message-ID-Hash: FKQ5SLYCSPKXFWKNKM4UB4XNJRPC7LYE X-MailFrom: lvivier@redhat.com X-Mailman-Rule-Misses: dmarc-mitigation; no-senders; approved; emergency; loop; banned-address; member-moderation; nonmember-moderation; administrivia; implicit-dest; max-recipients; max-size; news-moderation; no-subject; digests; suspicious-header CC: Laurent Vivier X-Mailman-Version: 3.3.8 Precedence: list List-Id: Development discussion and patches for passt Archived-At: Archived-At: List-Archive: List-Archive: List-Help: List-Owner: List-Post: List-Subscribe: List-Unsubscribe: With vhost-user multibuffer frames, the iov can be larger than the actual L2 frame. The previous approach of computing L2 length as iov_size() - offset would overcount and write extra bytes into the pcap file. Pass the L2 frame length explicitly to pcap_frame() and pcap_iov(), and write exactly that many bytes instead of the full iov remainder. Signed-off-by: Laurent Vivier --- pcap.c | 28 +++++++++++++++++++--------- pcap.h | 2 +- tap.c | 6 ++++-- tcp_vu.c | 14 ++++++++------ udp_vu.c | 4 +++- util.c | 31 +++++++++++++++++++++++++++---- util.h | 3 ++- vu_common.c | 2 +- 8 files changed, 65 insertions(+), 25 deletions(-) diff --git a/pcap.c b/pcap.c index a026f17e7974..d1717a003faf 100644 --- a/pcap.c +++ b/pcap.c @@ -52,12 +52,12 @@ struct pcap_pkthdr { * @iov: IO vector containing frame (with L2 headers and tap headers) * @iovcnt: Number of buffers (@iov entries) in frame * @offset: Byte offset of the L2 headers within @iov + * @l2len: Length of L2 frame data to capture * @now: Timestamp */ static void pcap_frame(const struct iovec *iov, size_t iovcnt, - size_t offset, const struct timespec *now) + size_t offset, size_t l2len, const struct timespec *now) { - size_t l2len = iov_size(iov, iovcnt) - offset; struct pcap_pkthdr h = { .tv_sec = now->tv_sec, .tv_usec = DIV_ROUND_CLOSEST(now->tv_nsec, 1000), @@ -65,9 +65,15 @@ static void pcap_frame(const struct iovec *iov, size_t iovcnt, .len = l2len }; - if (write_all_buf(pcap_fd, &h, sizeof(h)) < 0 || - write_remainder(pcap_fd, iov, iovcnt, offset) < 0) + if (write_all_buf(pcap_fd, &h, sizeof(h)) < 0) { + debug_perror("Cannot log packet, packet header error"); + return; + } + + if (write_remainder(pcap_fd, iov, iovcnt, offset, l2len) < 0) { debug_perror("Cannot log packet, length %zu", l2len); + return; + } } /** @@ -87,7 +93,7 @@ void pcap(const char *pkt, size_t l2len) if (clock_gettime(CLOCK_REALTIME, &now)) err_perror("Failed to get CLOCK_REALTIME time"); - pcap_frame(&iov, 1, 0, &now); + pcap_frame(&iov, 1, 0, l2len, &now); } /** @@ -109,8 +115,11 @@ void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n, if (clock_gettime(CLOCK_REALTIME, &now)) err_perror("Failed to get CLOCK_REALTIME time"); - for (i = 0; i < n; i++) - pcap_frame(iov + i * frame_parts, frame_parts, offset, &now); + for (i = 0; i < n; i++) { + pcap_frame(iov + i * frame_parts, frame_parts, offset, + iov_size(iov + i * frame_parts, frame_parts) - offset, + &now); + } } /** @@ -120,8 +129,9 @@ void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n, * containing packet data to write, including L2 header * @iovcnt: Number of buffers (@iov entries) * @offset: Offset of the L2 frame within the full data length + * @l2len: Length of L2 frame data to capture */ -void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset) +void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset, size_t l2len) { struct timespec now = { 0 }; @@ -131,7 +141,7 @@ void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset) if (clock_gettime(CLOCK_REALTIME, &now)) err_perror("Failed to get CLOCK_REALTIME time"); - pcap_frame(iov, iovcnt, offset, &now); + pcap_frame(iov, iovcnt, offset, l2len, &now); } /** diff --git a/pcap.h b/pcap.h index dface5df4ee6..c171257cbd73 100644 --- a/pcap.h +++ b/pcap.h @@ -13,7 +13,7 @@ extern int pcap_fd; void pcap(const char *pkt, size_t l2len); void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n, size_t offset); -void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset); +void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset, size_t l2len); void pcap_init(struct ctx *c); #endif /* PCAP_H */ diff --git a/tap.c b/tap.c index 41a61a36c279..41ba2b8666a5 100644 --- a/tap.c +++ b/tap.c @@ -500,7 +500,8 @@ static size_t tap_send_frames_passt(const struct ctx *c, /* Number of unsent or partially sent buffers for the frame */ size_t rembufs = bufs_per_frame - (i % bufs_per_frame); - if (write_remainder(c->fd_tap, &iov[i], rembufs, buf_offset) < 0) { + if (write_remainder(c->fd_tap, &iov[i], rembufs, buf_offset, + SIZE_MAX) < 0) { err_perror("tap: partial frame send"); return i; } @@ -1102,10 +1103,11 @@ void tap_handler(struct ctx *c, const struct timespec *now) void tap_add_packet(struct ctx *c, struct iov_tail *data, const struct timespec *now) { + size_t l2len = iov_tail_size(data); struct ethhdr eh_storage; const struct ethhdr *eh; - pcap_iov(data->iov, data->cnt, data->off); + pcap_iov(data->iov, data->cnt, data->off, l2len); eh = IOV_PEEK_HEADER(data, eh_storage); if (!eh) diff --git a/tcp_vu.c b/tcp_vu.c index 0cd01190d612..7b7ea9c789b1 100644 --- a/tcp_vu.c +++ b/tcp_vu.c @@ -128,7 +128,8 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) return ret; } - iov_truncate(&flags_iov[0], 1, hdrlen + optlen); + l2len = hdrlen + optlen - VNET_HLEN; + iov_truncate(&flags_iov[0], 1, l2len + VNET_HLEN); payload = IOV_TAIL(flags_elem[0].in_sg, 1, hdrlen); if (flags & KEEPALIVE) @@ -137,13 +138,12 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload, NULL, seq, !*c->pcap); - l2len = optlen + hdrlen - VNET_HLEN; vu_pad(&flags_elem[0].in_sg[0], l2len); vu_flush(vdev, vq, flags_elem, 1); if (*c->pcap) - pcap_iov(&flags_elem[0].in_sg[0], 1, VNET_HLEN); + pcap_iov(&flags_elem[0].in_sg[0], 1, VNET_HLEN, l2len); if (flags & DUP_ACK) { elem_cnt = vu_collect(vdev, vq, &flags_elem[1], 1, @@ -158,8 +158,10 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags) vu_flush(vdev, vq, &flags_elem[1], 1); - if (*c->pcap) - pcap_iov(&flags_elem[1].in_sg[0], 1, VNET_HLEN); + if (*c->pcap) { + pcap_iov(&flags_elem[1].in_sg[0], 1, VNET_HLEN, + l2len); + } } } vu_queue_notify(vdev, vq); @@ -464,7 +466,7 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn) vu_flush(vdev, vq, &elem[head[i]], buf_cnt); if (*c->pcap) - pcap_iov(iov, buf_cnt, VNET_HLEN); + pcap_iov(iov, buf_cnt, VNET_HLEN, l2len); conn->seq_to_tap += dlen; } diff --git a/udp_vu.c b/udp_vu.c index 1a73d997f683..76242d423778 100644 --- a/udp_vu.c +++ b/udp_vu.c @@ -182,6 +182,7 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx) static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE]; struct vu_dev *vdev = c->vdev; struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE]; + size_t hdrlen = udp_vu_hdrlen(v6); int i; assert(!c->no_udp); @@ -227,7 +228,8 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx) udp_vu_prepare(c, iov_vu, toside, dlen); if (*c->pcap) { udp_vu_csum(toside, iov_vu, iov_cnt, dlen); - pcap_iov(iov_vu, iov_cnt, VNET_HLEN); + pcap_iov(iov_vu, iov_cnt, VNET_HLEN, + hdrlen + dlen - VNET_HLEN); } vu_flush(vdev, vq, elem, elem_used); vu_queue_notify(vdev, vq); diff --git a/util.c b/util.c index 73c9d51d7b4a..141aad275869 100644 --- a/util.c +++ b/util.c @@ -722,31 +722,54 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags, * @iov: IO vector * @iovcnt: Number of entries in @iov * @skip: Number of bytes of the vector to skip writing + * @length: Number of bytes of the vector to write * * Return: 0 on success, -1 on error (with errno set) * * #syscalls writev */ -int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip) +int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, + size_t skip, size_t length) { size_t i = 0, offset; - while ((i += iov_skip_bytes(iov + i, iovcnt - i, skip, &offset)) < iovcnt) { + while (length && + (i += iov_skip_bytes(iov + i, iovcnt - i, skip, &offset)) < iovcnt) { ssize_t rc; + size_t end; if (offset) { + size_t len = MIN(length, iov[i].iov_len - offset); + /* Write the remainder of the partially written buffer */ if (write_all_buf(fd, (char *)iov[i].iov_base + offset, - iov[i].iov_len - offset) < 0) + len) < 0) return -1; + + length -= len; i++; + + if (!length || i >= iovcnt) + break; + } + + end = iov_skip_bytes(iov + i, iovcnt - i, length, NULL); + + /* Write a trailing partial buffer */ + if (!end) { + size_t len = MIN(length, iov[i].iov_len); + + if (write_all_buf(fd, iov[i].iov_base, len) < 0) + return -1; + break; } /* Write as much of the remaining whole buffers as we can */ - rc = writev(fd, &iov[i], iovcnt - i); + rc = writev(fd, &iov[i], end); if (rc < 0) return -1; + length -= rc; skip = rc; } return 0; diff --git a/util.h b/util.h index 92aeabc86b52..888093277896 100644 --- a/util.h +++ b/util.h @@ -235,7 +235,8 @@ int fls(unsigned long x); int ilog2(unsigned long x); int write_file(const char *path, const char *buf); intmax_t read_file_integer(const char *path, intmax_t fallback); -int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, size_t skip); +int write_remainder(int fd, const struct iovec *iov, size_t iovcnt, + size_t skip, size_t length); int read_remainder(int fd, const struct iovec *iov, size_t cnt, size_t skip); void close_open_files(int argc, char **argv); bool snprintf_check(char *str, size_t size, const char *format, ...); diff --git a/vu_common.c b/vu_common.c index 57949ca32309..f254cb67ec78 100644 --- a/vu_common.c +++ b/vu_common.c @@ -268,7 +268,7 @@ int vu_send_single(const struct ctx *c, const void *buf, size_t size) iov_from_buf(in_sg, in_total, VNET_HLEN, buf, total); if (*c->pcap) - pcap_iov(in_sg, in_total, VNET_HLEN); + pcap_iov(in_sg, in_total, VNET_HLEN, size); vu_flush(vdev, vq, elem, elem_cnt); vu_queue_notify(vdev, vq); -- 2.53.0