public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: Laurent Vivier <lvivier@redhat.com>
To: Stefano Brivio <sbrivio@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH 07/10] pcap: Pass explicit L2 length to pcap_iov()
Date: Fri, 3 Apr 2026 12:19:01 +0200	[thread overview]
Message-ID: <7c3a617b-3c85-42e4-a88b-1f08f1b9ab1c@redhat.com> (raw)
In-Reply-To: <20260403082032.6470b99f@elisabeth>

On 4/3/26 08:20, Stefano Brivio wrote:
> On Wed,  1 Apr 2026 21:18:23 +0200
> Laurent Vivier <lvivier@redhat.com> wrote:
> 
>> With vhost-user multibuffer frames, the iov can be larger than the
>> actual L2 frame. The previous approach of computing L2 length as
>> iov_size() - offset would overcount and write extra bytes into the
>> pcap file.
>>
>> Pass the L2 frame length explicitly to pcap_frame() and pcap_iov(),
>> and write exactly that many bytes instead of the full iov remainder.
>>
>> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
>> ---
>>   pcap.c      | 37 ++++++++++++++++++++++++++++---------
>>   pcap.h      |  2 +-
>>   tap.c       |  2 +-
>>   tcp_vu.c    |  9 ++++++---
>>   udp_vu.c    |  4 +++-
>>   vu_common.c |  2 +-
>>   6 files changed, 40 insertions(+), 16 deletions(-)
>>
>> diff --git a/pcap.c b/pcap.c
>> index a026f17e7974..dfe1c61add9a 100644
>> --- a/pcap.c
>> +++ b/pcap.c
>> @@ -52,22 +52,38 @@ struct pcap_pkthdr {
>>    * @iov:	IO vector containing frame (with L2 headers and tap headers)
>>    * @iovcnt:	Number of buffers (@iov entries) in frame
>>    * @offset:	Byte offset of the L2 headers within @iov
>> + * @l2len:	Length of L2 frame data to capture
>>    * @now:	Timestamp
>>    */
>>   static void pcap_frame(const struct iovec *iov, size_t iovcnt,
>> -		       size_t offset, const struct timespec *now)
>> +		       size_t offset, size_t l2len, const struct timespec *now)
>>   {
>> -	size_t l2len = iov_size(iov, iovcnt) - offset;
>>   	struct pcap_pkthdr h = {
>>   		.tv_sec = now->tv_sec,
>>   		.tv_usec = DIV_ROUND_CLOSEST(now->tv_nsec, 1000),
>>   		.caplen = l2len,
>>   		.len = l2len
>>   	};
>> +	size_t i;
>>   
>> -	if (write_all_buf(pcap_fd, &h, sizeof(h)) < 0 ||
>> -	    write_remainder(pcap_fd, iov, iovcnt, offset) < 0)
>> -		debug_perror("Cannot log packet, length %zu", l2len);
>> +	if (write_all_buf(pcap_fd, &h, sizeof(h)) < 0) {
>> +		debug_perror("Cannot log packet, packet header error");
>> +		return;
>> +	}
>> +
>> +	for (i = iov_skip_bytes(iov, iovcnt, offset, &offset);
>> +	     i < iovcnt && l2len; i++) {
>> +		size_t n = MIN(l2len, iov[i].iov_len - offset);
>> +
>> +		if (write_all_buf(pcap_fd, (char *)iov[i].iov_base + offset,
>> +				  n) < 0) {
> 
> This could be written as:
> 
> 		size_t l = MIN(l2len, iov[i].iov_len - offset), n;
> 
> 		n = write_all_buf(pcap_fd, (char *)iov[i].iov_base + offset, l);
> 		if (n < 0) {
> 
> same number of lines, but slightly clearer I think.
> 
>> +			debug_perror("Cannot log packet");
> 
> Should we add back the ", length %zu" part, perhaps, using this 'l'
> variable? I think it makes a difference if we see we're failing to
> capture a reasonably sized 64k packet (disk full or something) compared
> to a 100G-length buffer coming from some calculation gone wrong.
> 
>> +			return;
>> +		}
>> +
>> +		offset = 0;
>> +		l2len -= n;
>> +	}
>>   }
>>   
>>   /**
>> @@ -87,7 +103,7 @@ void pcap(const char *pkt, size_t l2len)
>>   	if (clock_gettime(CLOCK_REALTIME, &now))
>>   		err_perror("Failed to get CLOCK_REALTIME time");
>>   
>> -	pcap_frame(&iov, 1, 0, &now);
>> +	pcap_frame(&iov, 1, 0, l2len, &now);
>>   }
>>   
>>   /**
>> @@ -110,7 +126,9 @@ void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n,
>>   		err_perror("Failed to get CLOCK_REALTIME time");
>>   
>>   	for (i = 0; i < n; i++)
>> -		pcap_frame(iov + i * frame_parts, frame_parts, offset, &now);
>> +		pcap_frame(iov + i * frame_parts, frame_parts, offset,
> 
> Nit: curly brackets for coding style.
> 
>> +			   iov_size(iov + i * frame_parts, frame_parts) - offset,
>> +			   &now);
>>   }
>>   
>>   /**
>> @@ -120,8 +138,9 @@ void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n,
>>    *		containing packet data to write, including L2 header
>>    * @iovcnt:	Number of buffers (@iov entries)
>>    * @offset:	Offset of the L2 frame within the full data length
>> + * @l2len:	Length of L2 frame data to capture
>>    */
>> -void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset)
>> +void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset, size_t l2len)
>>   {
>>   	struct timespec now = { 0 };
>>   
>> @@ -131,7 +150,7 @@ void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset)
>>   	if (clock_gettime(CLOCK_REALTIME, &now))
>>   		err_perror("Failed to get CLOCK_REALTIME time");
>>   
>> -	pcap_frame(iov, iovcnt, offset, &now);
>> +	pcap_frame(iov, iovcnt, offset, l2len, &now);
>>   }
>>   
>>   /**
>> diff --git a/pcap.h b/pcap.h
>> index dface5df4ee6..c171257cbd73 100644
>> --- a/pcap.h
>> +++ b/pcap.h
>> @@ -13,7 +13,7 @@ extern int pcap_fd;
>>   void pcap(const char *pkt, size_t l2len);
>>   void pcap_multiple(const struct iovec *iov, size_t frame_parts, unsigned int n,
>>   		   size_t offset);
>> -void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset);
>> +void pcap_iov(const struct iovec *iov, size_t iovcnt, size_t offset, size_t l2len);
>>   void pcap_init(struct ctx *c);
>>   
>>   #endif /* PCAP_H */
>> diff --git a/tap.c b/tap.c
>> index b61199dd699d..007c91864b4e 100644
>> --- a/tap.c
>> +++ b/tap.c
>> @@ -1105,7 +1105,7 @@ void tap_add_packet(struct ctx *c, struct iov_tail *data,
>>   	struct ethhdr eh_storage;
>>   	const struct ethhdr *eh;
>>   
>> -	pcap_iov(data->iov, data->cnt, data->off);
>> +	pcap_iov(data->iov, data->cnt, data->off, iov_tail_size(data));
> 
> This leads to a warning from Coverity Scan:
> 
> /home/sbrivio/passt/tap.c:1108:2:
>    Type: Evaluation order violation (EVALUATION_ORDER)
> 
> /home/sbrivio/passt/tap.c:1108:2:
>    read_write_order: In argument #4 of "pcap_iov(data->iov, data->cnt, data->off, iov_tail_size(data))", a call is made to "iov_tail_size(data)".  In argument #1 of this function, the object "data->off" is modified.  This object is also used in "data->off", the argument #3 of the outer function call.  The order in which these arguments are evaluated is not specified, and will vary between platforms.
> 
> I'm not sure if we can ever reach this point with a non-pruned iov_tail
> so that iov_tail_prune() will actually modify 'off', but just to be
> sure I guess we should call iov_tail_size() first, assign its result to
> a temporary variable, and use that.

In fact, iov_size() will not change after pruning. The couple (cnt, off) will always point 
to the same offset (but cnt can be higher and off lower).

>>   
>>   	eh = IOV_PEEK_HEADER(data, eh_storage);
>>   	if (!eh)
>> diff --git a/tcp_vu.c b/tcp_vu.c
>> index 0cd01190d612..329fa969fca1 100644
>> --- a/tcp_vu.c
>> +++ b/tcp_vu.c
>> @@ -143,7 +143,8 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
>>   	vu_flush(vdev, vq, flags_elem, 1);
>>   
>>   	if (*c->pcap)
>> -		pcap_iov(&flags_elem[0].in_sg[0], 1, VNET_HLEN);
>> +		pcap_iov(&flags_elem[0].in_sg[0], 1, VNET_HLEN,
>> +			 hdrlen + optlen - VNET_HLEN);
> 
> Same here, curly brackets.
> 
>>   
>>   	if (flags & DUP_ACK) {
>>   		elem_cnt = vu_collect(vdev, vq, &flags_elem[1], 1,
>> @@ -159,7 +160,8 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
>>   			vu_flush(vdev, vq, &flags_elem[1], 1);
>>   
>>   			if (*c->pcap)
>> -				pcap_iov(&flags_elem[1].in_sg[0], 1, VNET_HLEN);
>> +				pcap_iov(&flags_elem[1].in_sg[0], 1, VNET_HLEN,
>> +					 hdrlen + optlen - VNET_HLEN);
> 
> And here.
> 
>>   		}
>>   	}
>>   	vu_queue_notify(vdev, vq);
>> @@ -464,7 +466,8 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
>>   		vu_flush(vdev, vq, &elem[head[i]], buf_cnt);
>>   
>>   		if (*c->pcap)
>> -			pcap_iov(iov, buf_cnt, VNET_HLEN);
>> +			pcap_iov(iov, buf_cnt, VNET_HLEN,
>> +				 dlen + hdrlen - VNET_HLEN);
> 
> And here too.
> 
>>   
>>   		conn->seq_to_tap += dlen;
>>   	}
>> diff --git a/udp_vu.c b/udp_vu.c
>> index 5421a7d71a19..81491afa7e6a 100644
>> --- a/udp_vu.c
>> +++ b/udp_vu.c
>> @@ -185,6 +185,7 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
>>   	static struct iovec iov_vu[VIRTQUEUE_MAX_SIZE];
>>   	struct vu_dev *vdev = c->vdev;
>>   	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
>> +	size_t hdrlen = udp_vu_hdrlen(v6);
>>   	int i;
>>   
>>   	assert(!c->no_udp);
>> @@ -230,7 +231,8 @@ void udp_vu_sock_to_tap(const struct ctx *c, int s, int n, flow_sidx_t tosidx)
>>   			size_t l4len = udp_vu_prepare(c, iov_vu, toside, dlen);
>>   			if (*c->pcap) {
>>   				udp_vu_csum(toside, iov_vu, iov_cnt, l4len);
>> -				pcap_iov(iov_vu, iov_cnt, VNET_HLEN);
>> +				pcap_iov(iov_vu, iov_cnt, VNET_HLEN,
>> +					 hdrlen + dlen - VNET_HLEN);
> 
> And here as well. Or maybe, in all those cases, we could have a 'size'
> temporary variable which should make things a bit more obvious.

I agree

Thanks,
Laurent


  reply	other threads:[~2026-04-03 10:19 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-01 19:18 [PATCH 00/10] vhost-user: Preparatory series for multiple iovec entries per virtqueue element Laurent Vivier
2026-04-01 19:18 ` [PATCH 01/10] iov: Introduce iov_memset() Laurent Vivier
2026-04-03 12:35   ` David Gibson
2026-04-01 19:18 ` [PATCH 02/10] iov: Add iov_memcopy() to copy data between iovec arrays Laurent Vivier
2026-04-03  6:20   ` Stefano Brivio
2026-04-01 19:18 ` [PATCH 03/10] vu_common: Move vnethdr setup into vu_flush() Laurent Vivier
2026-04-03  6:20   ` Stefano Brivio
2026-04-03 10:16     ` Laurent Vivier
2026-04-01 19:18 ` [PATCH 04/10] udp_vu: Move virtqueue management from udp_vu_sock_recv() to its caller Laurent Vivier
2026-04-01 19:18 ` [PATCH 05/10] udp_vu: Pass iov explicitly to helpers instead of using file-scoped array Laurent Vivier
2026-04-01 19:18 ` [PATCH 06/10] checksum: Pass explicit L4 length to checksum functions Laurent Vivier
2026-04-01 19:18 ` [PATCH 07/10] pcap: Pass explicit L2 length to pcap_iov() Laurent Vivier
2026-04-03  6:20   ` Stefano Brivio
2026-04-03 10:19     ` Laurent Vivier [this message]
2026-04-01 19:18 ` [PATCH 08/10] vu_common: Pass explicit frame length to vu_flush() Laurent Vivier
2026-04-03  6:20   ` Stefano Brivio
2026-04-01 19:18 ` [PATCH 09/10] tcp: Pass explicit data length to tcp_fill_headers() Laurent Vivier
2026-04-01 19:18 ` [PATCH 10/10] vhost-user: Centralise Ethernet frame padding in vu_collect() and vu_pad() Laurent Vivier
2026-04-03  6:20   ` Stefano Brivio
2026-04-03 10:25     ` Laurent Vivier

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7c3a617b-3c85-42e4-a88b-1f08f1b9ab1c@redhat.com \
    --to=lvivier@redhat.com \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).