public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Laurent Vivier <lvivier@redhat.com>
Cc: passt-dev@passt.top
Subject: Re: [PATCH v6 1/4] tcp: Encode checksum computation flags in a single parameter
Date: Mon, 11 May 2026 17:49:04 +1000	[thread overview]
Message-ID: <agGJ8A-9fpS64occ@zatzit> (raw)
In-Reply-To: <20260416161618.3826904-2-lvivier@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 11498 bytes --]

On Thu, Apr 16, 2026 at 06:16:15PM +0200, Laurent Vivier wrote:
> tcp_fill_headers() takes a pointer to a previously computed IPv4 header
> checksum to avoid recalculating it when the payload length doesn't
> change, and a separate bool to skip TCP checksum computation.
> 
> Replace both parameters with a single uint32_t csum_flags that encodes:
> - IP4_CSUM (bit 31): compute IPv4 header checksum from scratch
> - TCP_CSUM (bit 30): compute TCP checksum
> - IP4_CMASK (low 16 bits): cached IPv4 header checksum value
> 
> When IP4_CSUM is not set, the cached checksum is extracted from the low
> 16 bits.  This is cleaner than the pointer-based approach, and also
> avoids a potential dangling pointer issue: a subsequent patch makes
> tcp_fill_headers() access ip4h via with_header(), which scopes it to a
> temporary variable, so a pointer to ip4h->check would become invalid
> after the with_header() block.
> 
> Suggested-by: David Gibson <david@gibson.dropbear.id.au>
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  tcp.c          | 25 +++++++++++++------------
>  tcp_buf.c      | 23 ++++++++++++-----------
>  tcp_internal.h |  7 +++++--
>  tcp_vu.c       | 28 +++++++++++++++++-----------
>  4 files changed, 47 insertions(+), 36 deletions(-)
> 
> diff --git a/tcp.c b/tcp.c
> index 45bcc19375fe..de362290b034 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -946,9 +946,10 @@ static void tcp_fill_header(struct tcphdr *th,
>   * @th:			Pointer to TCP header
>   * @payload:		TCP payload
>   * @dlen:		TCP payload length
> - * @ip4_check:		IPv4 checksum, if already known
> + * @csum_flags:		TCP_CSUM if TCP checksum must be computed,
> + *                      IP4_CSUM if IPv4 checksum must be computed,
> + *                      otherwise IPv4 checksum is provided in IP4_CMASK
>   * @seq:		Sequence number for this segment
> - * @no_tcp_csum:	Do not set TCP checksum
>   *
>   * Return: frame length (including L2 headers)
>   */
> @@ -956,8 +957,7 @@ size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
>  			struct ethhdr *eh,
>  			struct iphdr *ip4h, struct ipv6hdr *ip6h,
>  			struct tcphdr *th, struct iov_tail *payload,
> -			size_t dlen, const uint16_t *ip4_check, uint32_t seq,
> -			bool no_tcp_csum)
> +			size_t dlen, uint32_t csum_flags, uint32_t seq)
>  {
>  	const struct flowside *tapside = TAPFLOW(conn);
>  	size_t l4len = dlen + sizeof(*th);
> @@ -977,13 +977,14 @@ size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
>  		ip4h->saddr = src4->s_addr;
>  		ip4h->daddr = dst4->s_addr;
>  
> -		if (ip4_check)
> -			ip4h->check = *ip4_check;
> -		else
> +		if (csum_flags & IP4_CSUM) {
>  			ip4h->check = csum_ip4_header(l3len, IPPROTO_TCP,
>  						      *src4, *dst4);
> +		} else {
> +			ip4h->check = csum_flags & IP4_CMASK;
> +		}
>  
> -		if (!no_tcp_csum) {
> +		if (csum_flags & TCP_CSUM) {
>  			psum = proto_ipv4_header_psum(l4len, IPPROTO_TCP,
>  						      *src4, *dst4);
>  		}
> @@ -1003,7 +1004,7 @@ size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
>  
>  		ip6_set_flow_lbl(ip6h, conn->sock);
>  
> -		if (!no_tcp_csum) {
> +		if (csum_flags & TCP_CSUM) {
>  			psum = proto_ipv6_header_psum(l4len, IPPROTO_TCP,
>  						      &ip6h->saddr,
>  						      &ip6h->daddr);
> @@ -1018,10 +1019,10 @@ size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
>  
>  	tcp_fill_header(th, conn, seq);
>  
> -	if (no_tcp_csum)
> -		th->check = 0;
> -	else
> +	if (csum_flags & TCP_CSUM)
>  		tcp_update_csum(psum, th, payload, dlen);
> +	else
> +		th->check = 0;
>  
>  	return MAX(l3len + sizeof(struct ethhdr), ETH_ZLEN);
>  }
> diff --git a/tcp_buf.c b/tcp_buf.c
> index 27151854033c..a27d9733616c 100644
> --- a/tcp_buf.c
> +++ b/tcp_buf.c
> @@ -166,14 +166,15 @@ static void tcp_l2_buf_pad(struct iovec *iov)
>   * @c:		Execution context
>   * @conn:	Connection pointer
>   * @iov:	Pointer to an array of iovec of TCP pre-cooked buffers
> - * @check:	Checksum, if already known
> + * @csum_flags:	TCP_CSUM if TCP checksum must be computed,
> + * 		IP4_CSUM if IPv4 checksum must be computed,
> + * 		otherwise IPv4 checksum is provided in IP4_CMASK
>   * @seq:	Sequence number for this segment
> - * @no_tcp_csum: Do not set TCP checksum
>   */
>  static void tcp_l2_buf_fill_headers(const struct ctx *c,
>  				    struct tcp_tap_conn *conn,
> -				    struct iovec *iov, const uint16_t *check,
> -				    uint32_t seq, bool no_tcp_csum)
> +				    struct iovec *iov, uint32_t csum_flags,
> +				    uint32_t seq)
>  {
>  	struct iov_tail tail = IOV_TAIL(&iov[TCP_IOV_PAYLOAD], 1, 0);
>  	struct tcphdr th_storage, *th = IOV_REMOVE_HEADER(&tail, th_storage);
> @@ -191,8 +192,7 @@ static void tcp_l2_buf_fill_headers(const struct ctx *c,
>  		ip6h = iov[TCP_IOV_IP].iov_base;
>  
>  	l2len = tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &tail,
> -				 iov_tail_size(&tail), check, seq,
> -				 no_tcp_csum);
> +				 iov_tail_size(&tail), csum_flags, seq);
>  	tap_hdr_update(taph, l2len);
>  }
>  
> @@ -234,7 +234,7 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  	if (flags & KEEPALIVE)
>  		seq--;
>  
> -	tcp_l2_buf_fill_headers(c, conn, iov, NULL, seq, false);
> +	tcp_l2_buf_fill_headers(c, conn, iov, IP4_CSUM | TCP_CSUM, seq);
>  
>  	tcp_l2_buf_pad(iov);
>  
> @@ -271,7 +271,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
>  			    ssize_t dlen, int no_csum, uint32_t seq, bool push)
>  {
>  	struct tcp_payload_t *payload;
> -	const uint16_t *check = NULL;
> +	uint32_t check = IP4_CSUM;
>  	struct iovec *iov;
>  
>  	conn->seq_to_tap = seq + dlen;
> @@ -280,9 +280,10 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
>  	if (CONN_V4(conn)) {
>  		if (no_csum) {
>  			struct iovec *iov_prev = tcp_l2_iov[tcp_payload_used - 1];
> -			struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base;
> +			const struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base;
>  
> -			check = &iph->check;
> +			/* overwrite IP4_CSUM flag as we set the checksum */
> +			check = iph->check;
>  		}
>  		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
>  	} else if (CONN_V6(conn)) {
> @@ -296,7 +297,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
>  	payload->th.ack = 1;
>  	payload->th.psh = push;
>  	iov[TCP_IOV_PAYLOAD].iov_len = dlen + sizeof(struct tcphdr);
> -	tcp_l2_buf_fill_headers(c, conn, iov, check, seq, false);
> +	tcp_l2_buf_fill_headers(c, conn, iov, TCP_CSUM | check, seq);
>  
>  	tcp_l2_buf_pad(iov);
>  
> diff --git a/tcp_internal.h b/tcp_internal.h
> index a0fa19f4ed11..40472c9973c8 100644
> --- a/tcp_internal.h
> +++ b/tcp_internal.h
> @@ -183,12 +183,15 @@ void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn);
>  
>  struct tcp_info_linux;
>  
> +#define IP4_CSUM	0x80000000
> +#define IP4_CMASK	0x0000FFFF
> +#define TCP_CSUM	0x40000000
> +
>  size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
>  			struct ethhdr *eh,
>  			struct iphdr *ip4h, struct ipv6hdr *ip6h,
>  			struct tcphdr *th, struct iov_tail *payload,
> -			size_t dlen, const uint16_t *ip4_check, uint32_t seq,
> -			bool no_tcp_csum);
> +			size_t dlen, uint32_t csum_flags, uint32_t seq);
>  
>  int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
>  			  bool force_seq, struct tcp_info_linux *tinfo);
> diff --git a/tcp_vu.c b/tcp_vu.c
> index 2dfe14485eee..3e399c20f0d7 100644
> --- a/tcp_vu.c
> +++ b/tcp_vu.c
> @@ -134,7 +134,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  		seq--;
>  
>  	tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload,
> -			 optlen, NULL, seq, !*c->pcap);
> +			 optlen, IP4_CSUM | (*c->pcap ? TCP_CSUM : 0), seq);
>  
>  	vu_pad(flags_elem[0].in_sg, 1, hdrlen + optlen);
>  	vu_flush(vdev, vq, flags_elem, 1, hdrlen + optlen);
> @@ -282,13 +282,15 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
>   * @iov:		Pointer to the array of IO vectors
>   * @iov_cnt:		Number of entries in @iov
>   * @dlen:		Data length
> - * @check:		Checksum, if already known
> - * @no_tcp_csum:	Do not set TCP checksum
> + * @csum_flags:		Pointer to checksum flags (input/output)
> + * 			TCP_CSUM if TCP checksum must be computed,
> + * 			IP4_CSUM if IPv4 checksum must be computed,
> + * 			otherwise IPv4 checksum is provided in IP4_CMASK
>   * @push:		Set PSH flag, last segment in a batch
>   */
>  static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
>  			   struct iovec *iov, size_t iov_cnt, size_t dlen,
> -			   const uint16_t **check, bool no_tcp_csum, bool push)
> +			   uint32_t *csum_flags, bool push)
>  {
>  	const struct flowside *toside = TAPFLOW(conn);
>  	bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
> @@ -332,9 +334,11 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
>  	th->psh = push;
>  
>  	tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload, dlen,
> -			 *check, conn->seq_to_tap, no_tcp_csum);
> +			 *csum_flags, conn->seq_to_tap);
> +
> +	/* Preserve TCP_CSUM, overwrite IP4_CSUM as we set the checksum */
>  	if (ip4h)
> -		*check = &ip4h->check;
> +		*csum_flags = (*csum_flags & TCP_CSUM) | ip4h->check;
>  }
>  
>  /**
> @@ -350,12 +354,11 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
>  	uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
>  	struct vu_dev *vdev = c->vdev;
>  	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
> +	uint32_t already_sent, check;
>  	ssize_t len, previous_dlen;
>  	int i, iov_cnt, head_cnt;
>  	size_t hdrlen, fillsize;
>  	int v6 = CONN_V6(conn);
> -	uint32_t already_sent;
> -	const uint16_t *check;
>  
>  	if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
>  		debug("Got packet, but RX virtqueue not usable yet");
> @@ -442,7 +445,10 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
>  	 */
>  
>  	hdrlen = tcp_vu_hdrlen(v6);
> -	for (i = 0, previous_dlen = -1, check = NULL; i < head_cnt; i++) {
> +	check = IP4_CSUM;
> +	if (*c->pcap)
> +		check |= TCP_CSUM;
> +	for (i = 0, previous_dlen = -1; i < head_cnt; i++) {
>  		struct iovec *iov = &elem[head[i]].in_sg[0];
>  		int buf_cnt = head[i + 1] - head[i];
>  		size_t frame_size = iov_size(iov, buf_cnt);
> @@ -458,10 +464,10 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
>  
>  		/* The IPv4 header checksum varies only with dlen */
>  		if (previous_dlen != dlen)
> -			check = NULL;
> +			check |= IP4_CSUM;
>  		previous_dlen = dlen;
>  
> -		tcp_vu_prepare(c, conn, iov, buf_cnt, dlen, &check, !*c->pcap, push);
> +		tcp_vu_prepare(c, conn, iov, buf_cnt, dlen, &check, push);
>  
>  		vu_pad(elem[head[i]].in_sg, buf_cnt, dlen + hdrlen);
>  		vu_flush(vdev, vq, &elem[head[i]], buf_cnt, dlen + hdrlen);
> -- 
> 2.53.0
> 

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  parent reply	other threads:[~2026-05-11  7:49 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-16 16:16 [PATCH v6 0/4] vhost-user,tcp: Handle multiple iovec entries per virtqueue element Laurent Vivier
2026-04-16 16:16 ` [PATCH v6 1/4] tcp: Encode checksum computation flags in a single parameter Laurent Vivier
2026-05-09 23:45   ` Jon Maloy
2026-05-11  7:49   ` David Gibson [this message]
2026-04-16 16:16 ` [PATCH v6 2/4] tcp_vu: Build headers on the stack and write them into the iovec Laurent Vivier
2026-05-09 23:57   ` Jon Maloy
2026-05-11  7:54   ` David Gibson
2026-04-16 16:16 ` [PATCH v6 3/4] tcp_vu: Support multibuffer frames in tcp_vu_sock_recv() Laurent Vivier
2026-04-17 14:56   ` Laurent Vivier
2026-05-10  1:33   ` Jon Maloy
2026-05-11  8:24   ` David Gibson
2026-04-16 16:16 ` [PATCH v6 4/4] tcp_vu: Support multibuffer frames in tcp_vu_send_flag() Laurent Vivier
2026-05-10  2:03   ` Jon Maloy
2026-05-11 10:52   ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=agGJ8A-9fpS64occ@zatzit \
    --to=david@gibson.dropbear.id.au \
    --cc=lvivier@redhat.com \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).