public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH] tcp: Encode checksum computation flags in a single parameter
@ 2026-03-24 11:01 Laurent Vivier
  2026-03-24 23:56 ` David Gibson
  0 siblings, 1 reply; 2+ messages in thread
From: Laurent Vivier @ 2026-03-24 11:01 UTC (permalink / raw)
  To: passt-dev; +Cc: Laurent Vivier, David Gibson

tcp_fill_headers() takes a pointer to a previously computed IPv4 header
checksum to avoid recalculating it when the payload length doesn't
change, and a separate bool to skip TCP checksum computation.

Replace both parameters with a single uint32_t csum_flags that encodes:
- IP4_CSUM (bit 31): compute IPv4 header checksum from scratch
- TCP_CSUM (bit 30): compute TCP checksum
- IP4_CMASK (low 16 bits): cached IPv4 header checksum value

When IP4_CSUM is not set, the cached checksum is extracted from the low
16 bits.  This is cleaner than the pointer-based approach, and also
avoids a potential dangling pointer issue: a subsequent patch makes
tcp_fill_headers() access ip4h via with_header(), which scopes it to a
temporary variable, so a pointer to ip4h->check would become invalid
after the with_header() block.

Suggested-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 tcp.c          | 25 +++++++++++++------------
 tcp_buf.c      | 24 +++++++++++++-----------
 tcp_internal.h |  7 +++++--
 tcp_vu.c       | 28 +++++++++++++++++-----------
 4 files changed, 48 insertions(+), 36 deletions(-)

diff --git a/tcp.c b/tcp.c
index b14586249c4e..14c472226225 100644
--- a/tcp.c
+++ b/tcp.c
@@ -943,9 +943,10 @@ static void tcp_fill_header(struct tcphdr *th,
  * @ip6h:		Pointer to IPv6 header, or NULL
  * @th:			Pointer to TCP header
  * @payload:		TCP payload
- * @ip4_check:		IPv4 checksum, if already known
+ * @csum_flags:		TCP_CSUM if TCP checksum must be computed,
+ *                      IP4_CSUM if IPv4 checksum must be computed,
+ *                      otherwise IPv4 checksum is provided in IP4_CMASK
  * @seq:		Sequence number for this segment
- * @no_tcp_csum:	Do not set TCP checksum
  *
  * Return: frame length (including L2 headers)
  */
@@ -953,8 +954,7 @@ size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
 			struct ethhdr *eh,
 			struct iphdr *ip4h, struct ipv6hdr *ip6h,
 			struct tcphdr *th, struct iov_tail *payload,
-			const uint16_t *ip4_check, uint32_t seq,
-			bool no_tcp_csum)
+			uint32_t csum_flags, uint32_t seq)
 {
 	const struct flowside *tapside = TAPFLOW(conn);
 	size_t l4len = iov_tail_size(payload) + sizeof(*th);
@@ -974,13 +974,14 @@ size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
 		ip4h->saddr = src4->s_addr;
 		ip4h->daddr = dst4->s_addr;
 
-		if (ip4_check)
-			ip4h->check = *ip4_check;
-		else
+		if (csum_flags & IP4_CSUM) {
 			ip4h->check = csum_ip4_header(l3len, IPPROTO_TCP,
 						      *src4, *dst4);
+		} else {
+			ip4h->check = csum_flags & IP4_CMASK;
+		}
 
-		if (!no_tcp_csum) {
+		if (csum_flags & TCP_CSUM) {
 			psum = proto_ipv4_header_psum(l4len, IPPROTO_TCP,
 						      *src4, *dst4);
 		}
@@ -1000,7 +1001,7 @@ size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
 
 		ip6_set_flow_lbl(ip6h, conn->sock);
 
-		if (!no_tcp_csum) {
+		if (csum_flags & TCP_CSUM) {
 			psum = proto_ipv6_header_psum(l4len, IPPROTO_TCP,
 						      &ip6h->saddr,
 						      &ip6h->daddr);
@@ -1015,10 +1016,10 @@ size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
 
 	tcp_fill_header(th, conn, seq);
 
-	if (no_tcp_csum)
-		th->check = 0;
-	else
+	if (csum_flags & TCP_CSUM)
 		tcp_update_csum(psum, th, payload);
+	else
+		th->check = 0;
 
 	return MAX(l3len + sizeof(struct ethhdr), ETH_ZLEN);
 }
diff --git a/tcp_buf.c b/tcp_buf.c
index 41965b107567..9c982129bbcf 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -166,14 +166,15 @@ static void tcp_l2_buf_pad(struct iovec *iov)
  * @c:		Execution context
  * @conn:	Connection pointer
  * @iov:	Pointer to an array of iovec of TCP pre-cooked buffers
- * @check:	Checksum, if already known
+ * @csum_flags:	TCP_CSUM if TCP checksum must be computed,
+ * 		IP4_CSUM if IPv4 checksum must be computed,
+ * 		otherwise IPv4 checksum is provided in IP4_CMASK
  * @seq:	Sequence number for this segment
- * @no_tcp_csum: Do not set TCP checksum
  */
 static void tcp_l2_buf_fill_headers(const struct ctx *c,
 				    struct tcp_tap_conn *conn,
-				    struct iovec *iov, const uint16_t *check,
-				    uint32_t seq, bool no_tcp_csum)
+				    struct iovec *iov, uint32_t csum_flags,
+				    uint32_t seq)
 {
 	struct iov_tail tail = IOV_TAIL(&iov[TCP_IOV_PAYLOAD], 1, 0);
 	struct tcphdr th_storage, *th = IOV_REMOVE_HEADER(&tail, th_storage);
@@ -190,8 +191,8 @@ static void tcp_l2_buf_fill_headers(const struct ctx *c,
 	else
 		ip6h = iov[TCP_IOV_IP].iov_base;
 
-	l2len = tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &tail, check, seq,
-				 no_tcp_csum);
+	l2len = tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &tail, csum_flags,
+				 seq);
 	tap_hdr_update(taph, l2len);
 }
 
@@ -233,7 +234,7 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	if (flags & KEEPALIVE)
 		seq--;
 
-	tcp_l2_buf_fill_headers(c, conn, iov, NULL, seq, false);
+	tcp_l2_buf_fill_headers(c, conn, iov, IP4_CSUM | TCP_CSUM, seq);
 
 	tcp_l2_buf_pad(iov);
 
@@ -270,7 +271,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 			    ssize_t dlen, int no_csum, uint32_t seq, bool push)
 {
 	struct tcp_payload_t *payload;
-	const uint16_t *check = NULL;
+	uint32_t check = IP4_CSUM;
 	struct iovec *iov;
 
 	conn->seq_to_tap = seq + dlen;
@@ -279,9 +280,10 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 	if (CONN_V4(conn)) {
 		if (no_csum) {
 			struct iovec *iov_prev = tcp_l2_iov[tcp_payload_used - 1];
-			struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base;
+			const struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base;
 
-			check = &iph->check;
+			/* overwrite IP4_CSUM flag as we set the checksum */
+			check = iph->check;
 		}
 		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[tcp_payload_used]);
 	} else if (CONN_V6(conn)) {
@@ -295,7 +297,7 @@ static void tcp_data_to_tap(const struct ctx *c, struct tcp_tap_conn *conn,
 	payload->th.ack = 1;
 	payload->th.psh = push;
 	iov[TCP_IOV_PAYLOAD].iov_len = dlen + sizeof(struct tcphdr);
-	tcp_l2_buf_fill_headers(c, conn, iov, check, seq, false);
+	tcp_l2_buf_fill_headers(c, conn, iov, TCP_CSUM | check, seq);
 
 	tcp_l2_buf_pad(iov);
 
diff --git a/tcp_internal.h b/tcp_internal.h
index d9408852571f..525fcbfa48fb 100644
--- a/tcp_internal.h
+++ b/tcp_internal.h
@@ -183,12 +183,15 @@ void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn);
 
 struct tcp_info_linux;
 
+#define IP4_CSUM	0x80000000
+#define IP4_CMASK	0x0000FFFF
+#define TCP_CSUM	0x40000000
+
 size_t tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
 			struct ethhdr *eh,
 			struct iphdr *ip4h, struct ipv6hdr *ip6h,
 			struct tcphdr *th, struct iov_tail *payload,
-			const uint16_t *ip4_check, uint32_t seq,
-			bool no_tcp_csum);
+			uint32_t csum_flags, uint32_t seq);
 
 int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
 			  bool force_seq, struct tcp_info_linux *tinfo);
diff --git a/tcp_vu.c b/tcp_vu.c
index dc0e17c0f03f..1afd582aaf95 100644
--- a/tcp_vu.c
+++ b/tcp_vu.c
@@ -138,7 +138,7 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
 		seq--;
 
 	tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload,
-			 NULL, seq, !*c->pcap);
+			 IP4_CSUM | (*c->pcap ? TCP_CSUM : 0), seq);
 
 	l2len = optlen + hdrlen - VNET_HLEN;
 	vu_pad(&flags_elem[0].in_sg[0], l2len);
@@ -280,13 +280,15 @@ static ssize_t tcp_vu_sock_recv(const struct ctx *c, struct vu_virtq *vq,
  * @conn:		Connection pointer
  * @iov:		Pointer to the array of IO vectors
  * @iov_cnt:		Number of entries in @iov
- * @check:		Checksum, if already known
- * @no_tcp_csum:	Do not set TCP checksum
+ * @csum_flags:		Pointer to checksum flags (input/output)
+ * 			TCP_CSUM if TCP checksum must be computed,
+ * 			IP4_CSUM if IPv4 checksum must be computed,
+ * 			otherwise IPv4 checksum is provided in IP4_CMASK
  * @push:		Set PSH flag, last segment in a batch
  */
 static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
 			   struct iovec *iov, size_t iov_cnt,
-			   const uint16_t **check, bool no_tcp_csum, bool push)
+			   uint32_t *csum_flags, bool push)
 {
 	const struct flowside *toside = TAPFLOW(conn);
 	bool v6 = !(inany_v4(&toside->eaddr) && inany_v4(&toside->oaddr));
@@ -330,9 +332,11 @@ static void tcp_vu_prepare(const struct ctx *c, struct tcp_tap_conn *conn,
 	th->psh = push;
 
 	tcp_fill_headers(c, conn, eh, ip4h, ip6h, th, &payload,
-			 *check, conn->seq_to_tap, no_tcp_csum);
+			 *csum_flags, conn->seq_to_tap);
+
+	/* Preserve TCP_CSUM, overwrite IP4_CSUM as we set the checksum */
 	if (ip4h)
-		*check = &ip4h->check;
+		*csum_flags = (*csum_flags & TCP_CSUM) | ip4h->check;
 }
 
 /**
@@ -348,12 +352,11 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 	uint32_t wnd_scaled = conn->wnd_from_tap << conn->ws_from_tap;
 	struct vu_dev *vdev = c->vdev;
 	struct vu_virtq *vq = &vdev->vq[VHOST_USER_RX_QUEUE];
+	uint32_t already_sent, check;
 	ssize_t len, previous_dlen;
 	int i, iov_cnt, head_cnt;
 	size_t hdrlen, fillsize;
 	int v6 = CONN_V6(conn);
-	uint32_t already_sent;
-	const uint16_t *check;
 
 	if (!vu_queue_enabled(vq) || !vu_queue_started(vq)) {
 		debug("Got packet, but RX virtqueue not usable yet");
@@ -440,7 +443,10 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 	 */
 
 	hdrlen = tcp_vu_hdrlen(v6);
-	for (i = 0, previous_dlen = -1, check = NULL; i < head_cnt; i++) {
+	check = IP4_CSUM;
+	if (*c->pcap)
+		check |= TCP_CSUM;
+	for (i = 0, previous_dlen = -1; i < head_cnt; i++) {
 		struct iovec *iov = &elem[head[i]].in_sg[0];
 		int buf_cnt = head[i + 1] - head[i];
 		size_t frame_size = iov_size(iov, buf_cnt);
@@ -455,10 +461,10 @@ int tcp_vu_data_from_sock(const struct ctx *c, struct tcp_tap_conn *conn)
 
 		/* The IPv4 header checksum varies only with dlen */
 		if (previous_dlen != dlen)
-			check = NULL;
+			check |= IP4_CSUM;
 		previous_dlen = dlen;
 
-		tcp_vu_prepare(c, conn, iov, buf_cnt, &check, !*c->pcap, push);
+		tcp_vu_prepare(c, conn, iov, buf_cnt, &check, push);
 
 		/* Pad first/single buffer only, it's at least ETH_ZLEN long */
 		l2len = dlen + hdrlen - VNET_HLEN;
-- 
2.53.0


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] tcp: Encode checksum computation flags in a single parameter
  2026-03-24 11:01 [PATCH] tcp: Encode checksum computation flags in a single parameter Laurent Vivier
@ 2026-03-24 23:56 ` David Gibson
  0 siblings, 0 replies; 2+ messages in thread
From: David Gibson @ 2026-03-24 23:56 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: passt-dev

[-- Attachment #1: Type: text/plain, Size: 1903 bytes --]

On Tue, Mar 24, 2026 at 12:01:23PM +0100, Laurent Vivier wrote:
> tcp_fill_headers() takes a pointer to a previously computed IPv4 header
> checksum to avoid recalculating it when the payload length doesn't
> change, and a separate bool to skip TCP checksum computation.
> 
> Replace both parameters with a single uint32_t csum_flags that encodes:
> - IP4_CSUM (bit 31): compute IPv4 header checksum from scratch
> - TCP_CSUM (bit 30): compute TCP checksum
> - IP4_CMASK (low 16 bits): cached IPv4 header checksum value
> 
> When IP4_CSUM is not set, the cached checksum is extracted from the low
> 16 bits.  This is cleaner than the pointer-based approach, and also
> avoids a potential dangling pointer issue: a subsequent patch makes
> tcp_fill_headers() access ip4h via with_header(), which scopes it to a
> temporary variable, so a pointer to ip4h->check would become invalid
> after the with_header() block.
> 
> Suggested-by: David Gibson <david@gibson.dropbear.id.au>
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>

When I suggested this, I'd missed the fact that @ip4_check and
@no_tcp_csum were talking about different checksums, oops.
Nonetheless you've made it work :).

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

[snip]
>  	if (ip4h)
> -		*check = &ip4h->check;
> +		*csum_flags = (*csum_flags & TCP_CSUM) | ip4h->check;

Now that I've realised my mistake, I'm pretty neutral on whether we
include the TCP checksum control in this parameter.  I still think
avoiding the pointer is a significant win - not referencing one
packet's buffer when we're working on another means fewer non-obvious
constraints on how we organise those buffers.

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-03-24 23:56 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-24 11:01 [PATCH] tcp: Encode checksum computation flags in a single parameter Laurent Vivier
2026-03-24 23:56 ` David Gibson

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).