public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 4/8] tcp, udp: Don't include destination address in partially precomputed csums
Date: Fri, 28 Jul 2023 19:48:27 +1000	[thread overview]
Message-ID: <20230728094831.4097571-5-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20230728094831.4097571-1-david@gibson.dropbear.id.au>

We partially prepopulate IP and TCP header structures including, amongst
other things the destination address, which for IPv4 is always the known
address of the guest/namespace.  We partially precompute both the IPv4
header checksum and the TCP checksum based on this.

In future we're going to want more flexibility with controlling the
destination for IPv4 (as we already do for IPv6), so this precomputed value
gets in the way.  Therefore remove the IPv4 destination from the
precomputed checksum and fold it into the checksum update when we actually
send a packet.

Doing this means we no longer need to recompute those partial sums when
the destination address changes ({tcp,udp}_update_l2_buf()) and instead
the computation can be moved to compile time.  This means while we perform
slightly more computations on each packet, we slightly reduce the amount of
memory we need to access.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp.c  | 61 ++++++++++++++++++++--------------------------------------
 udp.c  | 14 +++-----------
 util.h |  4 +++-
 3 files changed, 27 insertions(+), 52 deletions(-)

diff --git a/tcp.c b/tcp.c
index c1bfc4f..c0bffb3 100644
--- a/tcp.c
+++ b/tcp.c
@@ -323,10 +323,8 @@
 #define MSS_DEFAULT			536
 
 struct tcp4_l2_head {	/* For MSS4 macro: keep in sync with tcp4_l2_buf_t */
-	uint32_t psum;
-	uint32_t tsum;
 #ifdef __AVX2__
-	uint8_t pad[18];
+	uint8_t pad[26];
 #else
 	uint8_t pad[2];
 #endif
@@ -443,8 +441,6 @@ static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
 
 /**
  * tcp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections
- * @psum:	Partial IP header checksum (excluding tot_len and saddr)
- * @tsum:	Partial TCP header checksum (excluding length and saddr)
  * @pad:	Align TCP header to 32 bytes, for AVX2 checksum calculation only
  * @taph:	Tap-level headers (partially pre-filled)
  * @iph:	Pre-filled IP header (except for tot_len and saddr)
@@ -452,17 +448,15 @@ static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
  * @data:	Storage for TCP payload
  */
 static struct tcp4_l2_buf_t {
-	uint32_t psum;		/* 0 */
-	uint32_t tsum;		/* 4 */
 #ifdef __AVX2__
-	uint8_t pad[18];	/* 8, align th to 32 bytes */
+	uint8_t pad[26];	/* 0, align th to 32 bytes */
 #else
-	uint8_t pad[2];		/*	align iph to 4 bytes	8 */
+	uint8_t pad[2];		/*	align iph to 4 bytes	0 */
 #endif
-	struct tap_hdr taph;	/* 26				10 */
-	struct iphdr iph;	/* 44				28 */
-	struct tcphdr th;	/* 64				48 */
-	uint8_t data[MSS4];	/* 84				68 */
+	struct tap_hdr taph;	/* 26				2 */
+	struct iphdr iph;	/* 44				20 */
+	struct tcphdr th;	/* 64				40 */
+	uint8_t data[MSS4];	/* 84				60 */
 				/* 65536			65532 */
 #ifdef __AVX2__
 } __attribute__ ((packed, aligned(32)))
@@ -517,8 +511,6 @@ static struct iovec	tcp_iov			[UIO_MAXIOV];
 
 /**
  * tcp4_l2_flags_buf_t - IPv4 packet buffers for segments without data (flags)
- * @psum:	Partial IP header checksum (excluding tot_len and saddr)
- * @tsum:	Partial TCP header checksum (excluding length and saddr)
  * @pad:	Align TCP header to 32 bytes, for AVX2 checksum calculation only
  * @taph:	Tap-level headers (partially pre-filled)
  * @iph:	Pre-filled IP header (except for tot_len and saddr)
@@ -526,16 +518,14 @@ static struct iovec	tcp_iov			[UIO_MAXIOV];
  * @opts:	Headroom for TCP options
  */
 static struct tcp4_l2_flags_buf_t {
-	uint32_t psum;		/* 0 */
-	uint32_t tsum;		/* 4 */
 #ifdef __AVX2__
-	uint8_t pad[18];	/* 8, align th to 32 bytes */
+	uint8_t pad[26];	/* 0, align th to 32 bytes */
 #else
-	uint8_t pad[2];		/*	align iph to 4 bytes	8 */
+	uint8_t pad[2];		/*	align iph to 4 bytes	0 */
 #endif
-	struct tap_hdr taph;	/* 26				10 */
-	struct iphdr iph;	/* 44				28 */
-	struct tcphdr th;	/* 64				48 */
+	struct tap_hdr taph;	/* 26				2 */
+	struct iphdr iph;	/* 44				20 */
+	struct tcphdr th;	/* 64				40 */
 	char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
 #ifdef __AVX2__
 } __attribute__ ((packed, aligned(32)))
@@ -955,11 +945,13 @@ void tcp_sock_set_bufsize(const struct ctx *c, int s)
  */
 static void tcp_update_check_ip4(struct tcp4_l2_buf_t *buf)
 {
-	uint32_t sum = buf->psum;
+	uint32_t sum = L2_BUF_IP4_PSUM(IPPROTO_TCP);
 
 	sum += buf->iph.tot_len;
 	sum += (buf->iph.saddr >> 16) & 0xffff;
 	sum += buf->iph.saddr & 0xffff;
+	sum += (buf->iph.daddr >> 16) & 0xffff;
+	sum += buf->iph.daddr & 0xffff;
 
 	buf->iph.check = (uint16_t)~csum_fold(sum);
 }
@@ -971,10 +963,12 @@ static void tcp_update_check_ip4(struct tcp4_l2_buf_t *buf)
 static void tcp_update_check_tcp4(struct tcp4_l2_buf_t *buf)
 {
 	uint16_t tlen = ntohs(buf->iph.tot_len) - 20;
-	uint32_t sum = buf->tsum;
+	uint32_t sum = htons(IPPROTO_TCP);
 
 	sum += (buf->iph.saddr >> 16) & 0xffff;
 	sum += buf->iph.saddr & 0xffff;
+	sum += (buf->iph.daddr >> 16) & 0xffff;
+	sum += buf->iph.daddr & 0xffff;
 	sum += htons(ntohs(buf->iph.tot_len) - 20);
 
 	buf->th.check = 0;
@@ -1025,20 +1019,6 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
 
 		if (ip_da) {
 			b4f->iph.daddr = b4->iph.daddr = ip_da->s_addr;
-			if (!i) {
-				b4f->iph.saddr = b4->iph.saddr = 0;
-				b4f->iph.tot_len = b4->iph.tot_len = 0;
-				b4f->iph.check = b4->iph.check = 0;
-				b4f->psum = b4->psum = sum_16b(&b4->iph, 20);
-
-				b4->tsum = ((ip_da->s_addr >> 16) & 0xffff) +
-					    (ip_da->s_addr & 0xffff) +
-					    htons(IPPROTO_TCP);
-				b4f->tsum = b4->tsum;
-			} else {
-				b4f->psum = b4->psum = tcp4_l2_buf[0].psum;
-				b4f->tsum = b4->tsum = tcp4_l2_buf[0].tsum;
-			}
 		}
 	}
 }
@@ -1047,15 +1027,16 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
  * tcp_sock4_iov_init() - Initialise scatter-gather L2 buffers for IPv4 sockets
  * @c:		Execution context
  */
-static void tcp_sock4_iov_init(const struct ctx *c)
+static void tcp_sock4_iov_init(struct ctx *c)
 {
+	struct iphdr iph = L2_BUF_IP4_INIT(IPPROTO_TCP);
 	struct iovec *iov;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(tcp4_l2_buf); i++) {
 		tcp4_l2_buf[i] = (struct tcp4_l2_buf_t) {
 			.taph = TAP_HDR_INIT(ETH_P_IP),
-			.iph = L2_BUF_IP4_INIT(IPPROTO_TCP),
+			.iph = iph,
 			.th = { .doff = sizeof(struct tcphdr) / 4, .ack = 1 }
 		};
 	}
diff --git a/udp.c b/udp.c
index 47b4a61..b82aea5 100644
--- a/udp.c
+++ b/udp.c
@@ -168,7 +168,6 @@ static uint8_t udp_act[IP_VERSIONS][UDP_ACT_TYPE_MAX][DIV_ROUND_UP(NUM_PORTS, 8)
 /**
  * udp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections
  * @s_in:	Source socket address, filled in by recvmmsg()
- * @psum:	Partial IP header checksum (excluding tot_len and saddr)
  * @taph:	Tap-level headers (partially pre-filled)
  * @iph:	Pre-filled IP header (except for tot_len and saddr)
  * @uh:		Headroom for UDP header
@@ -176,7 +175,6 @@ static uint8_t udp_act[IP_VERSIONS][UDP_ACT_TYPE_MAX][DIV_ROUND_UP(NUM_PORTS, 8)
  */
 static struct udp4_l2_buf_t {
 	struct sockaddr_in s_in;
-	uint32_t psum;
 
 	struct tap_hdr taph;
 	struct iphdr iph;
@@ -263,11 +261,13 @@ static void udp_invert_portmap(struct udp_port_fwd *fwd)
  */
 static void udp_update_check4(struct udp4_l2_buf_t *buf)
 {
-	uint32_t sum = buf->psum;
+	uint32_t sum = L2_BUF_IP4_PSUM(IPPROTO_UDP);
 
 	sum += buf->iph.tot_len;
 	sum += (buf->iph.saddr >> 16) & 0xffff;
 	sum += buf->iph.saddr & 0xffff;
+	sum += (buf->iph.daddr >> 16) & 0xffff;
+	sum += buf->iph.daddr & 0xffff;
 
 	buf->iph.check = (uint16_t)~csum_fold(sum);
 }
@@ -292,14 +292,6 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
 
 		if (ip_da) {
 			b4->iph.daddr = ip_da->s_addr;
-			if (!i) {
-				b4->iph.saddr = 0;
-				b4->iph.tot_len = 0;
-				b4->iph.check = 0;
-				b4->psum = sum_16b(&b4->iph, 20);
-			} else {
-				b4->psum = udp4_l2_buf[0].psum;
-			}
 		}
 	}
 }
diff --git a/util.h b/util.h
index 26892aa..a1c3829 100644
--- a/util.h
+++ b/util.h
@@ -141,11 +141,13 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
 		.tot_len	= 0,					\
 		.id		= 0,					\
 		.frag_off	= 0,					\
-		.ttl		= 255,					\
+		.ttl		= 0xff,					\
 		.protocol	= (proto),				\
 		.saddr		= 0,					\
 		.daddr		= 0,					\
 	}
+#define L2_BUF_IP4_PSUM(proto)	((uint32_t)htons_constant(0x4500) +	\
+				 (uint32_t)htons_constant(0xff00 | (proto)))
 
 #define L2_BUF_IP6_INIT(proto)						\
 	{								\
-- 
@@ -141,11 +141,13 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
 		.tot_len	= 0,					\
 		.id		= 0,					\
 		.frag_off	= 0,					\
-		.ttl		= 255,					\
+		.ttl		= 0xff,					\
 		.protocol	= (proto),				\
 		.saddr		= 0,					\
 		.daddr		= 0,					\
 	}
+#define L2_BUF_IP4_PSUM(proto)	((uint32_t)htons_constant(0x4500) +	\
+				 (uint32_t)htons_constant(0xff00 | (proto)))
 
 #define L2_BUF_IP6_INIT(proto)						\
 	{								\
-- 
2.41.0


  parent reply	other threads:[~2023-07-28  9:48 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-28  9:48 [PATCH 0/8] RFC: Generalize flow tracking, part 1 David Gibson
2023-07-28  9:48 ` [PATCH 1/8] tap: Don't clobber source address in tap6_handler() David Gibson
2023-07-28  9:48 ` [PATCH 2/8] tap: Pass source address to protocol handler functions David Gibson
2023-07-28  9:48 ` [PATCH 3/8] tcp: More precise terms for addresses and ports David Gibson
2023-07-28  9:48 ` David Gibson [this message]
2023-07-28  9:48 ` [PATCH 5/8] tcp, udp: Don't pre-fill IPv4 destination address in headers David Gibson
2023-07-28  9:48 ` [PATCH 6/8] tcp: Track guest-side correspondent address David Gibson
2023-07-28  9:48 ` [PATCH 7/8] tcp, flow: Introduce struct demiflow David Gibson
2023-07-28  9:48 ` [PATCH 8/8] tcp, flow: Perform TCP hash calculations based on demiflow structure David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230728094831.4097571-5-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).