From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 4/8] tcp, udp: Don't include destination address in partially precomputed csums
Date: Fri, 28 Jul 2023 19:48:27 +1000 [thread overview]
Message-ID: <20230728094831.4097571-5-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20230728094831.4097571-1-david@gibson.dropbear.id.au>
We partially prepopulate IP and TCP header structures including, amongst
other things the destination address, which for IPv4 is always the known
address of the guest/namespace. We partially precompute both the IPv4
header checksum and the TCP checksum based on this.
In future we're going to want more flexibility with controlling the
destination for IPv4 (as we already do for IPv6), so this precomputed value
gets in the way. Therefore remove the IPv4 destination from the
precomputed checksum and fold it into the checksum update when we actually
send a packet.
Doing this means we no longer need to recompute those partial sums when
the destination address changes ({tcp,udp}_update_l2_buf()) and instead
the computation can be moved to compile time. This means while we perform
slightly more computations on each packet, we slightly reduce the amount of
memory we need to access.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
tcp.c | 61 ++++++++++++++++++++--------------------------------------
udp.c | 14 +++-----------
util.h | 4 +++-
3 files changed, 27 insertions(+), 52 deletions(-)
diff --git a/tcp.c b/tcp.c
index c1bfc4f..c0bffb3 100644
--- a/tcp.c
+++ b/tcp.c
@@ -323,10 +323,8 @@
#define MSS_DEFAULT 536
struct tcp4_l2_head { /* For MSS4 macro: keep in sync with tcp4_l2_buf_t */
- uint32_t psum;
- uint32_t tsum;
#ifdef __AVX2__
- uint8_t pad[18];
+ uint8_t pad[26];
#else
uint8_t pad[2];
#endif
@@ -443,8 +441,6 @@ static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
/**
* tcp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections
- * @psum: Partial IP header checksum (excluding tot_len and saddr)
- * @tsum: Partial TCP header checksum (excluding length and saddr)
* @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only
* @taph: Tap-level headers (partially pre-filled)
* @iph: Pre-filled IP header (except for tot_len and saddr)
@@ -452,17 +448,15 @@ static union inany_addr low_rtt_dst[LOW_RTT_TABLE_SIZE];
* @data: Storage for TCP payload
*/
static struct tcp4_l2_buf_t {
- uint32_t psum; /* 0 */
- uint32_t tsum; /* 4 */
#ifdef __AVX2__
- uint8_t pad[18]; /* 8, align th to 32 bytes */
+ uint8_t pad[26]; /* 0, align th to 32 bytes */
#else
- uint8_t pad[2]; /* align iph to 4 bytes 8 */
+ uint8_t pad[2]; /* align iph to 4 bytes 0 */
#endif
- struct tap_hdr taph; /* 26 10 */
- struct iphdr iph; /* 44 28 */
- struct tcphdr th; /* 64 48 */
- uint8_t data[MSS4]; /* 84 68 */
+ struct tap_hdr taph; /* 26 2 */
+ struct iphdr iph; /* 44 20 */
+ struct tcphdr th; /* 64 40 */
+ uint8_t data[MSS4]; /* 84 60 */
/* 65536 65532 */
#ifdef __AVX2__
} __attribute__ ((packed, aligned(32)))
@@ -517,8 +511,6 @@ static struct iovec tcp_iov [UIO_MAXIOV];
/**
* tcp4_l2_flags_buf_t - IPv4 packet buffers for segments without data (flags)
- * @psum: Partial IP header checksum (excluding tot_len and saddr)
- * @tsum: Partial TCP header checksum (excluding length and saddr)
* @pad: Align TCP header to 32 bytes, for AVX2 checksum calculation only
* @taph: Tap-level headers (partially pre-filled)
* @iph: Pre-filled IP header (except for tot_len and saddr)
@@ -526,16 +518,14 @@ static struct iovec tcp_iov [UIO_MAXIOV];
* @opts: Headroom for TCP options
*/
static struct tcp4_l2_flags_buf_t {
- uint32_t psum; /* 0 */
- uint32_t tsum; /* 4 */
#ifdef __AVX2__
- uint8_t pad[18]; /* 8, align th to 32 bytes */
+ uint8_t pad[26]; /* 0, align th to 32 bytes */
#else
- uint8_t pad[2]; /* align iph to 4 bytes 8 */
+ uint8_t pad[2]; /* align iph to 4 bytes 0 */
#endif
- struct tap_hdr taph; /* 26 10 */
- struct iphdr iph; /* 44 28 */
- struct tcphdr th; /* 64 48 */
+ struct tap_hdr taph; /* 26 2 */
+ struct iphdr iph; /* 44 20 */
+ struct tcphdr th; /* 64 40 */
char opts[OPT_MSS_LEN + OPT_WS_LEN + 1];
#ifdef __AVX2__
} __attribute__ ((packed, aligned(32)))
@@ -955,11 +945,13 @@ void tcp_sock_set_bufsize(const struct ctx *c, int s)
*/
static void tcp_update_check_ip4(struct tcp4_l2_buf_t *buf)
{
- uint32_t sum = buf->psum;
+ uint32_t sum = L2_BUF_IP4_PSUM(IPPROTO_TCP);
sum += buf->iph.tot_len;
sum += (buf->iph.saddr >> 16) & 0xffff;
sum += buf->iph.saddr & 0xffff;
+ sum += (buf->iph.daddr >> 16) & 0xffff;
+ sum += buf->iph.daddr & 0xffff;
buf->iph.check = (uint16_t)~csum_fold(sum);
}
@@ -971,10 +963,12 @@ static void tcp_update_check_ip4(struct tcp4_l2_buf_t *buf)
static void tcp_update_check_tcp4(struct tcp4_l2_buf_t *buf)
{
uint16_t tlen = ntohs(buf->iph.tot_len) - 20;
- uint32_t sum = buf->tsum;
+ uint32_t sum = htons(IPPROTO_TCP);
sum += (buf->iph.saddr >> 16) & 0xffff;
sum += buf->iph.saddr & 0xffff;
+ sum += (buf->iph.daddr >> 16) & 0xffff;
+ sum += buf->iph.daddr & 0xffff;
sum += htons(ntohs(buf->iph.tot_len) - 20);
buf->th.check = 0;
@@ -1025,20 +1019,6 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
if (ip_da) {
b4f->iph.daddr = b4->iph.daddr = ip_da->s_addr;
- if (!i) {
- b4f->iph.saddr = b4->iph.saddr = 0;
- b4f->iph.tot_len = b4->iph.tot_len = 0;
- b4f->iph.check = b4->iph.check = 0;
- b4f->psum = b4->psum = sum_16b(&b4->iph, 20);
-
- b4->tsum = ((ip_da->s_addr >> 16) & 0xffff) +
- (ip_da->s_addr & 0xffff) +
- htons(IPPROTO_TCP);
- b4f->tsum = b4->tsum;
- } else {
- b4f->psum = b4->psum = tcp4_l2_buf[0].psum;
- b4f->tsum = b4->tsum = tcp4_l2_buf[0].tsum;
- }
}
}
}
@@ -1047,15 +1027,16 @@ void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
* tcp_sock4_iov_init() - Initialise scatter-gather L2 buffers for IPv4 sockets
* @c: Execution context
*/
-static void tcp_sock4_iov_init(const struct ctx *c)
+static void tcp_sock4_iov_init(struct ctx *c)
{
+ struct iphdr iph = L2_BUF_IP4_INIT(IPPROTO_TCP);
struct iovec *iov;
int i;
for (i = 0; i < ARRAY_SIZE(tcp4_l2_buf); i++) {
tcp4_l2_buf[i] = (struct tcp4_l2_buf_t) {
.taph = TAP_HDR_INIT(ETH_P_IP),
- .iph = L2_BUF_IP4_INIT(IPPROTO_TCP),
+ .iph = iph,
.th = { .doff = sizeof(struct tcphdr) / 4, .ack = 1 }
};
}
diff --git a/udp.c b/udp.c
index 47b4a61..b82aea5 100644
--- a/udp.c
+++ b/udp.c
@@ -168,7 +168,6 @@ static uint8_t udp_act[IP_VERSIONS][UDP_ACT_TYPE_MAX][DIV_ROUND_UP(NUM_PORTS, 8)
/**
* udp4_l2_buf_t - Pre-cooked IPv4 packet buffers for tap connections
* @s_in: Source socket address, filled in by recvmmsg()
- * @psum: Partial IP header checksum (excluding tot_len and saddr)
* @taph: Tap-level headers (partially pre-filled)
* @iph: Pre-filled IP header (except for tot_len and saddr)
* @uh: Headroom for UDP header
@@ -176,7 +175,6 @@ static uint8_t udp_act[IP_VERSIONS][UDP_ACT_TYPE_MAX][DIV_ROUND_UP(NUM_PORTS, 8)
*/
static struct udp4_l2_buf_t {
struct sockaddr_in s_in;
- uint32_t psum;
struct tap_hdr taph;
struct iphdr iph;
@@ -263,11 +261,13 @@ static void udp_invert_portmap(struct udp_port_fwd *fwd)
*/
static void udp_update_check4(struct udp4_l2_buf_t *buf)
{
- uint32_t sum = buf->psum;
+ uint32_t sum = L2_BUF_IP4_PSUM(IPPROTO_UDP);
sum += buf->iph.tot_len;
sum += (buf->iph.saddr >> 16) & 0xffff;
sum += buf->iph.saddr & 0xffff;
+ sum += (buf->iph.daddr >> 16) & 0xffff;
+ sum += buf->iph.daddr & 0xffff;
buf->iph.check = (uint16_t)~csum_fold(sum);
}
@@ -292,14 +292,6 @@ void udp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s,
if (ip_da) {
b4->iph.daddr = ip_da->s_addr;
- if (!i) {
- b4->iph.saddr = 0;
- b4->iph.tot_len = 0;
- b4->iph.check = 0;
- b4->psum = sum_16b(&b4->iph, 20);
- } else {
- b4->psum = udp4_l2_buf[0].psum;
- }
}
}
}
diff --git a/util.h b/util.h
index 26892aa..a1c3829 100644
--- a/util.h
+++ b/util.h
@@ -141,11 +141,13 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
.tot_len = 0, \
.id = 0, \
.frag_off = 0, \
- .ttl = 255, \
+ .ttl = 0xff, \
.protocol = (proto), \
.saddr = 0, \
.daddr = 0, \
}
+#define L2_BUF_IP4_PSUM(proto) ((uint32_t)htons_constant(0x4500) + \
+ (uint32_t)htons_constant(0xff00 | (proto)))
#define L2_BUF_IP6_INIT(proto) \
{ \
--
@@ -141,11 +141,13 @@ int do_clone(int (*fn)(void *), char *stack_area, size_t stack_size, int flags,
.tot_len = 0, \
.id = 0, \
.frag_off = 0, \
- .ttl = 255, \
+ .ttl = 0xff, \
.protocol = (proto), \
.saddr = 0, \
.daddr = 0, \
}
+#define L2_BUF_IP4_PSUM(proto) ((uint32_t)htons_constant(0x4500) + \
+ (uint32_t)htons_constant(0xff00 | (proto)))
#define L2_BUF_IP6_INIT(proto) \
{ \
--
2.41.0
next prev parent reply other threads:[~2023-07-28 9:48 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-07-28 9:48 [PATCH 0/8] RFC: Generalize flow tracking, part 1 David Gibson
2023-07-28 9:48 ` [PATCH 1/8] tap: Don't clobber source address in tap6_handler() David Gibson
2023-07-28 9:48 ` [PATCH 2/8] tap: Pass source address to protocol handler functions David Gibson
2023-07-28 9:48 ` [PATCH 3/8] tcp: More precise terms for addresses and ports David Gibson
2023-07-28 9:48 ` David Gibson [this message]
2023-07-28 9:48 ` [PATCH 5/8] tcp, udp: Don't pre-fill IPv4 destination address in headers David Gibson
2023-07-28 9:48 ` [PATCH 6/8] tcp: Track guest-side correspondent address David Gibson
2023-07-28 9:48 ` [PATCH 7/8] tcp, flow: Introduce struct demiflow David Gibson
2023-07-28 9:48 ` [PATCH 8/8] tcp, flow: Perform TCP hash calculations based on demiflow structure David Gibson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230728094831.4097571-5-david@gibson.dropbear.id.au \
--to=david@gibson.dropbear.id.au \
--cc=passt-dev@passt.top \
--cc=sbrivio@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://passt.top/passt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).