public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH 0/4] tcp: unify IPv4 and IPv6 l2 tap queues
@ 2024-09-06 21:34 Jon Maloy
  2024-09-06 21:34 ` [PATCH 1/4] tcp: create unified struct for IPv4 and IPv6 header Jon Maloy
                   ` (4 more replies)
  0 siblings, 5 replies; 9+ messages in thread
From: Jon Maloy @ 2024-09-06 21:34 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

This should save us some memory and code.

Jon Maloy (4):
  tcp: create unified struct for IPv4 and IPv6 header
  tcp: update ip address in l2 tap queues on the fly
  tcp: unify l2 TCPv4 and TCPv6 queues and structures
  tcp: change prefix tcp4_ to tcp_ where applicable

 tcp.c     |  22 ++---
 tcp_buf.c | 259 +++++++++++++++++-------------------------------------
 tcp_buf.h |   3 +
 3 files changed, 98 insertions(+), 186 deletions(-)

-- 
2.45.2


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/4] tcp: create unified struct for IPv4 and IPv6 header
  2024-09-06 21:34 [PATCH 0/4] tcp: unify IPv4 and IPv6 l2 tap queues Jon Maloy
@ 2024-09-06 21:34 ` Jon Maloy
  2024-09-09  1:04   ` David Gibson
  2024-09-06 21:34 ` [PATCH 2/4] tcp: update ip address in l2 tap queues on the fly Jon Maloy
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 9+ messages in thread
From: Jon Maloy @ 2024-09-06 21:34 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

As a preparation for unifying the l2 queues for TCPv4 and TCPv6 traffic
we prepare a unified struct that has space for both IP address types.

With this change, the arrays tcp4_l2_iov and tcp4_l2_iov, as well as
tcp4_l2_flags_iov and tcp4_l2_flags_iov, are identical in structure
and size.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 tcp_buf.c | 40 ++++++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/tcp_buf.c b/tcp_buf.c
index c31e9f3..1af4786 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -52,6 +52,18 @@ struct tcp_payload_t {
 } __attribute__ ((packed, aligned(__alignof__(unsigned int))));
 #endif
 
+/**
+ * struct iphdr_t - Area with space for both IPv4 and IPv6 header
+ * @ip4:	IPv4 header
+ * @ip6:	IPv6 header
+ */
+struct iphdr_t {
+	union {
+		struct iphdr ip4;
+		struct ipv6hdr ip6;
+	};
+};
+
 /**
  * struct tcp_flags_t - TCP header and data to send zero-length
  *                      segments (flags)
@@ -72,7 +84,7 @@ static struct ethhdr		tcp4_eth_src;
 
 static struct tap_hdr		tcp4_payload_tap_hdr[TCP_FRAMES_MEM];
 /* IPv4 headers */
-static struct iphdr		tcp4_payload_ip[TCP_FRAMES_MEM];
+static struct iphdr_t		tcp4_payload_ip[TCP_FRAMES_MEM];
 /* TCP segments with payload for IPv4 frames */
 static struct tcp_payload_t	tcp4_payload[TCP_FRAMES_MEM];
 
@@ -84,7 +96,7 @@ static unsigned int tcp4_payload_used;
 
 static struct tap_hdr		tcp4_flags_tap_hdr[TCP_FRAMES_MEM];
 /* IPv4 headers for TCP segment without payload */
-static struct iphdr		tcp4_flags_ip[TCP_FRAMES_MEM];
+static struct iphdr_t		tcp4_flags_ip[TCP_FRAMES_MEM];
 /* TCP segments without payload for IPv4 frames */
 static struct tcp_flags_t	tcp4_flags[TCP_FRAMES_MEM];
 
@@ -95,7 +107,7 @@ static struct ethhdr		tcp6_eth_src;
 
 static struct tap_hdr		tcp6_payload_tap_hdr[TCP_FRAMES_MEM];
 /* IPv6 headers */
-static struct ipv6hdr		tcp6_payload_ip[TCP_FRAMES_MEM];
+static struct iphdr_t		tcp6_payload_ip[TCP_FRAMES_MEM];
 /* TCP headers and data for IPv6 frames */
 static struct tcp_payload_t	tcp6_payload[TCP_FRAMES_MEM];
 
@@ -107,7 +119,7 @@ static unsigned int tcp6_payload_used;
 
 static struct tap_hdr		tcp6_flags_tap_hdr[TCP_FRAMES_MEM];
 /* IPv6 headers for TCP segment without payload */
-static struct ipv6hdr		tcp6_flags_ip[TCP_FRAMES_MEM];
+static struct iphdr_t		tcp6_flags_ip[TCP_FRAMES_MEM];
 /* TCP segment without payload for IPv6 frames */
 static struct tcp_flags_t	tcp6_flags[TCP_FRAMES_MEM];
 
@@ -144,13 +156,13 @@ void tcp_sock4_iov_init(const struct ctx *c)
 	tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
 
 	for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) {
-		tcp4_payload_ip[i] = iph;
+		tcp4_payload_ip[i].ip4 = iph;
 		tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4;
 		tcp4_payload[i].th.ack = 1;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(tcp4_flags); i++) {
-		tcp4_flags_ip[i] = iph;
+		tcp4_flags_ip[i].ip4 = iph;
 		tcp4_flags[i].th.doff = sizeof(struct tcphdr) / 4;
 		tcp4_flags[i].th.ack = 1;
 	}
@@ -160,7 +172,8 @@ void tcp_sock4_iov_init(const struct ctx *c)
 
 		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_payload_tap_hdr[i]);
 		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
-		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[i]);
+		iov[TCP_IOV_IP].iov_base = &tcp4_payload_ip[i];
+		iov[TCP_IOV_IP].iov_len = sizeof(tcp4_payload_ip[i].ip4);
 		iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_payload[i];
 	}
 
@@ -170,7 +183,8 @@ void tcp_sock4_iov_init(const struct ctx *c)
 		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_flags_tap_hdr[i]);
 		iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
 		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
-		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[i]);
+		iov[TCP_IOV_IP].iov_base = &tcp4_flags_ip[i];
+		iov[TCP_IOV_IP].iov_len = sizeof(tcp4_flags_ip[i].ip4);
 		iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_flags[i];
 	}
 }
@@ -188,13 +202,13 @@ void tcp_sock6_iov_init(const struct ctx *c)
 	tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6);
 
 	for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) {
-		tcp6_payload_ip[i] = ip6;
+		tcp6_payload_ip[i].ip6 = ip6;
 		tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4;
 		tcp6_payload[i].th.ack = 1;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(tcp6_flags); i++) {
-		tcp6_flags_ip[i] = ip6;
+		tcp6_flags_ip[i].ip6 = ip6;
 		tcp6_flags[i].th.doff = sizeof(struct tcphdr) / 4;
 		tcp6_flags[i].th .ack = 1;
 	}
@@ -204,7 +218,8 @@ void tcp_sock6_iov_init(const struct ctx *c)
 
 		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_payload_tap_hdr[i]);
 		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
-		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[i]);
+		iov[TCP_IOV_IP].iov_base = &tcp6_payload_ip[i];
+		iov[TCP_IOV_IP].iov_len = sizeof(tcp6_payload_ip[i].ip6);
 		iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i];
 	}
 
@@ -213,7 +228,8 @@ void tcp_sock6_iov_init(const struct ctx *c)
 
 		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_flags_tap_hdr[i]);
 		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
-		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[i]);
+		iov[TCP_IOV_IP].iov_base = &tcp6_flags_ip[i];
+		iov[TCP_IOV_IP].iov_len = sizeof(tcp6_flags_ip[i].ip6);
 		iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_flags[i];
 	}
 }
-- 
@@ -52,6 +52,18 @@ struct tcp_payload_t {
 } __attribute__ ((packed, aligned(__alignof__(unsigned int))));
 #endif
 
+/**
+ * struct iphdr_t - Area with space for both IPv4 and IPv6 header
+ * @ip4:	IPv4 header
+ * @ip6:	IPv6 header
+ */
+struct iphdr_t {
+	union {
+		struct iphdr ip4;
+		struct ipv6hdr ip6;
+	};
+};
+
 /**
  * struct tcp_flags_t - TCP header and data to send zero-length
  *                      segments (flags)
@@ -72,7 +84,7 @@ static struct ethhdr		tcp4_eth_src;
 
 static struct tap_hdr		tcp4_payload_tap_hdr[TCP_FRAMES_MEM];
 /* IPv4 headers */
-static struct iphdr		tcp4_payload_ip[TCP_FRAMES_MEM];
+static struct iphdr_t		tcp4_payload_ip[TCP_FRAMES_MEM];
 /* TCP segments with payload for IPv4 frames */
 static struct tcp_payload_t	tcp4_payload[TCP_FRAMES_MEM];
 
@@ -84,7 +96,7 @@ static unsigned int tcp4_payload_used;
 
 static struct tap_hdr		tcp4_flags_tap_hdr[TCP_FRAMES_MEM];
 /* IPv4 headers for TCP segment without payload */
-static struct iphdr		tcp4_flags_ip[TCP_FRAMES_MEM];
+static struct iphdr_t		tcp4_flags_ip[TCP_FRAMES_MEM];
 /* TCP segments without payload for IPv4 frames */
 static struct tcp_flags_t	tcp4_flags[TCP_FRAMES_MEM];
 
@@ -95,7 +107,7 @@ static struct ethhdr		tcp6_eth_src;
 
 static struct tap_hdr		tcp6_payload_tap_hdr[TCP_FRAMES_MEM];
 /* IPv6 headers */
-static struct ipv6hdr		tcp6_payload_ip[TCP_FRAMES_MEM];
+static struct iphdr_t		tcp6_payload_ip[TCP_FRAMES_MEM];
 /* TCP headers and data for IPv6 frames */
 static struct tcp_payload_t	tcp6_payload[TCP_FRAMES_MEM];
 
@@ -107,7 +119,7 @@ static unsigned int tcp6_payload_used;
 
 static struct tap_hdr		tcp6_flags_tap_hdr[TCP_FRAMES_MEM];
 /* IPv6 headers for TCP segment without payload */
-static struct ipv6hdr		tcp6_flags_ip[TCP_FRAMES_MEM];
+static struct iphdr_t		tcp6_flags_ip[TCP_FRAMES_MEM];
 /* TCP segment without payload for IPv6 frames */
 static struct tcp_flags_t	tcp6_flags[TCP_FRAMES_MEM];
 
@@ -144,13 +156,13 @@ void tcp_sock4_iov_init(const struct ctx *c)
 	tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
 
 	for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) {
-		tcp4_payload_ip[i] = iph;
+		tcp4_payload_ip[i].ip4 = iph;
 		tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4;
 		tcp4_payload[i].th.ack = 1;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(tcp4_flags); i++) {
-		tcp4_flags_ip[i] = iph;
+		tcp4_flags_ip[i].ip4 = iph;
 		tcp4_flags[i].th.doff = sizeof(struct tcphdr) / 4;
 		tcp4_flags[i].th.ack = 1;
 	}
@@ -160,7 +172,8 @@ void tcp_sock4_iov_init(const struct ctx *c)
 
 		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_payload_tap_hdr[i]);
 		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
-		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[i]);
+		iov[TCP_IOV_IP].iov_base = &tcp4_payload_ip[i];
+		iov[TCP_IOV_IP].iov_len = sizeof(tcp4_payload_ip[i].ip4);
 		iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_payload[i];
 	}
 
@@ -170,7 +183,8 @@ void tcp_sock4_iov_init(const struct ctx *c)
 		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_flags_tap_hdr[i]);
 		iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
 		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
-		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[i]);
+		iov[TCP_IOV_IP].iov_base = &tcp4_flags_ip[i];
+		iov[TCP_IOV_IP].iov_len = sizeof(tcp4_flags_ip[i].ip4);
 		iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_flags[i];
 	}
 }
@@ -188,13 +202,13 @@ void tcp_sock6_iov_init(const struct ctx *c)
 	tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6);
 
 	for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) {
-		tcp6_payload_ip[i] = ip6;
+		tcp6_payload_ip[i].ip6 = ip6;
 		tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4;
 		tcp6_payload[i].th.ack = 1;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(tcp6_flags); i++) {
-		tcp6_flags_ip[i] = ip6;
+		tcp6_flags_ip[i].ip6 = ip6;
 		tcp6_flags[i].th.doff = sizeof(struct tcphdr) / 4;
 		tcp6_flags[i].th .ack = 1;
 	}
@@ -204,7 +218,8 @@ void tcp_sock6_iov_init(const struct ctx *c)
 
 		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_payload_tap_hdr[i]);
 		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
-		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[i]);
+		iov[TCP_IOV_IP].iov_base = &tcp6_payload_ip[i];
+		iov[TCP_IOV_IP].iov_len = sizeof(tcp6_payload_ip[i].ip6);
 		iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i];
 	}
 
@@ -213,7 +228,8 @@ void tcp_sock6_iov_init(const struct ctx *c)
 
 		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_flags_tap_hdr[i]);
 		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
-		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[i]);
+		iov[TCP_IOV_IP].iov_base = &tcp6_flags_ip[i];
+		iov[TCP_IOV_IP].iov_len = sizeof(tcp6_flags_ip[i].ip6);
 		iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_flags[i];
 	}
 }
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/4] tcp: update ip address in l2 tap queues on the fly
  2024-09-06 21:34 [PATCH 0/4] tcp: unify IPv4 and IPv6 l2 tap queues Jon Maloy
  2024-09-06 21:34 ` [PATCH 1/4] tcp: create unified struct for IPv4 and IPv6 header Jon Maloy
@ 2024-09-06 21:34 ` Jon Maloy
  2024-09-09  1:08   ` David Gibson
  2024-09-06 21:34 ` [PATCH 3/4] tcp: unify l2 TCPv4 and TCPv6 queues and structures Jon Maloy
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 9+ messages in thread
From: Jon Maloy @ 2024-09-06 21:34 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

l2 tap queue entries are currently initialized at system start, and
reused with preset headers through its whole life time. The only
fields we need to update per message are things like payload size
and checksums.

If we want to reuse these entries between ipv4 and ipv6 messages we
will now need to initialize the complete ip header on the fly.

We do this here.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 tcp.c     | 14 +++++++++-----
 tcp_buf.c |  6 ++++--
 tcp_buf.h |  2 ++
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/tcp.c b/tcp.c
index 77c62f0..006e503 100644
--- a/tcp.c
+++ b/tcp.c
@@ -920,6 +920,7 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn,
 
 	ASSERT(src4 && dst4);
 
+	*iph = tcp_payload_ip4;
 	iph->tot_len = htons(l3len);
 	iph->saddr = src4->s_addr;
 	iph->daddr = dst4->s_addr;
@@ -956,6 +957,7 @@ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn,
 	const struct flowside *tapside = TAPFLOW(conn);
 	size_t l4len = dlen + sizeof(*th);
 
+	*ip6h = tcp_payload_ip6;
 	ip6h->payload_len = htons(l4len);
 	ip6h->saddr = tapside->oaddr.a6;
 	ip6h->daddr = tapside->eaddr.a6;
@@ -995,16 +997,18 @@ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
 	const struct in_addr *a4 = inany_v4(&tapside->oaddr);
 
 	if (a4) {
+		iov[TCP_IOV_IP].iov_len = sizeof(struct iphdr);
 		return tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base,
 					 iov[TCP_IOV_IP].iov_base,
 					 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
 					 check, seq);
+	} else {
+		iov[TCP_IOV_IP].iov_len = sizeof(struct ipv6hdr);
+		return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base,
+					 iov[TCP_IOV_IP].iov_base,
+					 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
+					 seq);
 	}
-
-	return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base,
-				 iov[TCP_IOV_IP].iov_base,
-				 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
-				 seq);
 }
 
 /**
diff --git a/tcp_buf.c b/tcp_buf.c
index 1af4786..6e6549f 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -84,6 +84,7 @@ static struct ethhdr		tcp4_eth_src;
 
 static struct tap_hdr		tcp4_payload_tap_hdr[TCP_FRAMES_MEM];
 /* IPv4 headers */
+struct iphdr		tcp_payload_ip4;
 static struct iphdr_t		tcp4_payload_ip[TCP_FRAMES_MEM];
 /* TCP segments with payload for IPv4 frames */
 static struct tcp_payload_t	tcp4_payload[TCP_FRAMES_MEM];
@@ -107,6 +108,7 @@ static struct ethhdr		tcp6_eth_src;
 
 static struct tap_hdr		tcp6_payload_tap_hdr[TCP_FRAMES_MEM];
 /* IPv6 headers */
+struct ipv6hdr		tcp_payload_ip6;
 static struct iphdr_t		tcp6_payload_ip[TCP_FRAMES_MEM];
 /* TCP headers and data for IPv6 frames */
 static struct tcp_payload_t	tcp6_payload[TCP_FRAMES_MEM];
@@ -154,9 +156,9 @@ void tcp_sock4_iov_init(const struct ctx *c)
 	int i;
 
 	tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
+	tcp_payload_ip4 = iph;
 
 	for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) {
-		tcp4_payload_ip[i].ip4 = iph;
 		tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4;
 		tcp4_payload[i].th.ack = 1;
 	}
@@ -200,9 +202,9 @@ void tcp_sock6_iov_init(const struct ctx *c)
 	int i;
 
 	tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6);
+	tcp_payload_ip6 = ip6;
 
 	for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) {
-		tcp6_payload_ip[i].ip6 = ip6;
 		tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4;
 		tcp6_payload[i].th.ack = 1;
 	}
diff --git a/tcp_buf.h b/tcp_buf.h
index 3db4c56..d3d0d7f 100644
--- a/tcp_buf.h
+++ b/tcp_buf.h
@@ -13,4 +13,6 @@ void tcp_payload_flush(struct ctx *c);
 int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn);
 int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags);
 
+extern struct iphdr		tcp_payload_ip4;
+extern struct ipv6hdr		tcp_payload_ip6;
 #endif  /*TCP_BUF_H */
-- 
@@ -13,4 +13,6 @@ void tcp_payload_flush(struct ctx *c);
 int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn);
 int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags);
 
+extern struct iphdr		tcp_payload_ip4;
+extern struct ipv6hdr		tcp_payload_ip6;
 #endif  /*TCP_BUF_H */
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/4] tcp: unify l2 TCPv4 and TCPv6 queues and structures
  2024-09-06 21:34 [PATCH 0/4] tcp: unify IPv4 and IPv6 l2 tap queues Jon Maloy
  2024-09-06 21:34 ` [PATCH 1/4] tcp: create unified struct for IPv4 and IPv6 header Jon Maloy
  2024-09-06 21:34 ` [PATCH 2/4] tcp: update ip address in l2 tap queues on the fly Jon Maloy
@ 2024-09-06 21:34 ` Jon Maloy
  2024-09-09  1:17   ` David Gibson
  2024-09-06 21:34 ` [PATCH 4/4] tcp: change prefix tcp4_ to tcp_ where applicable Jon Maloy
  2024-09-09  8:31 ` [PATCH 0/4] tcp: unify IPv4 and IPv6 l2 tap queues Stefano Brivio
  4 siblings, 1 reply; 9+ messages in thread
From: Jon Maloy @ 2024-09-06 21:34 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

Following the preparations in the previous commits, we can now
remove the queues dedicated for TCPv6 and move that traffic over
to the queues currently used for TCPv4.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 tcp.c     |   8 ++-
 tcp_buf.c | 158 +++++++++---------------------------------------------
 tcp_buf.h |   1 +
 3 files changed, 28 insertions(+), 139 deletions(-)

diff --git a/tcp.c b/tcp.c
index 006e503..19cf9e5 100644
--- a/tcp.c
+++ b/tcp.c
@@ -998,12 +998,14 @@ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
 
 	if (a4) {
 		iov[TCP_IOV_IP].iov_len = sizeof(struct iphdr);
+		tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
 		return tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base,
 					 iov[TCP_IOV_IP].iov_base,
 					 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
 					 check, seq);
 	} else {
 		iov[TCP_IOV_IP].iov_len = sizeof(struct ipv6hdr);
+		tcp4_eth_src.h_proto = htons_constant(ETH_P_IPV6);
 		return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base,
 					 iov[TCP_IOV_IP].iov_base,
 					 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
@@ -2508,11 +2510,7 @@ int tcp_init(struct ctx *c)
 {
 	ASSERT(!c->no_tcp);
 
-	if (c->ifi4)
-		tcp_sock4_iov_init(c);
-
-	if (c->ifi6)
-		tcp_sock6_iov_init(c);
+	tcp_sock4_iov_init(c);
 
 	memset(init_sock_pool4,		0xff,	sizeof(init_sock_pool4));
 	memset(init_sock_pool6,		0xff,	sizeof(init_sock_pool6));
diff --git a/tcp_buf.c b/tcp_buf.c
index 6e6549f..92c4d73 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -80,7 +80,7 @@ struct tcp_flags_t {
 #endif
 
 /* Ethernet header for IPv4 frames */
-static struct ethhdr		tcp4_eth_src;
+struct ethhdr		tcp4_eth_src;
 
 static struct tap_hdr		tcp4_payload_tap_hdr[TCP_FRAMES_MEM];
 /* IPv4 headers */
@@ -104,36 +104,14 @@ static struct tcp_flags_t	tcp4_flags[TCP_FRAMES_MEM];
 static unsigned int tcp4_flags_used;
 
 /* Ethernet header for IPv6 frames */
-static struct ethhdr		tcp6_eth_src;
-
-static struct tap_hdr		tcp6_payload_tap_hdr[TCP_FRAMES_MEM];
-/* IPv6 headers */
-struct ipv6hdr		tcp_payload_ip6;
-static struct iphdr_t		tcp6_payload_ip[TCP_FRAMES_MEM];
-/* TCP headers and data for IPv6 frames */
-static struct tcp_payload_t	tcp6_payload[TCP_FRAMES_MEM];
-
-static_assert(MSS6 <= sizeof(tcp6_payload[0].data), "MSS6 is greater than 65516");
-
-/* References tracking the owner connection of frames in the tap outqueue */
-static struct tcp_tap_conn *tcp6_frame_conns[TCP_FRAMES_MEM];
-static unsigned int tcp6_payload_used;
-
-static struct tap_hdr		tcp6_flags_tap_hdr[TCP_FRAMES_MEM];
-/* IPv6 headers for TCP segment without payload */
-static struct iphdr_t		tcp6_flags_ip[TCP_FRAMES_MEM];
-/* TCP segment without payload for IPv6 frames */
-static struct tcp_flags_t	tcp6_flags[TCP_FRAMES_MEM];
-
-static unsigned int tcp6_flags_used;
+struct ipv6hdr tcp_payload_ip6;
 
 /* recvmsg()/sendmsg() data for tap */
 static struct iovec	iov_sock		[TCP_FRAMES_MEM + 1];
 
 static struct iovec	tcp4_l2_iov		[TCP_FRAMES_MEM][TCP_NUM_IOVS];
-static struct iovec	tcp6_l2_iov		[TCP_FRAMES_MEM][TCP_NUM_IOVS];
 static struct iovec	tcp4_l2_flags_iov	[TCP_FRAMES_MEM][TCP_NUM_IOVS];
-static struct iovec	tcp6_l2_flags_iov	[TCP_FRAMES_MEM][TCP_NUM_IOVS];
+
 /**
  * tcp_update_l2_buf() - Update Ethernet header buffers with addresses
  * @eth_d:	Ethernet destination address, NULL if unchanged
@@ -142,7 +120,6 @@ static struct iovec	tcp6_l2_flags_iov	[TCP_FRAMES_MEM][TCP_NUM_IOVS];
 void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
 {
 	eth_update_mac(&tcp4_eth_src, eth_d, eth_s);
-	eth_update_mac(&tcp6_eth_src, eth_d, eth_s);
 }
 
 /**
@@ -191,61 +168,12 @@ void tcp_sock4_iov_init(const struct ctx *c)
 	}
 }
 
-/**
- * tcp_sock6_iov_init() - Initialise scatter-gather L2 buffers for IPv6 sockets
- * @c:		Execution context
- */
-void tcp_sock6_iov_init(const struct ctx *c)
-{
-	struct ipv6hdr ip6 = L2_BUF_IP6_INIT(IPPROTO_TCP);
-	struct iovec *iov;
-	int i;
-
-	tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6);
-	tcp_payload_ip6 = ip6;
-
-	for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) {
-		tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4;
-		tcp6_payload[i].th.ack = 1;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(tcp6_flags); i++) {
-		tcp6_flags_ip[i].ip6 = ip6;
-		tcp6_flags[i].th.doff = sizeof(struct tcphdr) / 4;
-		tcp6_flags[i].th .ack = 1;
-	}
-
-	for (i = 0; i < TCP_FRAMES_MEM; i++) {
-		iov = tcp6_l2_iov[i];
-
-		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_payload_tap_hdr[i]);
-		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
-		iov[TCP_IOV_IP].iov_base = &tcp6_payload_ip[i];
-		iov[TCP_IOV_IP].iov_len = sizeof(tcp6_payload_ip[i].ip6);
-		iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i];
-	}
-
-	for (i = 0; i < TCP_FRAMES_MEM; i++) {
-		iov = tcp6_l2_flags_iov[i];
-
-		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_flags_tap_hdr[i]);
-		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
-		iov[TCP_IOV_IP].iov_base = &tcp6_flags_ip[i];
-		iov[TCP_IOV_IP].iov_len = sizeof(tcp6_flags_ip[i].ip6);
-		iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_flags[i];
-	}
-}
-
 /**
  * tcp_flags_flush() - Send out buffers for segments with no data (flags)
  * @c:		Execution context
  */
 void tcp_flags_flush(const struct ctx *c)
 {
-	tap_send_frames(c, &tcp6_l2_flags_iov[0][0], TCP_NUM_IOVS,
-			tcp6_flags_used);
-	tcp6_flags_used = 0;
-
 	tap_send_frames(c, &tcp4_l2_flags_iov[0][0], TCP_NUM_IOVS,
 			tcp4_flags_used);
 	tcp4_flags_used = 0;
@@ -287,14 +215,6 @@ void tcp_payload_flush(struct ctx *c)
 {
 	size_t m;
 
-	m = tap_send_frames(c, &tcp6_l2_iov[0][0], TCP_NUM_IOVS,
-			    tcp6_payload_used);
-	if (m != tcp6_payload_used) {
-		tcp_revert_seq(c, &tcp6_frame_conns[m], &tcp6_l2_iov[m],
-			       tcp6_payload_used - m);
-	}
-	tcp6_payload_used = 0;
-
 	m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS,
 			    tcp4_payload_used);
 	if (m != tcp4_payload_used) {
@@ -321,21 +241,13 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	uint32_t seq;
 	int ret;
 
-	if (CONN_V4(conn))
-		iov = tcp4_l2_flags_iov[tcp4_flags_used++];
-	else
-		iov = tcp6_l2_flags_iov[tcp6_flags_used++];
-
+	iov = tcp4_l2_flags_iov[tcp4_flags_used++];
 	payload = iov[TCP_IOV_PAYLOAD].iov_base;
-
 	seq = conn->seq_to_tap;
 	ret = tcp_prepare_flags(c, conn, flags, &payload->th,
 				payload->opts, &optlen);
 	if (ret <= 0) {
-		if (CONN_V4(conn))
-			tcp4_flags_used--;
-		else
-			tcp6_flags_used--;
+		tcp4_flags_used--;
 		return ret;
 	}
 
@@ -346,10 +258,7 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 		struct iovec *dup_iov;
 		int i;
 
-		if (CONN_V4(conn))
-			dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++];
-		else
-			dup_iov = tcp6_l2_flags_iov[tcp6_flags_used++];
+		dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++];
 
 		for (i = 0; i < TCP_NUM_IOVS; i++)
 			memcpy(dup_iov[i].iov_base, iov[i].iov_base,
@@ -357,13 +266,8 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 		dup_iov[TCP_IOV_PAYLOAD].iov_len = iov[TCP_IOV_PAYLOAD].iov_len;
 	}
 
-	if (CONN_V4(conn)) {
-		if (tcp4_flags_used > TCP_FRAMES_MEM - 2)
-			tcp_flags_flush(c);
-	} else {
-		if (tcp6_flags_used > TCP_FRAMES_MEM - 2)
-			tcp_flags_flush(c);
-	}
+	if (tcp4_flags_used > TCP_FRAMES_MEM - 2)
+		tcp_flags_flush(c);
 
 	return 0;
 }
@@ -379,36 +283,26 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn,
 			    ssize_t dlen, int no_csum, uint32_t seq)
 {
+	struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1];
+	const uint16_t *check = NULL;
 	struct iovec *iov;
 	size_t l4len;
 
 	conn->seq_to_tap = seq + dlen;
 
-	if (CONN_V4(conn)) {
-		struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1];
-		const uint16_t *check = NULL;
-
-		if (no_csum) {
-			struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base;
-			check = &iph->check;
-		}
+	if (CONN_V4(conn) && no_csum) {
+		struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base;
 
-		tcp4_frame_conns[tcp4_payload_used] = conn;
-
-		iov = tcp4_l2_iov[tcp4_payload_used++];
-		l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq);
-		iov[TCP_IOV_PAYLOAD].iov_len = l4len;
-		if (tcp4_payload_used > TCP_FRAMES_MEM - 1)
-			tcp_payload_flush(c);
-	} else if (CONN_V6(conn)) {
-		tcp6_frame_conns[tcp6_payload_used] = conn;
-
-		iov = tcp6_l2_iov[tcp6_payload_used++];
-		l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, NULL, seq);
-		iov[TCP_IOV_PAYLOAD].iov_len = l4len;
-		if (tcp6_payload_used > TCP_FRAMES_MEM - 1)
-			tcp_payload_flush(c);
+		check = &iph->check;
 	}
+
+	tcp4_frame_conns[tcp4_payload_used] = conn;
+
+	iov = tcp4_l2_iov[tcp4_payload_used++];
+	l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq);
+	iov[TCP_IOV_PAYLOAD].iov_len = l4len;
+	if (tcp4_payload_used > TCP_FRAMES_MEM - 1)
+		tcp_payload_flush(c);
 }
 
 /**
@@ -472,19 +366,15 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 		mh_sock.msg_iovlen = fill_bufs;
 	}
 
-	if (( v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM) ||
-	    (!v4 && tcp6_payload_used + fill_bufs > TCP_FRAMES_MEM)) {
+	if ((v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM)) {
 		tcp_payload_flush(c);
 
 		/* Silence Coverity CWE-125 false positive */
-		tcp4_payload_used = tcp6_payload_used = 0;
+		tcp4_payload_used = 0;
 	}
 
 	for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) {
-		if (v4)
-			iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data;
-		else
-			iov->iov_base = &tcp6_payload[tcp6_payload_used + i].data;
+		iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data;
 		iov->iov_len = mss;
 	}
 	if (iov_rem)
diff --git a/tcp_buf.h b/tcp_buf.h
index d3d0d7f..d7cdbaf 100644
--- a/tcp_buf.h
+++ b/tcp_buf.h
@@ -13,6 +13,7 @@ void tcp_payload_flush(struct ctx *c);
 int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn);
 int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags);
 
+extern struct ethhdr		tcp4_eth_src;
 extern struct iphdr		tcp_payload_ip4;
 extern struct ipv6hdr		tcp_payload_ip6;
 #endif  /*TCP_BUF_H */
-- 
@@ -13,6 +13,7 @@ void tcp_payload_flush(struct ctx *c);
 int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn);
 int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags);
 
+extern struct ethhdr		tcp4_eth_src;
 extern struct iphdr		tcp_payload_ip4;
 extern struct ipv6hdr		tcp_payload_ip6;
 #endif  /*TCP_BUF_H */
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/4] tcp: change prefix tcp4_ to tcp_ where applicable
  2024-09-06 21:34 [PATCH 0/4] tcp: unify IPv4 and IPv6 l2 tap queues Jon Maloy
                   ` (2 preceding siblings ...)
  2024-09-06 21:34 ` [PATCH 3/4] tcp: unify l2 TCPv4 and TCPv6 queues and structures Jon Maloy
@ 2024-09-06 21:34 ` Jon Maloy
  2024-09-09  8:31 ` [PATCH 0/4] tcp: unify IPv4 and IPv6 l2 tap queues Stefano Brivio
  4 siblings, 0 replies; 9+ messages in thread
From: Jon Maloy @ 2024-09-06 21:34 UTC (permalink / raw)
  To: passt-dev, sbrivio, lvivier, dgibson, jmaloy

The queues and structures previously used for TCPv4 traffic are now
used for both IP protocols. We therefore need to replace the prefix
tcp4_* with the generic tcp_ everywhere applicable.

There are no functional changes in this commit.

Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
 tcp.c     |   4 +-
 tcp_buf.c | 115 +++++++++++++++++++++++++++---------------------------
 tcp_buf.h |   2 +-
 3 files changed, 60 insertions(+), 61 deletions(-)

diff --git a/tcp.c b/tcp.c
index 19cf9e5..433dce4 100644
--- a/tcp.c
+++ b/tcp.c
@@ -998,14 +998,14 @@ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
 
 	if (a4) {
 		iov[TCP_IOV_IP].iov_len = sizeof(struct iphdr);
-		tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
+		tcp_eth_src.h_proto = htons_constant(ETH_P_IP);
 		return tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base,
 					 iov[TCP_IOV_IP].iov_base,
 					 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
 					 check, seq);
 	} else {
 		iov[TCP_IOV_IP].iov_len = sizeof(struct ipv6hdr);
-		tcp4_eth_src.h_proto = htons_constant(ETH_P_IPV6);
+		tcp_eth_src.h_proto = htons_constant(ETH_P_IPV6);
 		return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base,
 					 iov[TCP_IOV_IP].iov_base,
 					 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
diff --git a/tcp_buf.c b/tcp_buf.c
index 92c4d73..0476184 100644
--- a/tcp_buf.c
+++ b/tcp_buf.c
@@ -80,28 +80,27 @@ struct tcp_flags_t {
 #endif
 
 /* Ethernet header for IPv4 frames */
-struct ethhdr		tcp4_eth_src;
+struct ethhdr		tcp_eth_src;
 
-static struct tap_hdr		tcp4_payload_tap_hdr[TCP_FRAMES_MEM];
+static struct tap_hdr		tcp_payload_tap_hdr[TCP_FRAMES_MEM];
 /* IPv4 headers */
 struct iphdr		tcp_payload_ip4;
-static struct iphdr_t		tcp4_payload_ip[TCP_FRAMES_MEM];
+static struct iphdr_t		tcp_payload_ip[TCP_FRAMES_MEM];
 /* TCP segments with payload for IPv4 frames */
-static struct tcp_payload_t	tcp4_payload[TCP_FRAMES_MEM];
+static struct tcp_payload_t	tcp_payload[TCP_FRAMES_MEM];
 
-static_assert(MSS4 <= sizeof(tcp4_payload[0].data), "MSS4 is greater than 65516");
+static_assert(sizeof(tcp_payload[0].data) > MSS4, "MSS4 is greater than 65516");
 
 /* References tracking the owner connection of frames in the tap outqueue */
-static struct tcp_tap_conn *tcp4_frame_conns[TCP_FRAMES_MEM];
-static unsigned int tcp4_payload_used;
+static struct tcp_tap_conn *tcp_frame_conns[TCP_FRAMES_MEM];
+static unsigned int tcp_payload_used;
 
-static struct tap_hdr		tcp4_flags_tap_hdr[TCP_FRAMES_MEM];
+static struct tap_hdr		tcp_flags_tap_hdr[TCP_FRAMES_MEM];
 /* IPv4 headers for TCP segment without payload */
-static struct iphdr_t		tcp4_flags_ip[TCP_FRAMES_MEM];
+static struct iphdr_t		tcp_flags_ip[TCP_FRAMES_MEM];
 /* TCP segments without payload for IPv4 frames */
-static struct tcp_flags_t	tcp4_flags[TCP_FRAMES_MEM];
-
-static unsigned int tcp4_flags_used;
+static struct tcp_flags_t	tcp_flags[TCP_FRAMES_MEM];
+static unsigned int tcp_flags_used;
 
 /* Ethernet header for IPv6 frames */
 struct ipv6hdr tcp_payload_ip6;
@@ -109,8 +108,8 @@ struct ipv6hdr tcp_payload_ip6;
 /* recvmsg()/sendmsg() data for tap */
 static struct iovec	iov_sock		[TCP_FRAMES_MEM + 1];
 
-static struct iovec	tcp4_l2_iov		[TCP_FRAMES_MEM][TCP_NUM_IOVS];
-static struct iovec	tcp4_l2_flags_iov	[TCP_FRAMES_MEM][TCP_NUM_IOVS];
+static struct iovec	tcp_l2_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
+static struct iovec	tcp_l2_flags_iov[TCP_FRAMES_MEM][TCP_NUM_IOVS];
 
 /**
  * tcp_update_l2_buf() - Update Ethernet header buffers with addresses
@@ -119,7 +118,7 @@ static struct iovec	tcp4_l2_flags_iov	[TCP_FRAMES_MEM][TCP_NUM_IOVS];
  */
 void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
 {
-	eth_update_mac(&tcp4_eth_src, eth_d, eth_s);
+	eth_update_mac(&tcp_eth_src, eth_d, eth_s);
 }
 
 /**
@@ -132,39 +131,39 @@ void tcp_sock4_iov_init(const struct ctx *c)
 	struct iovec *iov;
 	int i;
 
-	tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
+	tcp_eth_src.h_proto = htons_constant(ETH_P_IP);
 	tcp_payload_ip4 = iph;
 
-	for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) {
-		tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4;
-		tcp4_payload[i].th.ack = 1;
+	for (i = 0; i < ARRAY_SIZE(tcp_payload); i++) {
+		tcp_payload[i].th.doff = sizeof(struct tcphdr) / 4;
+		tcp_payload[i].th.ack = 1;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(tcp4_flags); i++) {
-		tcp4_flags_ip[i].ip4 = iph;
-		tcp4_flags[i].th.doff = sizeof(struct tcphdr) / 4;
-		tcp4_flags[i].th.ack = 1;
+	for (i = 0; i < ARRAY_SIZE(tcp_flags); i++) {
+		tcp_flags_ip[i].ip4 = iph;
+		tcp_flags[i].th.doff = sizeof(struct tcphdr) / 4;
+		tcp_flags[i].th.ack = 1;
 	}
 
 	for (i = 0; i < TCP_FRAMES_MEM; i++) {
-		iov = tcp4_l2_iov[i];
+		iov = tcp_l2_iov[i];
 
-		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_payload_tap_hdr[i]);
-		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
-		iov[TCP_IOV_IP].iov_base = &tcp4_payload_ip[i];
-		iov[TCP_IOV_IP].iov_len = sizeof(tcp4_payload_ip[i].ip4);
-		iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_payload[i];
+		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_payload_tap_hdr[i]);
+		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp_eth_src);
+		iov[TCP_IOV_IP].iov_base = &tcp_payload_ip[i];
+		iov[TCP_IOV_IP].iov_len = sizeof(tcp_payload_ip[i].ip4);
+		iov[TCP_IOV_PAYLOAD].iov_base = &tcp_payload[i];
 	}
 
 	for (i = 0; i < TCP_FRAMES_MEM; i++) {
-		iov = tcp4_l2_flags_iov[i];
-
-		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_flags_tap_hdr[i]);
-		iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
-		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
-		iov[TCP_IOV_IP].iov_base = &tcp4_flags_ip[i];
-		iov[TCP_IOV_IP].iov_len = sizeof(tcp4_flags_ip[i].ip4);
-		iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_flags[i];
+		iov = tcp_l2_flags_iov[i];
+
+		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp_flags_tap_hdr[i]);
+		iov[TCP_IOV_ETH].iov_base = &tcp_eth_src;
+		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp_eth_src);
+		iov[TCP_IOV_IP].iov_base = &tcp_flags_ip[i];
+		iov[TCP_IOV_IP].iov_len = sizeof(tcp_flags_ip[i].ip4);
+		iov[TCP_IOV_PAYLOAD].iov_base = &tcp_flags[i];
 	}
 }
 
@@ -174,9 +173,9 @@ void tcp_sock4_iov_init(const struct ctx *c)
  */
 void tcp_flags_flush(const struct ctx *c)
 {
-	tap_send_frames(c, &tcp4_l2_flags_iov[0][0], TCP_NUM_IOVS,
-			tcp4_flags_used);
-	tcp4_flags_used = 0;
+	tap_send_frames(c, &tcp_l2_flags_iov[0][0], TCP_NUM_IOVS,
+			tcp_flags_used);
+	tcp_flags_used = 0;
 }
 
 /**
@@ -215,13 +214,13 @@ void tcp_payload_flush(struct ctx *c)
 {
 	size_t m;
 
-	m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS,
-			    tcp4_payload_used);
-	if (m != tcp4_payload_used) {
-		tcp_revert_seq(c, &tcp4_frame_conns[m], &tcp4_l2_iov[m],
-			       tcp4_payload_used - m);
+	m = tap_send_frames(c, &tcp_l2_iov[0][0], TCP_NUM_IOVS,
+			    tcp_payload_used);
+	if (m != tcp_payload_used) {
+		tcp_revert_seq(c, &tcp_frame_conns[m], &tcp_l2_iov[m],
+			       tcp_payload_used - m);
 	}
-	tcp4_payload_used = 0;
+	tcp_payload_used = 0;
 }
 
 /**
@@ -241,13 +240,13 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 	uint32_t seq;
 	int ret;
 
-	iov = tcp4_l2_flags_iov[tcp4_flags_used++];
+	iov = tcp_l2_flags_iov[tcp_flags_used++];
 	payload = iov[TCP_IOV_PAYLOAD].iov_base;
 	seq = conn->seq_to_tap;
 	ret = tcp_prepare_flags(c, conn, flags, &payload->th,
 				payload->opts, &optlen);
 	if (ret <= 0) {
-		tcp4_flags_used--;
+		tcp_flags_used--;
 		return ret;
 	}
 
@@ -258,7 +257,7 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 		struct iovec *dup_iov;
 		int i;
 
-		dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++];
+		dup_iov = tcp_l2_flags_iov[tcp_flags_used++];
 
 		for (i = 0; i < TCP_NUM_IOVS; i++)
 			memcpy(dup_iov[i].iov_base, iov[i].iov_base,
@@ -266,7 +265,7 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 		dup_iov[TCP_IOV_PAYLOAD].iov_len = iov[TCP_IOV_PAYLOAD].iov_len;
 	}
 
-	if (tcp4_flags_used > TCP_FRAMES_MEM - 2)
+	if (tcp_flags_used > TCP_FRAMES_MEM - 2)
 		tcp_flags_flush(c);
 
 	return 0;
@@ -283,7 +282,7 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
 static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn,
 			    ssize_t dlen, int no_csum, uint32_t seq)
 {
-	struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1];
+	struct iovec *iov_prev = tcp_l2_iov[tcp_payload_used - 1];
 	const uint16_t *check = NULL;
 	struct iovec *iov;
 	size_t l4len;
@@ -296,12 +295,12 @@ static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn,
 		check = &iph->check;
 	}
 
-	tcp4_frame_conns[tcp4_payload_used] = conn;
+	tcp_frame_conns[tcp_payload_used] = conn;
 
-	iov = tcp4_l2_iov[tcp4_payload_used++];
+	iov = tcp_l2_iov[tcp_payload_used++];
 	l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq);
 	iov[TCP_IOV_PAYLOAD].iov_len = l4len;
-	if (tcp4_payload_used > TCP_FRAMES_MEM - 1)
+	if (tcp_payload_used > TCP_FRAMES_MEM - 1)
 		tcp_payload_flush(c);
 }
 
@@ -366,15 +365,15 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 		mh_sock.msg_iovlen = fill_bufs;
 	}
 
-	if ((v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM)) {
+	if ((v4 && tcp_payload_used + fill_bufs > TCP_FRAMES_MEM)) {
 		tcp_payload_flush(c);
 
 		/* Silence Coverity CWE-125 false positive */
-		tcp4_payload_used = 0;
+		tcp_payload_used = 0;
 	}
 
 	for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) {
-		iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data;
+		iov->iov_base = &tcp_payload[tcp_payload_used + i].data;
 		iov->iov_len = mss;
 	}
 	if (iov_rem)
@@ -422,7 +421,7 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
 	dlen = mss;
 	seq = conn->seq_to_tap;
 	for (i = 0; i < send_bufs; i++) {
-		int no_csum = i && i != send_bufs - 1 && tcp4_payload_used;
+		int no_csum = i && i != send_bufs - 1 && tcp_payload_used;
 
 		if (i == send_bufs - 1)
 			dlen = last_len;
diff --git a/tcp_buf.h b/tcp_buf.h
index d7cdbaf..3dc3c95 100644
--- a/tcp_buf.h
+++ b/tcp_buf.h
@@ -13,7 +13,7 @@ void tcp_payload_flush(struct ctx *c);
 int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn);
 int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags);
 
-extern struct ethhdr		tcp4_eth_src;
+extern struct ethhdr		tcp_eth_src;
 extern struct iphdr		tcp_payload_ip4;
 extern struct ipv6hdr		tcp_payload_ip6;
 #endif  /*TCP_BUF_H */
-- 
@@ -13,7 +13,7 @@ void tcp_payload_flush(struct ctx *c);
 int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn);
 int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags);
 
-extern struct ethhdr		tcp4_eth_src;
+extern struct ethhdr		tcp_eth_src;
 extern struct iphdr		tcp_payload_ip4;
 extern struct ipv6hdr		tcp_payload_ip6;
 #endif  /*TCP_BUF_H */
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] tcp: create unified struct for IPv4 and IPv6 header
  2024-09-06 21:34 ` [PATCH 1/4] tcp: create unified struct for IPv4 and IPv6 header Jon Maloy
@ 2024-09-09  1:04   ` David Gibson
  0 siblings, 0 replies; 9+ messages in thread
From: David Gibson @ 2024-09-09  1:04 UTC (permalink / raw)
  To: Jon Maloy; +Cc: passt-dev, sbrivio, lvivier, dgibson

[-- Attachment #1: Type: text/plain, Size: 6634 bytes --]

On Fri, Sep 06, 2024 at 05:34:24PM -0400, Jon Maloy wrote:
> As a preparation for unifying the l2 queues for TCPv4 and TCPv6 traffic
> we prepare a unified struct that has space for both IP address types.
> 
> With this change, the arrays tcp4_l2_iov and tcp4_l2_iov, as well as
> tcp4_l2_flags_iov and tcp4_l2_flags_iov, are identical in structure
> and size.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  tcp_buf.c | 40 ++++++++++++++++++++++++++++------------
>  1 file changed, 28 insertions(+), 12 deletions(-)
> 
> diff --git a/tcp_buf.c b/tcp_buf.c
> index c31e9f3..1af4786 100644
> --- a/tcp_buf.c
> +++ b/tcp_buf.c
> @@ -52,6 +52,18 @@ struct tcp_payload_t {
>  } __attribute__ ((packed, aligned(__alignof__(unsigned int))));
>  #endif
>  
> +/**
> + * struct iphdr_t - Area with space for both IPv4 and IPv6 header
> + * @ip4:	IPv4 header
> + * @ip6:	IPv6 header
> + */
> +struct iphdr_t {
> +	union {
> +		struct iphdr ip4;
> +		struct ipv6hdr ip6;
> +	};
> +};

So, I haven't read the rest of the series yet, so it's possible
something there would clarify this.

Having the two headers in a union does require that essentially the
whole header be re-initialized for every packet.  For UDP, what we do
instead is have both headers in paralllel, partially initialised.  For
each packet we update the appropriate header as needed, and point our
IOV to the correct header (v4 or v6).

I don't know if that's a better approach or not: it's plausible the
dcache savings outweigh the extra re-initialisation, but it's
plausible they don't too.  I wonder if it might be better sticking
with the same approach as UDP for consistency, at least until we have
specific data suggesting one or the other approach is better.

> +
>  /**
>   * struct tcp_flags_t - TCP header and data to send zero-length
>   *                      segments (flags)
> @@ -72,7 +84,7 @@ static struct ethhdr		tcp4_eth_src;
>  
>  static struct tap_hdr		tcp4_payload_tap_hdr[TCP_FRAMES_MEM];
>  /* IPv4 headers */
> -static struct iphdr		tcp4_payload_ip[TCP_FRAMES_MEM];
> +static struct iphdr_t		tcp4_payload_ip[TCP_FRAMES_MEM];
>  /* TCP segments with payload for IPv4 frames */
>  static struct tcp_payload_t	tcp4_payload[TCP_FRAMES_MEM];
>  
> @@ -84,7 +96,7 @@ static unsigned int tcp4_payload_used;
>  
>  static struct tap_hdr		tcp4_flags_tap_hdr[TCP_FRAMES_MEM];
>  /* IPv4 headers for TCP segment without payload */
> -static struct iphdr		tcp4_flags_ip[TCP_FRAMES_MEM];
> +static struct iphdr_t		tcp4_flags_ip[TCP_FRAMES_MEM];
>  /* TCP segments without payload for IPv4 frames */
>  static struct tcp_flags_t	tcp4_flags[TCP_FRAMES_MEM];
>  
> @@ -95,7 +107,7 @@ static struct ethhdr		tcp6_eth_src;
>  
>  static struct tap_hdr		tcp6_payload_tap_hdr[TCP_FRAMES_MEM];
>  /* IPv6 headers */
> -static struct ipv6hdr		tcp6_payload_ip[TCP_FRAMES_MEM];
> +static struct iphdr_t		tcp6_payload_ip[TCP_FRAMES_MEM];
>  /* TCP headers and data for IPv6 frames */
>  static struct tcp_payload_t	tcp6_payload[TCP_FRAMES_MEM];
>  
> @@ -107,7 +119,7 @@ static unsigned int tcp6_payload_used;
>  
>  static struct tap_hdr		tcp6_flags_tap_hdr[TCP_FRAMES_MEM];
>  /* IPv6 headers for TCP segment without payload */
> -static struct ipv6hdr		tcp6_flags_ip[TCP_FRAMES_MEM];
> +static struct iphdr_t		tcp6_flags_ip[TCP_FRAMES_MEM];
>  /* TCP segment without payload for IPv6 frames */
>  static struct tcp_flags_t	tcp6_flags[TCP_FRAMES_MEM];
>  
> @@ -144,13 +156,13 @@ void tcp_sock4_iov_init(const struct ctx *c)
>  	tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
>  
>  	for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) {
> -		tcp4_payload_ip[i] = iph;
> +		tcp4_payload_ip[i].ip4 = iph;
>  		tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4;
>  		tcp4_payload[i].th.ack = 1;
>  	}
>  
>  	for (i = 0; i < ARRAY_SIZE(tcp4_flags); i++) {
> -		tcp4_flags_ip[i] = iph;
> +		tcp4_flags_ip[i].ip4 = iph;
>  		tcp4_flags[i].th.doff = sizeof(struct tcphdr) / 4;
>  		tcp4_flags[i].th.ack = 1;
>  	}
> @@ -160,7 +172,8 @@ void tcp_sock4_iov_init(const struct ctx *c)
>  
>  		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_payload_tap_hdr[i]);
>  		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
> -		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_payload_ip[i]);
> +		iov[TCP_IOV_IP].iov_base = &tcp4_payload_ip[i];
> +		iov[TCP_IOV_IP].iov_len = sizeof(tcp4_payload_ip[i].ip4);
>  		iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_payload[i];
>  	}
>  
> @@ -170,7 +183,8 @@ void tcp_sock4_iov_init(const struct ctx *c)
>  		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp4_flags_tap_hdr[i]);
>  		iov[TCP_IOV_ETH].iov_base = &tcp4_eth_src;
>  		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp4_eth_src);
> -		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp4_flags_ip[i]);
> +		iov[TCP_IOV_IP].iov_base = &tcp4_flags_ip[i];
> +		iov[TCP_IOV_IP].iov_len = sizeof(tcp4_flags_ip[i].ip4);
>  		iov[TCP_IOV_PAYLOAD].iov_base = &tcp4_flags[i];
>  	}
>  }
> @@ -188,13 +202,13 @@ void tcp_sock6_iov_init(const struct ctx *c)
>  	tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6);
>  
>  	for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) {
> -		tcp6_payload_ip[i] = ip6;
> +		tcp6_payload_ip[i].ip6 = ip6;
>  		tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4;
>  		tcp6_payload[i].th.ack = 1;
>  	}
>  
>  	for (i = 0; i < ARRAY_SIZE(tcp6_flags); i++) {
> -		tcp6_flags_ip[i] = ip6;
> +		tcp6_flags_ip[i].ip6 = ip6;
>  		tcp6_flags[i].th.doff = sizeof(struct tcphdr) / 4;
>  		tcp6_flags[i].th .ack = 1;
>  	}
> @@ -204,7 +218,8 @@ void tcp_sock6_iov_init(const struct ctx *c)
>  
>  		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_payload_tap_hdr[i]);
>  		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
> -		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_payload_ip[i]);
> +		iov[TCP_IOV_IP].iov_base = &tcp6_payload_ip[i];
> +		iov[TCP_IOV_IP].iov_len = sizeof(tcp6_payload_ip[i].ip6);
>  		iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i];
>  	}
>  
> @@ -213,7 +228,8 @@ void tcp_sock6_iov_init(const struct ctx *c)
>  
>  		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_flags_tap_hdr[i]);
>  		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
> -		iov[TCP_IOV_IP] = IOV_OF_LVALUE(tcp6_flags_ip[i]);
> +		iov[TCP_IOV_IP].iov_base = &tcp6_flags_ip[i];
> +		iov[TCP_IOV_IP].iov_len = sizeof(tcp6_flags_ip[i].ip6);
>  		iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_flags[i];
>  	}
>  }

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/4] tcp: update ip address in l2 tap queues on the fly
  2024-09-06 21:34 ` [PATCH 2/4] tcp: update ip address in l2 tap queues on the fly Jon Maloy
@ 2024-09-09  1:08   ` David Gibson
  0 siblings, 0 replies; 9+ messages in thread
From: David Gibson @ 2024-09-09  1:08 UTC (permalink / raw)
  To: Jon Maloy; +Cc: passt-dev, sbrivio, lvivier, dgibson

[-- Attachment #1: Type: text/plain, Size: 4531 bytes --]

On Fri, Sep 06, 2024 at 05:34:25PM -0400, Jon Maloy wrote:
> l2 tap queue entries are currently initialized at system start, and
> reused with preset headers through its whole life time. The only
> fields we need to update per message are things like payload size
> and checksums.
> 
> If we want to reuse these entries between ipv4 and ipv6 messages we
> will now need to initialize the complete ip header on the fly.
> 
> We do this here.

Comments from 1/4 relevant here too.

> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  tcp.c     | 14 +++++++++-----
>  tcp_buf.c |  6 ++++--
>  tcp_buf.h |  2 ++
>  3 files changed, 15 insertions(+), 7 deletions(-)
> 
> diff --git a/tcp.c b/tcp.c
> index 77c62f0..006e503 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -920,6 +920,7 @@ static size_t tcp_fill_headers4(const struct tcp_tap_conn *conn,
>  
>  	ASSERT(src4 && dst4);
>  
> +	*iph = tcp_payload_ip4;
>  	iph->tot_len = htons(l3len);
>  	iph->saddr = src4->s_addr;
>  	iph->daddr = dst4->s_addr;
> @@ -956,6 +957,7 @@ static size_t tcp_fill_headers6(const struct tcp_tap_conn *conn,
>  	const struct flowside *tapside = TAPFLOW(conn);
>  	size_t l4len = dlen + sizeof(*th);
>  
> +	*ip6h = tcp_payload_ip6;
>  	ip6h->payload_len = htons(l4len);
>  	ip6h->saddr = tapside->oaddr.a6;
>  	ip6h->daddr = tapside->eaddr.a6;
> @@ -995,16 +997,18 @@ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
>  	const struct in_addr *a4 = inany_v4(&tapside->oaddr);
>  
>  	if (a4) {
> +		iov[TCP_IOV_IP].iov_len = sizeof(struct iphdr);
>  		return tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base,
>  					 iov[TCP_IOV_IP].iov_base,
>  					 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
>  					 check, seq);
> +	} else {
> +		iov[TCP_IOV_IP].iov_len = sizeof(struct ipv6hdr);
> +		return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base,
> +					 iov[TCP_IOV_IP].iov_base,
> +					 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
> +					 seq);

Personally I marginally prefer this formatting, but one of the static
checkers is going to whinge about an else clause after a 'return' in
the if clause.

>  	}
> -
> -	return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base,
> -				 iov[TCP_IOV_IP].iov_base,
> -				 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
> -				 seq);
>  }
>  
>  /**
> diff --git a/tcp_buf.c b/tcp_buf.c
> index 1af4786..6e6549f 100644
> --- a/tcp_buf.c
> +++ b/tcp_buf.c
> @@ -84,6 +84,7 @@ static struct ethhdr		tcp4_eth_src;
>  
>  static struct tap_hdr		tcp4_payload_tap_hdr[TCP_FRAMES_MEM];
>  /* IPv4 headers */
> +struct iphdr		tcp_payload_ip4;
>  static struct iphdr_t		tcp4_payload_ip[TCP_FRAMES_MEM];
>  /* TCP segments with payload for IPv4 frames */
>  static struct tcp_payload_t	tcp4_payload[TCP_FRAMES_MEM];
> @@ -107,6 +108,7 @@ static struct ethhdr		tcp6_eth_src;
>  
>  static struct tap_hdr		tcp6_payload_tap_hdr[TCP_FRAMES_MEM];
>  /* IPv6 headers */
> +struct ipv6hdr		tcp_payload_ip6;
>  static struct iphdr_t		tcp6_payload_ip[TCP_FRAMES_MEM];
>  /* TCP headers and data for IPv6 frames */
>  static struct tcp_payload_t	tcp6_payload[TCP_FRAMES_MEM];
> @@ -154,9 +156,9 @@ void tcp_sock4_iov_init(const struct ctx *c)
>  	int i;
>  
>  	tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
> +	tcp_payload_ip4 = iph;
>  
>  	for (i = 0; i < ARRAY_SIZE(tcp4_payload); i++) {
> -		tcp4_payload_ip[i].ip4 = iph;
>  		tcp4_payload[i].th.doff = sizeof(struct tcphdr) / 4;
>  		tcp4_payload[i].th.ack = 1;
>  	}
> @@ -200,9 +202,9 @@ void tcp_sock6_iov_init(const struct ctx *c)
>  	int i;
>  
>  	tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6);
> +	tcp_payload_ip6 = ip6;
>  
>  	for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) {
> -		tcp6_payload_ip[i].ip6 = ip6;
>  		tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4;
>  		tcp6_payload[i].th.ack = 1;
>  	}
> diff --git a/tcp_buf.h b/tcp_buf.h
> index 3db4c56..d3d0d7f 100644
> --- a/tcp_buf.h
> +++ b/tcp_buf.h
> @@ -13,4 +13,6 @@ void tcp_payload_flush(struct ctx *c);
>  int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn);
>  int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags);
>  
> +extern struct iphdr		tcp_payload_ip4;
> +extern struct ipv6hdr		tcp_payload_ip6;
>  #endif  /*TCP_BUF_H */

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/4] tcp: unify l2 TCPv4 and TCPv6 queues and structures
  2024-09-06 21:34 ` [PATCH 3/4] tcp: unify l2 TCPv4 and TCPv6 queues and structures Jon Maloy
@ 2024-09-09  1:17   ` David Gibson
  0 siblings, 0 replies; 9+ messages in thread
From: David Gibson @ 2024-09-09  1:17 UTC (permalink / raw)
  To: Jon Maloy; +Cc: passt-dev, sbrivio, lvivier, dgibson

[-- Attachment #1: Type: text/plain, Size: 11606 bytes --]

On Fri, Sep 06, 2024 at 05:34:26PM -0400, Jon Maloy wrote:
> Following the preparations in the previous commits, we can now
> remove the queues dedicated for TCPv6 and move that traffic over
> to the queues currently used for TCPv4.
> 
> Signed-off-by: Jon Maloy <jmaloy@redhat.com>
> ---
>  tcp.c     |   8 ++-
>  tcp_buf.c | 158 +++++++++---------------------------------------------
>  tcp_buf.h |   1 +
>  3 files changed, 28 insertions(+), 139 deletions(-)
> 
> diff --git a/tcp.c b/tcp.c
> index 006e503..19cf9e5 100644
> --- a/tcp.c
> +++ b/tcp.c
> @@ -998,12 +998,14 @@ size_t tcp_l2_buf_fill_headers(const struct tcp_tap_conn *conn,
>  
>  	if (a4) {
>  		iov[TCP_IOV_IP].iov_len = sizeof(struct iphdr);
> +		tcp4_eth_src.h_proto = htons_constant(ETH_P_IP);
>  		return tcp_fill_headers4(conn, iov[TCP_IOV_TAP].iov_base,
>  					 iov[TCP_IOV_IP].iov_base,
>  					 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
>  					 check, seq);
>  	} else {
>  		iov[TCP_IOV_IP].iov_len = sizeof(struct ipv6hdr);
> +		tcp4_eth_src.h_proto = htons_constant(ETH_P_IPV6);
>  		return tcp_fill_headers6(conn, iov[TCP_IOV_TAP].iov_base,
>  					 iov[TCP_IOV_IP].iov_base,
>  					 iov[TCP_IOV_PAYLOAD].iov_base, dlen,
> @@ -2508,11 +2510,7 @@ int tcp_init(struct ctx *c)
>  {
>  	ASSERT(!c->no_tcp);
>  
> -	if (c->ifi4)
> -		tcp_sock4_iov_init(c);
> -
> -	if (c->ifi6)
> -		tcp_sock6_iov_init(c);
> +	tcp_sock4_iov_init(c);

I'd prefer to see the renames to remove the now incorrect '4' and '6'
folded in here, rather than waiting for the next patch.
>  
>  	memset(init_sock_pool4,		0xff,	sizeof(init_sock_pool4));
>  	memset(init_sock_pool6,		0xff,	sizeof(init_sock_pool6));
> diff --git a/tcp_buf.c b/tcp_buf.c
> index 6e6549f..92c4d73 100644
> --- a/tcp_buf.c
> +++ b/tcp_buf.c
> @@ -80,7 +80,7 @@ struct tcp_flags_t {
>  #endif
>  
>  /* Ethernet header for IPv4 frames */
> -static struct ethhdr		tcp4_eth_src;
> +struct ethhdr		tcp4_eth_src;

Ditto.

Also, as with the IP header, for UDP we still have separate ethernet
header structures for v4 and v6 and just update the IOV per-packet to
point to the right one.

>  
>  static struct tap_hdr		tcp4_payload_tap_hdr[TCP_FRAMES_MEM];
>  /* IPv4 headers */
> @@ -104,36 +104,14 @@ static struct tcp_flags_t	tcp4_flags[TCP_FRAMES_MEM];
>  static unsigned int tcp4_flags_used;
>  
>  /* Ethernet header for IPv6 frames */
> -static struct ethhdr		tcp6_eth_src;
> -
> -static struct tap_hdr		tcp6_payload_tap_hdr[TCP_FRAMES_MEM];
> -/* IPv6 headers */
> -struct ipv6hdr		tcp_payload_ip6;
> -static struct iphdr_t		tcp6_payload_ip[TCP_FRAMES_MEM];
> -/* TCP headers and data for IPv6 frames */
> -static struct tcp_payload_t	tcp6_payload[TCP_FRAMES_MEM];
> -
> -static_assert(MSS6 <= sizeof(tcp6_payload[0].data), "MSS6 is greater than 65516");
> -
> -/* References tracking the owner connection of frames in the tap outqueue */
> -static struct tcp_tap_conn *tcp6_frame_conns[TCP_FRAMES_MEM];
> -static unsigned int tcp6_payload_used;
> -
> -static struct tap_hdr		tcp6_flags_tap_hdr[TCP_FRAMES_MEM];
> -/* IPv6 headers for TCP segment without payload */
> -static struct iphdr_t		tcp6_flags_ip[TCP_FRAMES_MEM];
> -/* TCP segment without payload for IPv6 frames */
> -static struct tcp_flags_t	tcp6_flags[TCP_FRAMES_MEM];
> -
> -static unsigned int tcp6_flags_used;
> +struct ipv6hdr tcp_payload_ip6;
>  
>  /* recvmsg()/sendmsg() data for tap */
>  static struct iovec	iov_sock		[TCP_FRAMES_MEM + 1];
>  
>  static struct iovec	tcp4_l2_iov		[TCP_FRAMES_MEM][TCP_NUM_IOVS];
> -static struct iovec	tcp6_l2_iov		[TCP_FRAMES_MEM][TCP_NUM_IOVS];
>  static struct iovec	tcp4_l2_flags_iov	[TCP_FRAMES_MEM][TCP_NUM_IOVS];
> -static struct iovec	tcp6_l2_flags_iov	[TCP_FRAMES_MEM][TCP_NUM_IOVS];
> +
>  /**
>   * tcp_update_l2_buf() - Update Ethernet header buffers with addresses
>   * @eth_d:	Ethernet destination address, NULL if unchanged
> @@ -142,7 +120,6 @@ static struct iovec	tcp6_l2_flags_iov	[TCP_FRAMES_MEM][TCP_NUM_IOVS];
>  void tcp_update_l2_buf(const unsigned char *eth_d, const unsigned char *eth_s)
>  {
>  	eth_update_mac(&tcp4_eth_src, eth_d, eth_s);
> -	eth_update_mac(&tcp6_eth_src, eth_d, eth_s);
>  }
>  
>  /**
> @@ -191,61 +168,12 @@ void tcp_sock4_iov_init(const struct ctx *c)
>  	}
>  }
>  
> -/**
> - * tcp_sock6_iov_init() - Initialise scatter-gather L2 buffers for IPv6 sockets
> - * @c:		Execution context
> - */
> -void tcp_sock6_iov_init(const struct ctx *c)
> -{
> -	struct ipv6hdr ip6 = L2_BUF_IP6_INIT(IPPROTO_TCP);
> -	struct iovec *iov;
> -	int i;
> -
> -	tcp6_eth_src.h_proto = htons_constant(ETH_P_IPV6);
> -	tcp_payload_ip6 = ip6;
> -
> -	for (i = 0; i < ARRAY_SIZE(tcp6_payload); i++) {
> -		tcp6_payload[i].th.doff = sizeof(struct tcphdr) / 4;
> -		tcp6_payload[i].th.ack = 1;
> -	}
> -
> -	for (i = 0; i < ARRAY_SIZE(tcp6_flags); i++) {
> -		tcp6_flags_ip[i].ip6 = ip6;
> -		tcp6_flags[i].th.doff = sizeof(struct tcphdr) / 4;
> -		tcp6_flags[i].th .ack = 1;
> -	}
> -
> -	for (i = 0; i < TCP_FRAMES_MEM; i++) {
> -		iov = tcp6_l2_iov[i];
> -
> -		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_payload_tap_hdr[i]);
> -		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
> -		iov[TCP_IOV_IP].iov_base = &tcp6_payload_ip[i];
> -		iov[TCP_IOV_IP].iov_len = sizeof(tcp6_payload_ip[i].ip6);
> -		iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_payload[i];
> -	}
> -
> -	for (i = 0; i < TCP_FRAMES_MEM; i++) {
> -		iov = tcp6_l2_flags_iov[i];
> -
> -		iov[TCP_IOV_TAP] = tap_hdr_iov(c, &tcp6_flags_tap_hdr[i]);
> -		iov[TCP_IOV_ETH] = IOV_OF_LVALUE(tcp6_eth_src);
> -		iov[TCP_IOV_IP].iov_base = &tcp6_flags_ip[i];
> -		iov[TCP_IOV_IP].iov_len = sizeof(tcp6_flags_ip[i].ip6);
> -		iov[TCP_IOV_PAYLOAD].iov_base = &tcp6_flags[i];
> -	}
> -}
> -
>  /**
>   * tcp_flags_flush() - Send out buffers for segments with no data (flags)
>   * @c:		Execution context
>   */
>  void tcp_flags_flush(const struct ctx *c)
>  {
> -	tap_send_frames(c, &tcp6_l2_flags_iov[0][0], TCP_NUM_IOVS,
> -			tcp6_flags_used);
> -	tcp6_flags_used = 0;
> -
>  	tap_send_frames(c, &tcp4_l2_flags_iov[0][0], TCP_NUM_IOVS,
>  			tcp4_flags_used);
>  	tcp4_flags_used = 0;
> @@ -287,14 +215,6 @@ void tcp_payload_flush(struct ctx *c)
>  {
>  	size_t m;
>  
> -	m = tap_send_frames(c, &tcp6_l2_iov[0][0], TCP_NUM_IOVS,
> -			    tcp6_payload_used);
> -	if (m != tcp6_payload_used) {
> -		tcp_revert_seq(c, &tcp6_frame_conns[m], &tcp6_l2_iov[m],
> -			       tcp6_payload_used - m);
> -	}
> -	tcp6_payload_used = 0;
> -
>  	m = tap_send_frames(c, &tcp4_l2_iov[0][0], TCP_NUM_IOVS,
>  			    tcp4_payload_used);
>  	if (m != tcp4_payload_used) {
> @@ -321,21 +241,13 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  	uint32_t seq;
>  	int ret;
>  
> -	if (CONN_V4(conn))
> -		iov = tcp4_l2_flags_iov[tcp4_flags_used++];
> -	else
> -		iov = tcp6_l2_flags_iov[tcp6_flags_used++];
> -
> +	iov = tcp4_l2_flags_iov[tcp4_flags_used++];
>  	payload = iov[TCP_IOV_PAYLOAD].iov_base;
> -
>  	seq = conn->seq_to_tap;
>  	ret = tcp_prepare_flags(c, conn, flags, &payload->th,
>  				payload->opts, &optlen);
>  	if (ret <= 0) {
> -		if (CONN_V4(conn))
> -			tcp4_flags_used--;
> -		else
> -			tcp6_flags_used--;
> +		tcp4_flags_used--;
>  		return ret;
>  	}
>  
> @@ -346,10 +258,7 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  		struct iovec *dup_iov;
>  		int i;
>  
> -		if (CONN_V4(conn))
> -			dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++];
> -		else
> -			dup_iov = tcp6_l2_flags_iov[tcp6_flags_used++];
> +		dup_iov = tcp4_l2_flags_iov[tcp4_flags_used++];
>  
>  		for (i = 0; i < TCP_NUM_IOVS; i++)
>  			memcpy(dup_iov[i].iov_base, iov[i].iov_base,
> @@ -357,13 +266,8 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  		dup_iov[TCP_IOV_PAYLOAD].iov_len = iov[TCP_IOV_PAYLOAD].iov_len;
>  	}
>  
> -	if (CONN_V4(conn)) {
> -		if (tcp4_flags_used > TCP_FRAMES_MEM - 2)
> -			tcp_flags_flush(c);
> -	} else {
> -		if (tcp6_flags_used > TCP_FRAMES_MEM - 2)
> -			tcp_flags_flush(c);
> -	}
> +	if (tcp4_flags_used > TCP_FRAMES_MEM - 2)
> +		tcp_flags_flush(c);
>  
>  	return 0;
>  }
> @@ -379,36 +283,26 @@ int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags)
>  static void tcp_data_to_tap(struct ctx *c, struct tcp_tap_conn *conn,
>  			    ssize_t dlen, int no_csum, uint32_t seq)
>  {
> +	struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1];
> +	const uint16_t *check = NULL;
>  	struct iovec *iov;
>  	size_t l4len;
>  
>  	conn->seq_to_tap = seq + dlen;
>  
> -	if (CONN_V4(conn)) {
> -		struct iovec *iov_prev = tcp4_l2_iov[tcp4_payload_used - 1];
> -		const uint16_t *check = NULL;
> -
> -		if (no_csum) {
> -			struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base;
> -			check = &iph->check;
> -		}
> +	if (CONN_V4(conn) && no_csum) {
> +		struct iphdr *iph = iov_prev[TCP_IOV_IP].iov_base;
>  
> -		tcp4_frame_conns[tcp4_payload_used] = conn;
> -
> -		iov = tcp4_l2_iov[tcp4_payload_used++];
> -		l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq);
> -		iov[TCP_IOV_PAYLOAD].iov_len = l4len;
> -		if (tcp4_payload_used > TCP_FRAMES_MEM - 1)
> -			tcp_payload_flush(c);
> -	} else if (CONN_V6(conn)) {
> -		tcp6_frame_conns[tcp6_payload_used] = conn;
> -
> -		iov = tcp6_l2_iov[tcp6_payload_used++];
> -		l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, NULL, seq);
> -		iov[TCP_IOV_PAYLOAD].iov_len = l4len;
> -		if (tcp6_payload_used > TCP_FRAMES_MEM - 1)
> -			tcp_payload_flush(c);
> +		check = &iph->check;
>  	}
> +
> +	tcp4_frame_conns[tcp4_payload_used] = conn;
> +
> +	iov = tcp4_l2_iov[tcp4_payload_used++];
> +	l4len = tcp_l2_buf_fill_headers(conn, iov, dlen, check, seq);
> +	iov[TCP_IOV_PAYLOAD].iov_len = l4len;
> +	if (tcp4_payload_used > TCP_FRAMES_MEM - 1)
> +		tcp_payload_flush(c);
>  }
>  
>  /**
> @@ -472,19 +366,15 @@ int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn)
>  		mh_sock.msg_iovlen = fill_bufs;
>  	}
>  
> -	if (( v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM) ||
> -	    (!v4 && tcp6_payload_used + fill_bufs > TCP_FRAMES_MEM)) {
> +	if ((v4 && tcp4_payload_used + fill_bufs > TCP_FRAMES_MEM)) {
>  		tcp_payload_flush(c);
>  
>  		/* Silence Coverity CWE-125 false positive */
> -		tcp4_payload_used = tcp6_payload_used = 0;
> +		tcp4_payload_used = 0;
>  	}
>  
>  	for (i = 0, iov = iov_sock + 1; i < fill_bufs; i++, iov++) {
> -		if (v4)
> -			iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data;
> -		else
> -			iov->iov_base = &tcp6_payload[tcp6_payload_used + i].data;
> +		iov->iov_base = &tcp4_payload[tcp4_payload_used + i].data;
>  		iov->iov_len = mss;
>  	}
>  	if (iov_rem)
> diff --git a/tcp_buf.h b/tcp_buf.h
> index d3d0d7f..d7cdbaf 100644
> --- a/tcp_buf.h
> +++ b/tcp_buf.h
> @@ -13,6 +13,7 @@ void tcp_payload_flush(struct ctx *c);
>  int tcp_buf_data_from_sock(struct ctx *c, struct tcp_tap_conn *conn);
>  int tcp_buf_send_flag(struct ctx *c, struct tcp_tap_conn *conn, int flags);
>  
> +extern struct ethhdr		tcp4_eth_src;
>  extern struct iphdr		tcp_payload_ip4;
>  extern struct ipv6hdr		tcp_payload_ip6;
>  #endif  /*TCP_BUF_H */

-- 
David Gibson (he or they)	| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you, not the other way
				| around.
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/4] tcp: unify IPv4 and IPv6 l2 tap queues
  2024-09-06 21:34 [PATCH 0/4] tcp: unify IPv4 and IPv6 l2 tap queues Jon Maloy
                   ` (3 preceding siblings ...)
  2024-09-06 21:34 ` [PATCH 4/4] tcp: change prefix tcp4_ to tcp_ where applicable Jon Maloy
@ 2024-09-09  8:31 ` Stefano Brivio
  4 siblings, 0 replies; 9+ messages in thread
From: Stefano Brivio @ 2024-09-09  8:31 UTC (permalink / raw)
  To: Jon Maloy; +Cc: passt-dev, lvivier, dgibson

On Fri,  6 Sep 2024 17:34:23 -0400
Jon Maloy <jmaloy@redhat.com> wrote:

> This should save us some memory and code.
> 
> Jon Maloy (4):
>   tcp: create unified struct for IPv4 and IPv6 header
>   tcp: update ip address in l2 tap queues on the fly
>   tcp: unify l2 TCPv4 and TCPv6 queues and structures
>   tcp: change prefix tcp4_ to tcp_ where applicable
> 
>  tcp.c     |  22 ++---
>  tcp_buf.c | 259 +++++++++++++++++-------------------------------------
>  tcp_buf.h |   3 +
>  3 files changed, 98 insertions(+), 186 deletions(-)

The saving in lines of code is nice (68 lines excluding comments), I'm
not quite sure about memory savings and overhead.

Memory-wise, if one IP version is disabled, we won't initialise any
related buffer, so in that case we won't save any memory with these
changes.

If both IPv4 and IPv6 are enabled, we'll halve the memory usage for
inbound payload buffers (about 8 MiB instead of 16 MiB):

  $ nm -td -P passt.avx2 | grep "tcp6_payload "
  tcp6_payload b 48607968 8388608

but that also halves the total amount of available buffers. If we have
occasional usage of one IP family, these changes decrease waste, but if
both IPv4 and IPv6 are used consistently, this simply means we're
cutting the amount of buffers in half. However, we could double
TCP_FRAMES_MEM and have, as a result, a more optimised management of
buffers.

But I'm not sure it makes sense considered the potential overhead.

That is, there's a reason why I originally added two separate sets of
buffers for IPv4 and IPv6: to enable pre-computations of headers, so
that we avoid populating the full headers at every received packet.
This series reverses that.

There's the possibility that improvements in data locality and lower
data cache usage outweigh that (as David mentioned in his comment to
1/4), but that should be demonstrated first.

By the way, I tested this series, the clang-tidy test fails (see
David's comment to 2/4) and the "TCP throughput over IPv6: guest to
host" test consistently hangs as we switch to the 65520 bytes MTU. It
looks like the iperf3 client fails to connect altogether, but I
didn't really investigate.

-- 
Stefano


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-09-09  8:31 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-09-06 21:34 [PATCH 0/4] tcp: unify IPv4 and IPv6 l2 tap queues Jon Maloy
2024-09-06 21:34 ` [PATCH 1/4] tcp: create unified struct for IPv4 and IPv6 header Jon Maloy
2024-09-09  1:04   ` David Gibson
2024-09-06 21:34 ` [PATCH 2/4] tcp: update ip address in l2 tap queues on the fly Jon Maloy
2024-09-09  1:08   ` David Gibson
2024-09-06 21:34 ` [PATCH 3/4] tcp: unify l2 TCPv4 and TCPv6 queues and structures Jon Maloy
2024-09-09  1:17   ` David Gibson
2024-09-06 21:34 ` [PATCH 4/4] tcp: change prefix tcp4_ to tcp_ where applicable Jon Maloy
2024-09-09  8:31 ` [PATCH 0/4] tcp: unify IPv4 and IPv6 l2 tap queues Stefano Brivio

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).