public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: jmaloy@redhat.com, David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v8 03/27] tcp, flow: Remove redundant information, repack connection structures
Date: Thu, 18 Jul 2024 15:26:29 +1000	[thread overview]
Message-ID: <20240718052653.3241585-4-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20240718052653.3241585-1-david@gibson.dropbear.id.au>

Some information we explicitly store in the TCP connection is now
duplicated in the common flow structure.  Access it from there instead, and
remove it from the TCP-specific structure.  With that done, we can reorder
both the "tap" and "splice" TCP structures a bit to get better packing for
the new combined flow table entries.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp.c          | 52 ++++++++++++++++++++++++++------------------------
 tcp_conn.h     | 40 +++++++++++++++-----------------------
 tcp_internal.h |  6 +++++-
 3 files changed, 47 insertions(+), 51 deletions(-)

diff --git a/tcp.c b/tcp.c
index 914a0746..3d3df4c9 100644
--- a/tcp.c
+++ b/tcp.c
@@ -333,8 +333,6 @@
 
 #define ACK_IF_NEEDED	0		/* See tcp_send_flag() */
 
-#define TAPSIDE(conn_)	((conn_)->f.pif[1] == PIF_TAP)
-
 #define CONN_IS_CLOSING(conn)						\
 	(((conn)->events & ESTABLISHED) &&				\
 	 ((conn)->events & (SOCK_FIN_RCVD | TAP_FIN_RCVD)))
@@ -673,10 +671,11 @@ void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
  */
 static int tcp_rtt_dst_low(const struct tcp_tap_conn *conn)
 {
+	const struct flowside *tapside = TAPFLOW(conn);
 	int i;
 
 	for (i = 0; i < LOW_RTT_TABLE_SIZE; i++)
-		if (inany_equals(&conn->faddr, low_rtt_dst + i))
+		if (inany_equals(&tapside->faddr, low_rtt_dst + i))
 			return 1;
 
 	return 0;
@@ -691,6 +690,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
 			      const struct tcp_info *tinfo)
 {
 #ifdef HAS_MIN_RTT
+	const struct flowside *tapside = TAPFLOW(conn);
 	int i, hole = -1;
 
 	if (!tinfo->tcpi_min_rtt ||
@@ -698,7 +698,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
 		return;
 
 	for (i = 0; i < LOW_RTT_TABLE_SIZE; i++) {
-		if (inany_equals(&conn->faddr, low_rtt_dst + i))
+		if (inany_equals(&tapside->faddr, low_rtt_dst + i))
 			return;
 		if (hole == -1 && IN6_IS_ADDR_UNSPECIFIED(low_rtt_dst + i))
 			hole = i;
@@ -710,7 +710,7 @@ static void tcp_rtt_dst_check(const struct tcp_tap_conn *conn,
 	if (hole == -1)
 		return;
 
-	low_rtt_dst[hole++] = conn->faddr;
+	low_rtt_dst[hole++] = tapside->faddr;
 	if (hole == LOW_RTT_TABLE_SIZE)
 		hole = 0;
 	inany_from_af(low_rtt_dst + hole, AF_INET6, &in6addr_any);
@@ -865,8 +865,10 @@ static int tcp_hash_match(const struct tcp_tap_conn *conn,
 			  const union inany_addr *faddr,
 			  in_port_t eport, in_port_t fport)
 {
-	if (inany_equals(&conn->faddr, faddr) &&
-	    conn->eport == eport && conn->fport == fport)
+	const struct flowside *tapside = TAPFLOW(conn);
+
+	if (inany_equals(&tapside->faddr, faddr) &&
+	    tapside->eport == eport && tapside->fport == fport)
 		return 1;
 
 	return 0;
@@ -900,7 +902,10 @@ static uint64_t tcp_hash(const struct ctx *c, const union inany_addr *faddr,
 static uint64_t tcp_conn_hash(const struct ctx *c,
 			      const struct tcp_tap_conn *conn)
 {
-	return tcp_hash(c, &conn->faddr, conn->eport, conn->fport);
+	const struct flowside *tapside = TAPFLOW(conn);
+
+	return tcp_hash(c, &tapside->faddr, tapside->eport,
+			tapside->fport);
 }
 
 /**
@@ -1035,10 +1040,12 @@ void tcp_defer_handler(struct ctx *c)
  * @seq:	Sequence number
  */
 static void tcp_fill_header(struct tcphdr *th,
-			       const struct tcp_tap_conn *conn, uint32_t seq)
+			    const struct tcp_tap_conn *conn, uint32_t seq)
 {
-	th->source = htons(conn->fport);
-	th->dest = htons(conn->eport);
+	const struct flowside *tapside = TAPFLOW(conn);
+
+	th->source = htons(tapside->fport);
+	th->dest = htons(tapside->eport);
 	th->seq = htonl(seq);
 	th->ack_seq = htonl(conn->seq_ack_to_tap);
 	if (conn->events & ESTABLISHED)	{
@@ -1070,7 +1077,8 @@ static size_t tcp_fill_headers4(const struct ctx *c,
 				size_t dlen, const uint16_t *check,
 				uint32_t seq)
 {
-	const struct in_addr *a4 = inany_v4(&conn->faddr);
+	const struct flowside *tapside = TAPFLOW(conn);
+	const struct in_addr *a4 = inany_v4(&tapside->faddr);
 	size_t l4len = dlen + sizeof(*th);
 	size_t l3len = l4len + sizeof(*iph);
 
@@ -1112,10 +1120,11 @@ static size_t tcp_fill_headers6(const struct ctx *c,
 				struct ipv6hdr *ip6h, struct tcphdr *th,
 				size_t dlen, uint32_t seq)
 {
+	const struct flowside *tapside = TAPFLOW(conn);
 	size_t l4len = dlen + sizeof(*th);
 
 	ip6h->payload_len = htons(l4len);
-	ip6h->saddr = conn->faddr.a6;
+	ip6h->saddr = tapside->faddr.a6;
 	if (IN6_IS_ADDR_LINKLOCAL(&ip6h->saddr))
 		ip6h->daddr = c->ip6.addr_ll_seen;
 	else
@@ -1154,7 +1163,8 @@ size_t tcp_l2_buf_fill_headers(const struct ctx *c,
 			       struct iovec *iov, size_t dlen,
 			       const uint16_t *check, uint32_t seq)
 {
-	const struct in_addr *a4 = inany_v4(&conn->faddr);
+	const struct flowside *tapside = TAPFLOW(conn);
+	const struct in_addr *a4 = inany_v4(&tapside->faddr);
 
 	if (a4) {
 		return tcp_fill_headers4(c, conn, iov[TCP_IOV_TAP].iov_base,
@@ -1465,6 +1475,7 @@ static void tcp_seq_init(const struct ctx *c, struct tcp_tap_conn *conn,
 			 const struct timespec *now)
 {
 	struct siphash_state state = SIPHASH_INIT(c->hash_secret);
+	const struct flowside *tapside = TAPFLOW(conn);
 	union inany_addr aany;
 	uint64_t hash;
 	uint32_t ns;
@@ -1474,10 +1485,10 @@ static void tcp_seq_init(const struct ctx *c, struct tcp_tap_conn *conn,
 	else
 		inany_from_af(&aany, AF_INET6, &c->ip6.addr);
 
-	inany_siphash_feed(&state, &conn->faddr);
+	inany_siphash_feed(&state, &tapside->faddr);
 	inany_siphash_feed(&state, &aany);
 	hash = siphash_final(&state, 36,
-			     (uint64_t)conn->fport << 16 | conn->eport);
+			     (uint64_t)tapside->fport << 16 | tapside->eport);
 
 	/* 32ns ticks, overflows 32 bits every 137s */
 	ns = (now->tv_sec * 1000000000 + now->tv_nsec) >> 5;
@@ -1766,11 +1777,6 @@ static void tcp_conn_from_tap(struct ctx *c, sa_family_t af,
 	if (!(conn->wnd_from_tap = (htons(th->window) >> conn->ws_from_tap)))
 		conn->wnd_from_tap = 1;
 
-	inany_from_af(&conn->faddr, af, daddr);
-
-	conn->fport = dstport;
-	conn->eport = srcport;
-
 	conn->seq_init_from_tap = ntohl(th->seq);
 	conn->seq_from_tap = conn->seq_init_from_tap + 1;
 	conn->seq_ack_to_tap = conn->seq_from_tap;
@@ -2314,10 +2320,6 @@ static void tcp_tap_conn_from_sock(struct ctx *c, in_port_t dstport,
 	conn->ws_to_tap = conn->ws_from_tap = 0;
 	conn_event(c, conn, SOCK_ACCEPTED);
 
-	conn->faddr = saddr;
-	conn->fport = srcport;
-	conn->eport = dstport;
-
 	tcp_seq_init(c, conn, now);
 	tcp_hash_insert(c, conn);
 
diff --git a/tcp_conn.h b/tcp_conn.h
index f80ef67b..4e7c57a4 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -13,19 +13,16 @@
  * struct tcp_tap_conn - Descriptor for a TCP connection (not spliced)
  * @f:			Generic flow information
  * @in_epoll:		Is the connection in the epoll set?
+ * @retrans:		Number of retransmissions occurred due to ACK_TIMEOUT
+ * @ws_from_tap:	Window scaling factor advertised from tap/guest
+ * @ws_to_tap:		Window scaling factor advertised to tap/guest
  * @tap_mss:		MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS
  * @sock:		Socket descriptor number
  * @events:		Connection events, implying connection states
  * @timer:		timerfd descriptor for timeout events
  * @flags:		Connection flags representing internal attributes
- * @retrans:		Number of retransmissions occurred due to ACK_TIMEOUT
- * @ws_from_tap:	Window scaling factor advertised from tap/guest
- * @ws_to_tap:		Window scaling factor advertised to tap/guest
  * @sndbuf:		Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS
  * @seq_dup_ack_approx:	Last duplicate ACK number sent to tap
- * @faddr:		Guest side forwarding address (guest's remote address)
- * @eport:		Guest side endpoint port (guest's local port)
- * @fport:		Guest side forwarding port (guest's remote port)
  * @wnd_from_tap:	Last window size from tap, unscaled (as received)
  * @wnd_to_tap:		Sending window advertised to tap, unscaled (as sent)
  * @seq_to_tap:		Next sequence for packets to tap
@@ -49,6 +46,10 @@ struct tcp_tap_conn {
 	unsigned int	ws_from_tap	:TCP_WS_BITS;
 	unsigned int	ws_to_tap	:TCP_WS_BITS;
 
+#define TCP_MSS_BITS			14
+	unsigned int	tap_mss		:TCP_MSS_BITS;
+#define MSS_SET(conn, mss)	(conn->tap_mss = (mss >> (16 - TCP_MSS_BITS)))
+#define MSS_GET(conn)		(conn->tap_mss << (16 - TCP_MSS_BITS))
 
 	int		sock		:FD_REF_BITS;
 
@@ -77,13 +78,6 @@ struct tcp_tap_conn {
 #define ACK_TO_TAP_DUE		BIT(3)
 #define ACK_FROM_TAP_DUE	BIT(4)
 
-
-#define TCP_MSS_BITS			14
-	unsigned int	tap_mss		:TCP_MSS_BITS;
-#define MSS_SET(conn, mss)	(conn->tap_mss = (mss >> (16 - TCP_MSS_BITS)))
-#define MSS_GET(conn)		(conn->tap_mss << (16 - TCP_MSS_BITS))
-
-
 #define SNDBUF_BITS		24
 	unsigned int	sndbuf		:SNDBUF_BITS;
 #define SNDBUF_SET(conn, bytes)	(conn->sndbuf = ((bytes) >> (32 - SNDBUF_BITS)))
@@ -91,11 +85,6 @@ struct tcp_tap_conn {
 
 	uint8_t		seq_dup_ack_approx;
 
-
-	union inany_addr faddr;
-	in_port_t	eport;
-	in_port_t	fport;
-
 	uint16_t	wnd_from_tap;
 	uint16_t	wnd_to_tap;
 
@@ -109,22 +98,24 @@ struct tcp_tap_conn {
 /**
  * struct tcp_splice_conn - Descriptor for a spliced TCP connection
  * @f:			Generic flow information
- * @in_epoll:		Is the connection in the epoll set?
  * @s:			File descriptor for sockets
  * @pipe:		File descriptors for pipes
- * @events:		Events observed/actions performed on connection
- * @flags:		Connection flags (attributes, not events)
  * @read:		Bytes read (not fully written to other side in one shot)
  * @written:		Bytes written (not fully written from one other side read)
-*/
+ * @events:		Events observed/actions performed on connection
+ * @flags:		Connection flags (attributes, not events)
+ * @in_epoll:		Is the connection in the epoll set?
+ */
 struct tcp_splice_conn {
 	/* Must be first element */
 	struct flow_common f;
 
-	bool in_epoll	:1;
 	int s[SIDES];
 	int pipe[SIDES][2];
 
+	uint32_t read[SIDES];
+	uint32_t written[SIDES];
+
 	uint8_t events;
 #define SPLICE_CLOSED			0
 #define SPLICE_CONNECT			BIT(0)
@@ -139,8 +130,7 @@ struct tcp_splice_conn {
 #define RCVLOWAT_ACT(sidei_)		((sidei_) ? BIT(4) : BIT(3))
 #define CLOSING				BIT(5)
 
-	uint32_t read[SIDES];
-	uint32_t written[SIDES];
+	bool in_epoll	:1;
 };
 
 /* Socket pools */
diff --git a/tcp_internal.h b/tcp_internal.h
index 51aaa169..4f61e5c3 100644
--- a/tcp_internal.h
+++ b/tcp_internal.h
@@ -39,7 +39,11 @@
 #define OPT_SACKP	4
 #define OPT_SACK	5
 #define OPT_TS		8
-#define CONN_V4(conn)		(!!inany_v4(&(conn)->faddr))
+
+#define TAPSIDE(conn_)	((conn_)->f.pif[1] == PIF_TAP)
+#define TAPFLOW(conn_)	(&((conn_)->f.side[TAPSIDE(conn_)]))
+
+#define CONN_V4(conn)		(!!inany_v4(&TAPFLOW(conn)->faddr))
 #define CONN_V6(conn)		(!CONN_V4(conn))
 
 /*
-- 
@@ -39,7 +39,11 @@
 #define OPT_SACKP	4
 #define OPT_SACK	5
 #define OPT_TS		8
-#define CONN_V4(conn)		(!!inany_v4(&(conn)->faddr))
+
+#define TAPSIDE(conn_)	((conn_)->f.pif[1] == PIF_TAP)
+#define TAPFLOW(conn_)	(&((conn_)->f.side[TAPSIDE(conn_)]))
+
+#define CONN_V4(conn)		(!!inany_v4(&TAPFLOW(conn)->faddr))
 #define CONN_V6(conn)		(!CONN_V4(conn))
 
 /*
-- 
2.45.2


  parent reply	other threads:[~2024-07-18  5:27 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-18  5:26 [PATCH v8 00/27] Unified flow table David Gibson
2024-07-18  5:26 ` [PATCH v8 01/27] flow: Common address information for initiating side David Gibson
2024-07-18  5:26 ` [PATCH v8 02/27] flow: Common address information for target side David Gibson
2024-07-18  5:26 ` David Gibson [this message]
2024-07-18  5:26 ` [PATCH v8 04/27] tcp: Obtain guest address from flowside David Gibson
2024-07-18  5:26 ` [PATCH v8 05/27] tcp: Manage outbound address via flow table David Gibson
2024-07-18  5:26 ` [PATCH v8 06/27] tcp: Simplify endpoint validation using flowside information David Gibson
2024-07-18  5:26 ` [PATCH v8 07/27] tcp_splice: Eliminate SPLICE_V6 flag David Gibson
2024-07-18  5:26 ` [PATCH v8 08/27] tcp, flow: Replace TCP specific hash function with general flow hash David Gibson
2024-07-18  5:26 ` [PATCH v8 09/27] flow, tcp: Generalise TCP hash table to general flow hash table David Gibson
2024-07-18  5:26 ` [PATCH v8 10/27] tcp: Re-use flow hash for initial sequence number generation David Gibson
2024-07-18  5:26 ` [PATCH v8 11/27] icmp: Remove redundant id field from flow table entry David Gibson
2024-07-18  5:26 ` [PATCH v8 12/27] icmp: Obtain destination addresses from the flowsides David Gibson
2024-07-18  5:26 ` [PATCH v8 13/27] icmp: Look up ping flows using flow hash David Gibson
2024-07-18  5:26 ` [PATCH v8 14/27] icmp: Eliminate icmp_id_map David Gibson
2024-07-18  5:26 ` [PATCH v8 15/27] flow: Helper to create sockets based on flowside David Gibson
2024-07-18  5:26 ` [PATCH v8 16/27] icmp: Manage outbound socket address via flow table David Gibson
2024-07-18  5:26 ` [PATCH v8 17/27] flow, tcp: Flow based NAT and port forwarding for TCP David Gibson
2024-07-18  5:26 ` [PATCH v8 18/27] flow, icmp: Use general flow forwarding rules for ICMP David Gibson
2024-07-18  5:26 ` [PATCH v8 19/27] fwd: Update flow forwarding logic for UDP David Gibson
2024-07-18  5:26 ` [PATCH v8 20/27] udp: Create flows for datagrams from originating sockets David Gibson
2024-07-18  5:26 ` [PATCH v8 21/27] udp: Handle "spliced" datagrams with per-flow sockets David Gibson
2024-07-18  5:26 ` [PATCH v8 22/27] udp: Remove obsolete splice tracking David Gibson
2024-07-18  5:26 ` [PATCH v8 23/27] udp: Find or create flows for datagrams from tap interface David Gibson
2024-07-18  5:26 ` [PATCH v8 24/27] udp: Direct datagrams from host to guest via flow table David Gibson
2024-07-18  5:26 ` [PATCH v8 25/27] udp: Remove obsolete socket tracking David Gibson
2024-07-18  5:26 ` [PATCH v8 26/27] udp: Remove rdelta port forwarding maps David Gibson
2024-07-18  5:26 ` [PATCH v8 27/27] udp: Rename UDP listening sockets David Gibson
2024-07-19 19:20 ` [PATCH v8 00/27] Unified flow table Stefano Brivio
2024-07-20  3:37   ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240718052653.3241585-4-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=jmaloy@redhat.com \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

Code repositories for project(s) associated with this public inbox:

	https://passt.top/passt

This is a public inbox; see the mirroring instructions for how to clone
and mirror all data and code used for this inbox, as well as URLs for
IMAP folder(s).