public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v2 09/10] tcp: Maintain host flowside for connections
Date: Mon, 28 Aug 2023 15:41:45 +1000	[thread overview]
Message-ID: <20230828054146.48673-10-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20230828054146.48673-1-david@gibson.dropbear.id.au>

We now maintain a struct flowside describing each TCP connection as it
appears to the guest.  We don't explicitly have the same information
for the connections as they appear to the host, however.  Rather, that
information is implicit in the state of the host side socket.  For
future generalisations of flow/connection tracking, we're going to
need to use this information more heavily, so properly populate the
other flowside in each flow table entry.

This does require an additional getsockname() call for each new connection.
We hope to optimise that away for at least some cases in future.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 flow.c     | 26 ++++++++++++++++++++++++++
 flow.h     |  1 +
 tcp.c      | 46 ++++++++++++++++++++++++++++++++++++++++++----
 tcp_conn.h |  4 ----
 4 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/flow.c b/flow.c
index 4521a43..f2a7377 100644
--- a/flow.c
+++ b/flow.c
@@ -7,6 +7,7 @@
 
 #include <unistd.h>
 #include <string.h>
+#include <errno.h>
 #include <arpa/inet.h>
 
 #include "util.h"
@@ -83,3 +84,28 @@ void flow_table_compact(struct ctx *c, union flow *hole)
 
 	memset(from, 0, sizeof(*from));
 }
+
+/** flowside_getsockname - Initialize flowside f{addr,port} from a bound socket
+ * @fs:		flowside to initialize
+ * @s:		bound socket
+ *
+ * #syscalls getsockname
+ */
+int flowside_getsockname(struct flowside *fs, int s)
+{
+	struct sockaddr_storage sa;
+	socklen_t sl = sizeof(sa);
+
+	/* FIXME: Work around clang-tidy not realizing that getsockname() writes
+	 * the socket address.  See
+	 * https://github.com/llvm/llvm-project/issues/58992
+	 */
+	memset(&sa, 0, sizeof(struct sockaddr_in6));
+	if (getsockname(s, (struct sockaddr *)&sa, &sl) < 0)
+		return -errno;
+
+	inany_from_sockaddr(&fs->faddr, &fs->fport,
+			    (const struct sockaddr *)&sa);
+
+	return 0;
+}
diff --git a/flow.h b/flow.h
index b4f042b..4a27303 100644
--- a/flow.h
+++ b/flow.h
@@ -61,6 +61,7 @@ static inline bool flowside_complete(const struct flowside *fs)
 
 #define FLOWSIDE_STRLEN		(2*(INET6_ADDRSTRLEN+8) + 6)
 
+int flowside_getsockname(struct flowside *fs, int s);
 const char *flowside_fmt(const struct flowside *fs, char *buf, size_t size);
 
 /**
diff --git a/tcp.c b/tcp.c
index a9ddce6..297134f 100644
--- a/tcp.c
+++ b/tcp.c
@@ -397,6 +397,7 @@ struct tcp6_l2_head {	/* For MSS6 macro: keep in sync with tcp6_l2_buf_t */
 #define OPT_SACK	5
 #define OPT_TS		8
 
+#define SOCKSIDE(conn)		(&(conn)->f.side[0])
 #define TAPSIDE(conn)		(&(conn)->f.side[1])
 
 #define CONN_V4(conn)		(!!inany_v4(&TAPSIDE(conn)->faddr))
@@ -2020,6 +2021,19 @@ static void tcp_conn_from_tap(struct ctx *c,
 		conn_event(c, conn, TAP_SYN_ACK_SENT);
 	}
 
+	/* Initialise sock-side flowside */
+	SOCKSIDE(conn)->eaddr = TAPSIDE(conn)->faddr;
+	SOCKSIDE(conn)->eport = TAPSIDE(conn)->fport;
+	if (flowside_getsockname(SOCKSIDE(conn), s) < 0) {
+		err("tcp: Failed to get local name for outgoing connection");
+		tcp_rst(c, conn);
+		return;
+	}
+
+	ASSERT(flowside_complete(SOCKSIDE(conn)));
+	debug("TCP: index %li, connection forwarded to socket, %s", FLOW_IDX(conn),
+	      flowside_fmt(SOCKSIDE(conn), fsstr, sizeof(fsstr)));
+
 	tcp_epoll_ctl(c, conn);
 }
 
@@ -2629,20 +2643,35 @@ static void tcp_snat_inbound(const struct ctx *c, union inany_addr *addr)
  * @s:		Accepted socket
  * @sa:		Peer socket address (from accept())
  * @now:	Current timestamp
+ *
+ * Return: true if able to create a tap connection, false otherwise
  */
-static void tcp_tap_conn_from_sock(struct ctx *c,
+static bool tcp_tap_conn_from_sock(struct ctx *c,
 				   union tcp_listen_epoll_ref ref,
 				   struct tcp_tap_conn *conn, int s,
 				   struct sockaddr *sa,
 				   const struct timespec *now)
 {
+	char fsstr[FLOWSIDE_STRLEN];
+
 	conn->f.type = FLOW_TCP;
 	conn->sock = s;
 	conn->timer = -1;
 	conn->ws_to_tap = conn->ws_from_tap = 0;
 	conn_event(c, conn, SOCK_ACCEPTED);
 
-	inany_from_sockaddr(&TAPSIDE(conn)->faddr, &TAPSIDE(conn)->fport, sa);
+	if (flowside_getsockname(SOCKSIDE(conn), s) < 0) {
+		err("tcp: Failed to get local name, connection dropped");
+		return false;
+	}
+	inany_from_sockaddr(&SOCKSIDE(conn)->eaddr, &SOCKSIDE(conn)->eport, sa);
+
+	ASSERT(flowside_complete(SOCKSIDE(conn)));
+	debug("TCP: index %li, new connection from socket, %s", FLOW_IDX(conn),
+	      flowside_fmt(SOCKSIDE(conn), fsstr, sizeof(fsstr)));
+
+	TAPSIDE(conn)->faddr = SOCKSIDE(conn)->eaddr;
+	TAPSIDE(conn)->fport = SOCKSIDE(conn)->eport;
 	tcp_snat_inbound(c, &TAPSIDE(conn)->faddr);
 
 	if (CONN_V4(conn)) {
@@ -2656,6 +2685,8 @@ static void tcp_tap_conn_from_sock(struct ctx *c,
 	TAPSIDE(conn)->eport = ref.port;
 
 	ASSERT(flowside_complete(TAPSIDE(conn)));
+	debug("TCP: index %li, connection forwarded to tap, %s", FLOW_IDX(conn),
+	      flowside_fmt(TAPSIDE(conn), fsstr, sizeof(fsstr)));
 
 	tcp_seq_init(c, conn, now);
 	tcp_hash_insert(c, conn);
@@ -2668,6 +2699,8 @@ static void tcp_tap_conn_from_sock(struct ctx *c,
 	conn_flag(c, conn, ACK_FROM_TAP_DUE);
 
 	tcp_get_sndbuf(conn);
+
+	return true;
 }
 
 /**
@@ -2704,8 +2737,13 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
 				      s, (struct sockaddr *)&sa))
 		return;
 
-	tcp_tap_conn_from_sock(c, ref.tcp_listen, &flow->tcp, s,
-			       (struct sockaddr *)&sa, now);
+	if (tcp_tap_conn_from_sock(c, ref.tcp_listen, &flow->tcp, s,
+				   (struct sockaddr *)&sa, now))
+		return;
+
+	/* Failed to create the connection */
+	close(s);
+	c->flow_count--;
 }
 
 /**
diff --git a/tcp_conn.h b/tcp_conn.h
index 3482759..2ef0130 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -24,10 +24,6 @@
  * @ws_to_tap:		Window scaling factor advertised to tap/guest
  * @sndbuf:		Sending buffer in kernel, rounded to 2 ^ SNDBUF_BITS
  * @seq_dup_ack_approx:	Last duplicate ACK number sent to tap
- * @eaddr:		Guest side endpoint address (guest's local address)
- * @faddr:		Guest side forwarding address (guest's remote address)
- * @eport:		Guest side endpoint port (guest's local port)
- * @fport:		Guest side forwarding port (guest's remote port)
  * @wnd_from_tap:	Last window size from tap, unscaled (as received)
  * @wnd_to_tap:		Sending window advertised to tap, unscaled (as sent)
  * @seq_to_tap:		Next sequence for packets to tap
-- 
2.41.0


  parent reply	other threads:[~2023-08-28  5:42 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-28  5:41 [PATCH v2 00/10] RFC: Convert TCP connection table to generalisable flow table David Gibson
2023-08-28  5:41 ` [PATCH v2 01/10] flow, tcp: Generalise connection types David Gibson
2023-08-28  5:41 ` [PATCH v2 02/10] flow, tcp: Move TCP connection table to unified flow table David Gibson
2023-08-28  5:41 ` [PATCH v2 03/10] flow, tcp: Consolidate flow pointer<->index helpers David Gibson
2023-09-07  1:01   ` Stefano Brivio
2023-09-07  3:48     ` David Gibson
2023-08-28  5:41 ` [PATCH v2 04/10] flow: Make unified version of flow table compaction David Gibson
2023-08-28  5:41 ` [PATCH v2 05/10] flow: Introduce struct flowside, space for uniform tracking of addresses David Gibson
2023-09-07  1:01   ` Stefano Brivio
2023-09-07  4:05     ` David Gibson
2023-09-07  7:55       ` Stefano Brivio
2023-08-28  5:41 ` [PATCH v2 06/10] tcp: Move guest side address tracking to flow/flowside David Gibson
2023-08-28  5:41 ` [PATCH v2 07/10] tcp, flow: Perform TCP hash calculations based on flowside David Gibson
2023-08-28  5:41 ` [PATCH v2 08/10] tcp: Re-use flowside_hash for initial sequence number generation David Gibson
2023-08-28  5:41 ` David Gibson [this message]
2023-08-28  5:41 ` [PATCH v2 10/10] tcp_splice: Fill out flowside information for spliced connections David Gibson
2023-09-07  1:02   ` Stefano Brivio
2023-09-07  4:14     ` David Gibson
2023-09-07  7:55       ` Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230828054146.48673-10-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).