public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v3 04/15] tcp_splice,flow: Maintain flow information for spliced connections
Date: Thu, 21 Dec 2023 18:02:26 +1100	[thread overview]
Message-ID: <20231221070237.1422557-5-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20231221070237.1422557-1-david@gibson.dropbear.id.au>

Every flow in the flow table now has space for the addresses as seen by
both the host and guest side.  We fill that information in for regular
"tap" TCP connections, but not for spliced connections.

Fill in that information for spliced connections too, so it's now uniformly
available for all flow types (that are implemented so far).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp.c        | 35 +++++++++++++----------------
 tcp_splice.c | 62 +++++++++++++++++++++++++++++++++++++---------------
 tcp_splice.h |  3 +--
 3 files changed, 60 insertions(+), 40 deletions(-)

diff --git a/tcp.c b/tcp.c
index 18ab3ac..6d77cf6 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2658,32 +2658,23 @@ static void tcp_snat_inbound(const struct ctx *c, union inany_addr *addr)
  * tcp_tap_conn_from_sock() - Initialize state for non-spliced connection
  * @c:		Execution context
  * @ref:	epoll reference of listening socket
- * @conn:	connection structure to initialize
+ * @conn:	connection structure (with SOCKFSIDE(@conn) completed)
  * @s:		Accepted socket
- * @sa:		Peer socket address (from accept())
  * @now:	Current timestamp
- *
- * Return: true if able to create a tap connection, false otherwise
  */
-static bool tcp_tap_conn_from_sock(struct ctx *c,
+static void tcp_tap_conn_from_sock(struct ctx *c,
 				   union tcp_listen_epoll_ref ref,
 				   struct tcp_tap_conn *conn, int s,
-				   const struct sockaddr *sa,
 				   const struct timespec *now)
 {
+	ASSERT(flowside_complete(SOCKFSIDE(conn)));
+
 	conn->f.type = FLOW_TCP;
 	conn->sock = s;
 	conn->timer = -1;
 	conn->ws_to_tap = conn->ws_from_tap = 0;
 	conn_event(c, conn, SOCK_ACCEPTED);
 
-	if (flowside_from_sock(SOCKFSIDE(conn), PIF_HOST, s, NULL, sa) < 0) {
-		err("tcp: Failed to get local name, connection dropped");
-		return false;
-	}
-
-	ASSERT(flowside_complete(SOCKFSIDE(conn)));
-
 	TAPFSIDE(conn)->pif = PIF_TAP;
 	TAPFSIDE(conn)->faddr = SOCKFSIDE(conn)->eaddr;
 	TAPFSIDE(conn)->fport = SOCKFSIDE(conn)->eport;
@@ -2712,8 +2703,6 @@ static bool tcp_tap_conn_from_sock(struct ctx *c,
 	conn_flag(c, conn, ACK_FROM_TAP_DUE);
 
 	tcp_get_sndbuf(conn);
-
-	return true;
 }
 
 /**
@@ -2737,15 +2726,21 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
 	if (s < 0)
 		goto cancel;
 
-	if (c->mode == MODE_PASTA &&
-	    tcp_splice_conn_from_sock(c, ref.tcp_listen, &flow->tcp_splice,
-				      s, (struct sockaddr *)&sa))
+	if (flowside_from_sock(&flow->f.side[0], ref.tcp_listen.pif, s,
+			       NULL, &sa) < 0) {
+		err("tcp: Failed to get local name, connection dropped");
+		close(s);
+		flow_alloc_cancel(flow);
 		return;
+	}
 
-	if (tcp_tap_conn_from_sock(c, ref.tcp_listen, &flow->tcp, s,
-				   (struct sockaddr *)&sa, now))
+	if (c->mode == MODE_PASTA &&
+	    tcp_splice_conn_from_sock(c, ref.tcp_listen, &flow->tcp_splice, s))
 		return;
 
+	tcp_tap_conn_from_sock(c, ref.tcp_listen, &flow->tcp, s, now);
+	return;	
+
 cancel:
 	/* Failed to create the connection */
 	if (s >= 0)
diff --git a/tcp_splice.c b/tcp_splice.c
index eec02fe..0faeb1b 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -72,6 +72,9 @@ static int ns_sock_pool6	[TCP_SOCK_POOL_SIZE];
 /* Pool of pre-opened pipes */
 static int splice_pipe_pool		[TCP_SPLICE_PIPE_POOL_SIZE][2];
 
+#define FSIDE0(conn)			(&(conn)->f.side[0])
+#define FSIDE1(conn)			(&(conn)->f.side[1])
+
 #define CONN_V6(x)			(x->flags & SPLICE_V6)
 #define CONN_V4(x)			(!CONN_V6(x))
 #define CONN_HAS(conn, set)		((conn->events & (set)) == (set))
@@ -280,9 +283,21 @@ bool tcp_splice_flow_defer(union flow *flow)
 static int tcp_splice_connect_finish(const struct ctx *c,
 				     struct tcp_splice_conn *conn)
 {
+	struct sockaddr_storage sa;
+	socklen_t sl = sizeof(sa);
 	unsigned side;
 	int i = 0;
 
+	if (getsockname(conn->s[1], (struct sockaddr *)&sa, &sl) < 0) {
+		int ret = -errno;
+		conn_flag(c, conn, CLOSING);
+		return ret;
+	}
+	inany_from_sockaddr(&FSIDE1(conn)->faddr, &FSIDE1(conn)->fport,
+			    (struct sockaddr *)&sa);
+
+	ASSERT(flowside_complete(FSIDE1(conn)));
+
 	for (side = 0; side < SIDES; side++) {
 		conn->pipe[side][0] = conn->pipe[side][1] = -1;
 
@@ -352,13 +367,24 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
 			   conn->s[1]);
 	}
 
+	/* It would be nicer if we could initialise FSIDE1 all at once with
+	 * flowaddrs_from_af() or flowaddrs_from_sock().  However, we can't get
+	 * the forwarding port until the connect() has finished and we don't
+	 * want to block to wait for it.  Meanwhile we have the endpoint address
+	 * here, but don't have a place to stash it other than in the flowaddrs
+	 * itself. So, initialisation of FSIDE1 is split between here and
+	 * tcp_splice_connect_finish().  Ugly but necessary.
+	 */
 	if (CONN_V6(conn)) {
 		sa = (struct sockaddr *)&addr6;
 		sl = sizeof(addr6);
+		inany_from_af(&FSIDE1(conn)->eaddr, AF_INET6, &addr6.sin6_addr);
 	} else {
 		sa = (struct sockaddr *)&addr4;
 		sl = sizeof(addr4);
+		inany_from_af(&FSIDE1(conn)->eaddr, AF_INET, &addr4.sin_addr);
 	}
+	FSIDE1(conn)->eport = port;
 
 	if (connect(conn->s[1], sa, sl)) {
 		if (errno != EINPROGRESS) {
@@ -381,13 +407,13 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
  * @c:		Execution context
  * @conn:	Connection pointer
  * @port:	Destination port, host order
- * @pif:	Originating pif of the splice
  *
  * Return: return code from connect()
  */
 static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
-			  in_port_t port, uint8_t pif)
+			  in_port_t port)
 {
+	uint8_t pif0 = FSIDE0(conn)->pif, pif1;
 	int s = -1;
 
 	/* If the pool is empty we take slightly different approaches
@@ -397,17 +423,19 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
 	 * entering the ns anyway, so we might as well refill the
 	 * pool.
 	 */
-	if (pif == PIF_SPLICE) {
+	if (pif0 == PIF_SPLICE) {
 		int *p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
 		int af = CONN_V6(conn) ? AF_INET6 : AF_INET;
 
+		pif1 = PIF_HOST;
 		s = tcp_conn_pool_sock(p);
 		if (s < 0)
 			s = tcp_conn_new_sock(c, af);
 	} else {
 		int *p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
 
-		ASSERT(pif == PIF_HOST);
+		ASSERT(pif0 == PIF_HOST);
+		pif1 = PIF_SPLICE;
 
 		/* If pool is empty, refill it first */
 		if (p[TCP_SOCK_POOL_SIZE-1] < 0)
@@ -421,6 +449,7 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
 		return s;
 	}
 
+	FSIDE1(conn)->pif = pif1;
 	return tcp_splice_connect(c, conn, s, port);
 }
 
@@ -428,34 +457,31 @@ static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
  * tcp_splice_conn_from_sock() - Attempt to init state for a spliced connection
  * @c:		Execution context
  * @ref:	epoll reference of listening socket
- * @conn:	connection structure to initialize
+ * @conn:	connection structure (with FSIDE0(@conn) completed)
  * @s:		Accepted socket
- * @sa:		Peer address of connection
  *
  * Return: true if able to create a spliced connection, false otherwise
  * #syscalls:pasta setsockopt
  */
 bool tcp_splice_conn_from_sock(const struct ctx *c,
 			       union tcp_listen_epoll_ref ref,
-			       struct tcp_splice_conn *conn, int s,
-			       const struct sockaddr *sa)
+			       struct tcp_splice_conn *conn, int s)
 {
-	const struct in_addr *a4;
-	union inany_addr aany;
-	in_port_t port;
+	const struct in_addr *e4 = inany_v4(&FSIDE0(conn)->eaddr);
+	const struct in_addr *f4 = inany_v4(&FSIDE0(conn)->faddr);
 
 	ASSERT(c->mode == MODE_PASTA);
+	ASSERT(flowside_complete(FSIDE0(conn)));
 
-	inany_from_sockaddr(&aany, &port, sa);
-	a4 = inany_v4(&aany);
-
-	if (a4) {
-		if (!IN4_IS_ADDR_LOOPBACK(a4))
+	if (e4) {
+		if (!IN4_IS_ADDR_LOOPBACK(e4))
 			return false;
+		ASSERT(f4 && IN4_IS_ADDR_LOOPBACK(f4));
 		conn->flags = 0;
 	} else {
-		if (!IN6_IS_ADDR_LOOPBACK(&aany.a6))
+		if (!IN6_IS_ADDR_LOOPBACK(&FSIDE0(conn)->eaddr.a6))
 			return false;
+		ASSERT(IN6_IS_ADDR_LOOPBACK(&FSIDE0(conn)->faddr.a6));
 		conn->flags = SPLICE_V6;
 	}
 
@@ -465,7 +491,7 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
 	conn->f.type = FLOW_TCP_SPLICE;
 	conn->s[0] = s;
 
-	if (tcp_splice_new(c, conn, ref.port, ref.pif))
+	if (tcp_splice_new(c, conn, ref.port))
 		conn_flag(c, conn, CLOSING);
 
 	return true;
diff --git a/tcp_splice.h b/tcp_splice.h
index 18193e4..e1863a9 100644
--- a/tcp_splice.h
+++ b/tcp_splice.h
@@ -12,8 +12,7 @@ void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
 			     uint32_t events);
 bool tcp_splice_conn_from_sock(const struct ctx *c,
 			       union tcp_listen_epoll_ref ref,
-			       struct tcp_splice_conn *conn, int s,
-			       const struct sockaddr *sa);
+			       struct tcp_splice_conn *conn, int s);
 void tcp_splice_init(struct ctx *c);
 
 #endif /* TCP_SPLICE_H */
-- 
@@ -12,8 +12,7 @@ void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
 			     uint32_t events);
 bool tcp_splice_conn_from_sock(const struct ctx *c,
 			       union tcp_listen_epoll_ref ref,
-			       struct tcp_splice_conn *conn, int s,
-			       const struct sockaddr *sa);
+			       struct tcp_splice_conn *conn, int s);
 void tcp_splice_init(struct ctx *c);
 
 #endif /* TCP_SPLICE_H */
-- 
2.43.0


  parent reply	other threads:[~2023-12-21  7:02 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-21  7:02 [PATCH v3 00/15] RFC: Unified flow table David Gibson
2023-12-21  7:02 ` [PATCH v3 01/15] flow: Common data structures for tracking flow addresses David Gibson
2024-01-13 22:50   ` Stefano Brivio
2024-01-16  6:14     ` David Gibson
2023-12-21  7:02 ` [PATCH v3 02/15] tcp, flow: Maintain guest side flow information David Gibson
2024-01-13 22:51   ` Stefano Brivio
2024-01-16  6:23     ` David Gibson
2023-12-21  7:02 ` [PATCH v3 03/15] tcp, flow: Maintain host " David Gibson
2023-12-21  7:02 ` David Gibson [this message]
2024-01-17 19:59   ` [PATCH v3 04/15] tcp_splice,flow: Maintain flow information for spliced connections Stefano Brivio
2024-01-18  1:01     ` David Gibson
2023-12-21  7:02 ` [PATCH v3 05/15] flow, tcp, tcp_splice: Uniform debug helpers for new flows David Gibson
2024-01-17 19:59   ` Stefano Brivio
2024-01-18  1:04     ` David Gibson
2024-01-18 15:40       ` Stefano Brivio
2023-12-21  7:02 ` [PATCH v3 06/15] tcp, flow: Replace TCP specific hash function with general flow hash David Gibson
2024-01-17 19:59   ` Stefano Brivio
2024-01-18  1:15     ` David Gibson
2024-01-18 15:42       ` Stefano Brivio
2024-01-18 23:55         ` David Gibson
2023-12-21  7:02 ` [PATCH v3 07/15] flow: Add helper to determine a flow's protocol David Gibson
2023-12-21  7:02 ` [PATCH v3 08/15] flow, tcp: Generalise TCP hash table to general flow hash table David Gibson
2023-12-21  7:02 ` [PATCH v3 09/15] tcp: Re-use flow hash for initial sequence number generation David Gibson
2023-12-21  7:02 ` [PATCH v3 10/15] icmp: Store ping socket information in the flow table David Gibson
2023-12-21  7:02 ` [PATCH v3 11/15] icmp: Populate guest side information for ping flows David Gibson
2023-12-21  7:02 ` [PATCH v3 12/15] icmp: Populate and use host side flow information David Gibson
2024-01-17 19:59   ` Stefano Brivio
2024-01-18  1:22     ` David Gibson
2024-01-18 15:43       ` Stefano Brivio
2024-01-18 23:58         ` David Gibson
2023-12-21  7:02 ` [PATCH v3 13/15] icmp: Use 'flowside' epoll references for ping sockets David Gibson
2023-12-21  7:02 ` [PATCH v3 14/15] icmp: Merge EPOLL_TYPE_ICMP and EPOLL_TYPE_ICMPV6 David Gibson
2023-12-21  7:02 ` [PATCH v3 15/15] icmp: Eliminate icmp_id_map David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231221070237.1422557-5-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).