public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Stefano Brivio <sbrivio@redhat.com>, passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 12/16] tcp_splice: Improve logic deciding when to splice
Date: Mon, 29 Jan 2024 15:35:53 +1100	[thread overview]
Message-ID: <20240129043557.823451-13-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20240129043557.823451-1-david@gibson.dropbear.id.au>

This makes several tweaks to improve the logic which decides whether we're
able to use the splice method for a new connection.

 * Rather than only calling tcp_splice_conn_from_sock() in pasta mode, we
   check for pasta mode within it, better localising the checks.
 * Previously if we got a connection from a non-loopback address we'd
   always fall back to the "tap" path, even if the connection was on a
   socket in the namespace.  If we did get a non-loopback address on a
   namespace socket, something has gone wrong and the "tap" path certainly
   won't be able to handle it.  Report the error and close, rather than
   passing it along to tap.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 inany.c      |  1 -
 tcp.c        |  3 +--
 tcp_splice.c | 49 ++++++++++++++++++++++++++++++++-----------------
 3 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/inany.c b/inany.c
index edf0b055..eaf2755d 100644
--- a/inany.c
+++ b/inany.c
@@ -23,7 +23,6 @@
  *
  * Return: On success, a non-null pointer to @dst, NULL on failure
  */
-/* cppcheck-suppress unusedFunction */
 const char *inany_ntop(const union inany_addr *src, char *dst, socklen_t size)
 {
 	const struct in_addr *v4 = inany_v4(src);
diff --git a/tcp.c b/tcp.c
index 8daefe99..052bf7cb 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2733,8 +2733,7 @@ void tcp_listen_handler(struct ctx *c, union epoll_ref ref,
 	if (s < 0)
 		goto cancel;
 
-	if (c->mode == MODE_PASTA &&
-	    tcp_splice_conn_from_sock(c, ref.tcp_listen, &flow->tcp_splice,
+	if (tcp_splice_conn_from_sock(c, ref.tcp_listen, &flow->tcp_splice,
 				      s, (struct sockaddr *)&sa))
 		return;
 
diff --git a/tcp_splice.c b/tcp_splice.c
index abd698d4..3b438313 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -389,36 +389,51 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
 	sa_family_t af;
 	int s1, rc;
 
-	ASSERT(c->mode == MODE_PASTA);
+	if (c->mode != MODE_PASTA)
+		return false;
 
 	inany_from_sockaddr(&src, &srcport, sa);
-	if (!inany_is_loopback(&src))
-		return false;
+	af = inany_v4(&src) ? AF_INET : AF_INET6;
 
-	conn->flags = inany_v4(&src) ? 0 : SPLICE_V6;
-	af = CONN_V6(conn) ? AF_INET6 : AF_INET;
+	switch (ref.pif) {
+	case PIF_SPLICE:
+		if (!inany_is_loopback(&src)) {
+			char str[INANY_ADDRSTRLEN];
 
-	if (setsockopt(s0, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int)))
-		flow_trace(conn, "failed to set TCP_QUICKACK on %i", s0);
+			flow_err(conn, "Bad source address %s for splice, closing",
+				 inany_ntop(&src, str, sizeof(str)));
 
-	conn->f.type = FLOW_TCP_SPLICE;
-	conn->s[0] = s0;
-	conn->s[1] = -1;
-	conn->pipe[0][0] = conn->pipe[0][1] = -1;
-	conn->pipe[1][0] = conn->pipe[1][1] = -1;
+			/* We *don't* want to fall back to tap */
+			flow_alloc_cancel((union flow *)conn);
+			return true;
+		}
 
-	if (ref.pif == PIF_SPLICE) {
 		dstport += c->tcp.fwd_out.delta[dstport];
-
 		s1 = tcp_conn_sock(c, af);
-	} else {
-		ASSERT(ref.pif == PIF_HOST);
+		break;
 
-		dstport += c->tcp.fwd_in.delta[dstport];
+	case PIF_HOST:
+		if (!inany_is_loopback(&src))
+			return false;
 
+		dstport += c->tcp.fwd_in.delta[dstport];
 		s1 = tcp_conn_sock_ns(c, af);
+		break;
+
+	default:
+		return false;
 	}
 
+	conn->f.type = FLOW_TCP_SPLICE;
+	conn->s[0] = s0;
+	conn->s[1] = -1;
+	conn->pipe[0][0] = conn->pipe[0][1] = -1;
+	conn->pipe[1][0] = conn->pipe[1][1] = -1;
+	conn->flags = af == AF_INET ? 0 : SPLICE_V6;
+
+	if (setsockopt(s0, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int)))
+		flow_trace(conn, "failed to set TCP_QUICKACK on %i", s0);
+
 	if (s1 < 0) {
 		flow_err(conn,
 			 "Couldn't open connectable socket for splice: %s",
-- 
@@ -389,36 +389,51 @@ bool tcp_splice_conn_from_sock(const struct ctx *c,
 	sa_family_t af;
 	int s1, rc;
 
-	ASSERT(c->mode == MODE_PASTA);
+	if (c->mode != MODE_PASTA)
+		return false;
 
 	inany_from_sockaddr(&src, &srcport, sa);
-	if (!inany_is_loopback(&src))
-		return false;
+	af = inany_v4(&src) ? AF_INET : AF_INET6;
 
-	conn->flags = inany_v4(&src) ? 0 : SPLICE_V6;
-	af = CONN_V6(conn) ? AF_INET6 : AF_INET;
+	switch (ref.pif) {
+	case PIF_SPLICE:
+		if (!inany_is_loopback(&src)) {
+			char str[INANY_ADDRSTRLEN];
 
-	if (setsockopt(s0, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int)))
-		flow_trace(conn, "failed to set TCP_QUICKACK on %i", s0);
+			flow_err(conn, "Bad source address %s for splice, closing",
+				 inany_ntop(&src, str, sizeof(str)));
 
-	conn->f.type = FLOW_TCP_SPLICE;
-	conn->s[0] = s0;
-	conn->s[1] = -1;
-	conn->pipe[0][0] = conn->pipe[0][1] = -1;
-	conn->pipe[1][0] = conn->pipe[1][1] = -1;
+			/* We *don't* want to fall back to tap */
+			flow_alloc_cancel((union flow *)conn);
+			return true;
+		}
 
-	if (ref.pif == PIF_SPLICE) {
 		dstport += c->tcp.fwd_out.delta[dstport];
-
 		s1 = tcp_conn_sock(c, af);
-	} else {
-		ASSERT(ref.pif == PIF_HOST);
+		break;
 
-		dstport += c->tcp.fwd_in.delta[dstport];
+	case PIF_HOST:
+		if (!inany_is_loopback(&src))
+			return false;
 
+		dstport += c->tcp.fwd_in.delta[dstport];
 		s1 = tcp_conn_sock_ns(c, af);
+		break;
+
+	default:
+		return false;
 	}
 
+	conn->f.type = FLOW_TCP_SPLICE;
+	conn->s[0] = s0;
+	conn->s[1] = -1;
+	conn->pipe[0][0] = conn->pipe[0][1] = -1;
+	conn->pipe[1][0] = conn->pipe[1][1] = -1;
+	conn->flags = af == AF_INET ? 0 : SPLICE_V6;
+
+	if (setsockopt(s0, SOL_TCP, TCP_QUICKACK, &((int){ 1 }), sizeof(int)))
+		flow_trace(conn, "failed to set TCP_QUICKACK on %i", s0);
+
 	if (s1 < 0) {
 		flow_err(conn,
 			 "Couldn't open connectable socket for splice: %s",
-- 
2.43.0


  parent reply	other threads:[~2024-01-29  4:36 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-29  4:35 [PATCH 00/16] More flow table preliminaries: address handling improvements David Gibson
2024-01-29  4:35 ` [PATCH 01/16] treewide: Use sa_family_t for address family variables David Gibson
2024-01-29  4:35 ` [PATCH 02/16] tcp, udp: Don't precompute port remappings in epoll references David Gibson
2024-01-29  4:35 ` [PATCH 03/16] flow: Add helper to determine a flow's protocol David Gibson
2024-01-29  4:35 ` [PATCH 04/16] tcp_splice: Simplify clean up logic David Gibson
2024-01-29  4:35 ` [PATCH 05/16] inany: Helper to test for IPv4 or IPv6 loopback address David Gibson
2024-01-29  4:35 ` [PATCH 06/16] tcp, tcp_splice: Helpers for getting sockets from the pools David Gibson
2024-01-29  4:35 ` [PATCH 07/16] tcp_splice: More specific variable names in new splice path David Gibson
2024-01-29  4:35 ` [PATCH 08/16] tcp_splice: Fix incorrect parameter comment for tcp_splice_connect() David Gibson
2024-01-29  4:35 ` [PATCH 09/16] tcp_splice: Merge tcp_splice_new() into its caller David Gibson
2024-01-29  4:35 ` [PATCH 10/16] tcp_splice: Improve error reporting on connect path David Gibson
2024-01-29  4:35 ` [PATCH 11/16] inany: Add inany_ntop() helper David Gibson
2024-01-29  4:35 ` David Gibson [this message]
2024-01-29  4:35 ` [PATCH 13/16] util: Provide global constants for IPv4 loopback and unspecified address David Gibson
2024-01-29  4:35 ` [PATCH 14/16] inany: Introduce union sockaddr_inany David Gibson
2024-01-29  4:35 ` [PATCH 15/16] tcp, tcp_splice: Better construction of IPv4 or IPv6 sockaddrs David Gibson
2024-01-29  4:35 ` [PATCH 16/16] inany: Extend inany_from_af to easily set unspecified addresses David Gibson
2024-01-29  9:02 ` [PATCH 00/16] More flow table preliminaries: address handling improvements David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240129043557.823451-13-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox:

	https://passt.top/passt

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for IMAP folder(s).