public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v2 5/5] tcp: Improve handling of fallback if socket pool is empty on new splice
Date: Tue, 14 Feb 2023 10:48:23 +1100	[thread overview]
Message-ID: <20230213234823.749379-6-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20230213234823.749379-1-david@gibson.dropbear.id.au>

When creating a new spliced connection, we need to get a socket in the
other ns from the originating one.  To avoid excessive ns switches we
usually get these from a pool refilled on a timer.  However, if the pool
runs out we need a fallback.  Currently that's done by passing -1 as the
socket to tcp_splice_connnect() and running it in the target ns.

This means that tcp_splice_connect() itself needs to have different cases
depending on whether it's given an existing socket or not, which is
a separate concern from what it's mostly doing.  We change it to require
a suitable open socket to be passed in, and ensuring in the caller that we
have one.

This requires adding the fallback paths to the caller, tcp_splice_new().
We use slightly different approaches for a socket in the init ns versus the
guest ns.

This also means that we no longer need to run tcp_splice_connect() itself
in the guest ns, which allows us to remove a bunch of boilerplate code.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp.c        |  2 +-
 tcp_conn.h   |  1 +
 tcp_splice.c | 89 ++++++++++++++++++----------------------------------
 3 files changed, 32 insertions(+), 60 deletions(-)

diff --git a/tcp.c b/tcp.c
index a0e2e34..f028e01 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1882,7 +1882,7 @@ int tcp_conn_pool_sock(int pool[])
  *
  * Return: socket number on success, negative code if socket creation failed
  */
-static int tcp_conn_new_sock(const struct ctx *c, sa_family_t af)
+int tcp_conn_new_sock(const struct ctx *c, sa_family_t af)
 {
 	int s;
 
diff --git a/tcp_conn.h b/tcp_conn.h
index c807e8b..a499f34 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -193,6 +193,7 @@ void tcp_table_compact(struct ctx *c, union tcp_conn *hole);
 void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union);
 void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union);
 int tcp_conn_pool_sock(int pool[]);
+int tcp_conn_new_sock(const struct ctx *c, sa_family_t af);
 void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af);
 void tcp_splice_refill(const struct ctx *c);
 
diff --git a/tcp_splice.c b/tcp_splice.c
index 3bf6368..84f855e 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -87,6 +87,9 @@ static const char *tcp_splice_flag_str[] __attribute((__unused__)) = {
 	"RCVLOWAT_ACT_B", "CLOSING",
 };
 
+/* Forward declaration */
+static int tcp_sock_refill_ns(void *arg);
+
 /**
  * tcp_splice_conn_epoll_events() - epoll events masks for given state
  * @events:	Connection event flags
@@ -347,12 +350,8 @@ static int tcp_splice_connect_finish(const struct ctx *c,
  * Return: 0 for connect() succeeded or in progress, negative value on error
  */
 static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
-			      int s, in_port_t port)
+			      int sock_conn, in_port_t port)
 {
-	int sock_conn = (s >= 0) ? s : socket(CONN_V6(conn) ? AF_INET6 :
-							      AF_INET,
-					      SOCK_STREAM | SOCK_NONBLOCK,
-					      IPPROTO_TCP);
 	struct sockaddr_in6 addr6 = {
 		.sin6_family = AF_INET6,
 		.sin6_port = htons(port),
@@ -366,19 +365,8 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
 	const struct sockaddr *sa;
 	socklen_t sl;
 
-	if (sock_conn < 0)
-		return -errno;
-
-	if (sock_conn > SOCKET_MAX) {
-		close(sock_conn);
-		return -EIO;
-	}
-
 	conn->b = sock_conn;
 
-	if (s < 0)
-		tcp_sock_set_bufsize(c, conn->b);
-
 	if (setsockopt(conn->b, SOL_TCP, TCP_QUICKACK,
 		       &((int){ 1 }), sizeof(int))) {
 		trace("TCP (spliced): failed to set TCP_QUICKACK on socket %i",
@@ -409,36 +397,6 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
 	return 0;
 }
 
-/**
- * struct tcp_splice_connect_ns_arg - Arguments for tcp_splice_connect_ns()
- * @c:		Execution context
- * @conn:	Accepted inbound connection
- * @port:	Destination port, host order
- * @ret:	Return value of tcp_splice_connect_ns()
- */
-struct tcp_splice_connect_ns_arg {
-	const struct ctx *c;
-	struct tcp_splice_conn *conn;
-	in_port_t port;
-	int ret;
-};
-
-/**
- * tcp_splice_connect_ns() - Enter namespace and call tcp_splice_connect()
- * @arg:	See struct tcp_splice_connect_ns_arg
- *
- * Return: 0
- */
-static int tcp_splice_connect_ns(void *arg)
-{
-	struct tcp_splice_connect_ns_arg *a;
-
-	a = (struct tcp_splice_connect_ns_arg *)arg;
-	ns_enter(a->c);
-	a->ret = tcp_splice_connect(a->c, a->conn, -1, a->port);
-	return 0;
-}
-
 /**
  * tcp_splice_new() - Handle new spliced connection
  * @c:		Execution context
@@ -451,24 +409,37 @@ static int tcp_splice_connect_ns(void *arg)
 static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
 			  in_port_t port, int outbound)
 {
-	int *p, s = -1;
-
-	if (outbound)
-		p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
-	else
-		p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
+	int s = -1;
+
+	/* If the pool is empty we take slightly different approaches
+	 * for init or ns sockets.  For init sockets we just open a
+	 * new one without refilling the pool to keep latency down.
+	 * For ns sockets, we're going to incur the latency of
+	 * entering the ns anyway, so we might as well refill the
+	 * pool.
+	 */
+	if (outbound) {
+		int *p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
+		int af = CONN_V6(conn) ? AF_INET6 : AF_INET;
+
+		s = tcp_conn_pool_sock(p);
+		if (s < 0)
+			s = tcp_conn_new_sock(c, af);
+	} else {
+		int *p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
 
-	s = tcp_conn_pool_sock(p);
+		/* If pool is empty, refill it first */
+		if (p[TCP_SOCK_POOL_SIZE-1] < 0)
+			NS_CALL(tcp_sock_refill_ns, c);
 
-	/* No socket available in namespace: create a new one for connect() */
-	if (s < 0 && !outbound) {
-		struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
+		s = tcp_conn_pool_sock(p);
+	}
 
-		NS_CALL(tcp_splice_connect_ns, &ns_arg);
-		return ns_arg.ret;
+	if (s < 0) {
+		warn("Couldn't open connectable socket for splice (%d)", s);
+		return s;
 	}
 
-	/* Otherwise, the socket will connect on the side it was created on */
 	return tcp_splice_connect(c, conn, s, port);
 }
 
-- 
@@ -87,6 +87,9 @@ static const char *tcp_splice_flag_str[] __attribute((__unused__)) = {
 	"RCVLOWAT_ACT_B", "CLOSING",
 };
 
+/* Forward declaration */
+static int tcp_sock_refill_ns(void *arg);
+
 /**
  * tcp_splice_conn_epoll_events() - epoll events masks for given state
  * @events:	Connection event flags
@@ -347,12 +350,8 @@ static int tcp_splice_connect_finish(const struct ctx *c,
  * Return: 0 for connect() succeeded or in progress, negative value on error
  */
 static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
-			      int s, in_port_t port)
+			      int sock_conn, in_port_t port)
 {
-	int sock_conn = (s >= 0) ? s : socket(CONN_V6(conn) ? AF_INET6 :
-							      AF_INET,
-					      SOCK_STREAM | SOCK_NONBLOCK,
-					      IPPROTO_TCP);
 	struct sockaddr_in6 addr6 = {
 		.sin6_family = AF_INET6,
 		.sin6_port = htons(port),
@@ -366,19 +365,8 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
 	const struct sockaddr *sa;
 	socklen_t sl;
 
-	if (sock_conn < 0)
-		return -errno;
-
-	if (sock_conn > SOCKET_MAX) {
-		close(sock_conn);
-		return -EIO;
-	}
-
 	conn->b = sock_conn;
 
-	if (s < 0)
-		tcp_sock_set_bufsize(c, conn->b);
-
 	if (setsockopt(conn->b, SOL_TCP, TCP_QUICKACK,
 		       &((int){ 1 }), sizeof(int))) {
 		trace("TCP (spliced): failed to set TCP_QUICKACK on socket %i",
@@ -409,36 +397,6 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
 	return 0;
 }
 
-/**
- * struct tcp_splice_connect_ns_arg - Arguments for tcp_splice_connect_ns()
- * @c:		Execution context
- * @conn:	Accepted inbound connection
- * @port:	Destination port, host order
- * @ret:	Return value of tcp_splice_connect_ns()
- */
-struct tcp_splice_connect_ns_arg {
-	const struct ctx *c;
-	struct tcp_splice_conn *conn;
-	in_port_t port;
-	int ret;
-};
-
-/**
- * tcp_splice_connect_ns() - Enter namespace and call tcp_splice_connect()
- * @arg:	See struct tcp_splice_connect_ns_arg
- *
- * Return: 0
- */
-static int tcp_splice_connect_ns(void *arg)
-{
-	struct tcp_splice_connect_ns_arg *a;
-
-	a = (struct tcp_splice_connect_ns_arg *)arg;
-	ns_enter(a->c);
-	a->ret = tcp_splice_connect(a->c, a->conn, -1, a->port);
-	return 0;
-}
-
 /**
  * tcp_splice_new() - Handle new spliced connection
  * @c:		Execution context
@@ -451,24 +409,37 @@ static int tcp_splice_connect_ns(void *arg)
 static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
 			  in_port_t port, int outbound)
 {
-	int *p, s = -1;
-
-	if (outbound)
-		p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
-	else
-		p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
+	int s = -1;
+
+	/* If the pool is empty we take slightly different approaches
+	 * for init or ns sockets.  For init sockets we just open a
+	 * new one without refilling the pool to keep latency down.
+	 * For ns sockets, we're going to incur the latency of
+	 * entering the ns anyway, so we might as well refill the
+	 * pool.
+	 */
+	if (outbound) {
+		int *p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
+		int af = CONN_V6(conn) ? AF_INET6 : AF_INET;
+
+		s = tcp_conn_pool_sock(p);
+		if (s < 0)
+			s = tcp_conn_new_sock(c, af);
+	} else {
+		int *p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
 
-	s = tcp_conn_pool_sock(p);
+		/* If pool is empty, refill it first */
+		if (p[TCP_SOCK_POOL_SIZE-1] < 0)
+			NS_CALL(tcp_sock_refill_ns, c);
 
-	/* No socket available in namespace: create a new one for connect() */
-	if (s < 0 && !outbound) {
-		struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
+		s = tcp_conn_pool_sock(p);
+	}
 
-		NS_CALL(tcp_splice_connect_ns, &ns_arg);
-		return ns_arg.ret;
+	if (s < 0) {
+		warn("Couldn't open connectable socket for splice (%d)", s);
+		return s;
 	}
 
-	/* Otherwise, the socket will connect on the side it was created on */
 	return tcp_splice_connect(c, conn, s, port);
 }
 
-- 
2.39.1


  parent reply	other threads:[~2023-02-13 23:48 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-13 23:48 [PATCH v2 0/5] Cleanups to tcp socket pool handling David Gibson
2023-02-13 23:48 ` [PATCH v2 1/5] tcp: Make a helper to refill each socket pool David Gibson
2023-02-13 23:48 ` [PATCH v2 2/5] tcp: Split init and ns cases for tcp_sock_refill() David Gibson
2023-02-13 23:48 ` [PATCH v2 3/5] tcp: Move socket pool declarations around David Gibson
2023-02-13 23:48 ` [PATCH v2 4/5] tcp: Split pool lookup from creating new sockets in tcp_conn_new_sock() David Gibson
2023-02-13 23:48 ` David Gibson [this message]
2023-02-14 16:27 ` [PATCH v2 0/5] Cleanups to tcp socket pool handling Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230213234823.749379-6-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).