public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH 06/11] tcp_splice: Don't pool pipes in pairs
Date: Thu, 12 Oct 2023 12:51:09 +1100	[thread overview]
Message-ID: <20231012015114.2612066-7-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20231012015114.2612066-1-david@gibson.dropbear.id.au>

To reduce latencies, the tcp splice code maintains a pool of pre-opened
pipes to use for new connections.  This is structured as an array of pairs
of pipes, with each pipe, of course, being a pair of fds.  Thus when we
use the pool, a single pool "slot" provides both the a->b and b->a pipes.

There's no strong reason to store the pool in pairs, though - we can,
without much difficulty, instead take the a->b and b->a pipes for a new
connection independently from separate slots in the pool, or even take one
from the pool and create the other as we need it, if there's only one
pipe left in the pool.

This marginally increases the length of code, but simplifies the structure
of the pipe pool.  We should be able to re-shrink the code with later
changes, too.

In the process we also fix some minor bugs:
- If we both failed to find a pipe in the pool and to create a new one, we
  didn't log an error and would silently drop the connection.  That could
  make debugging such a situation difficult.  Add in an error message for
  that case.
- When refilling the pool, if we were only able to open a single pipe in
  the pair, we attempted to roll back, but instead of closing the opened
  pipe, we closed the pipe we failed to open (probably leading to
  some ignored EBADF errors).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp_splice.c | 60 +++++++++++++++++++++++++++-------------------------
 1 file changed, 31 insertions(+), 29 deletions(-)

diff --git a/tcp_splice.c b/tcp_splice.c
index 3419207..b783326 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -58,7 +58,7 @@
 #include "tcp_conn.h"
 
 #define MAX_PIPE_SIZE			(8UL * 1024 * 1024)
-#define TCP_SPLICE_PIPE_POOL_SIZE	16
+#define TCP_SPLICE_PIPE_POOL_SIZE	32
 #define TCP_SPLICE_CONN_PRESSURE	30	/* % of conn_count */
 #define TCP_SPLICE_FILE_PRESSURE	30	/* % of c->nofile */
 
@@ -69,7 +69,7 @@ static int ns_sock_pool4	[TCP_SOCK_POOL_SIZE];
 static int ns_sock_pool6	[TCP_SOCK_POOL_SIZE];
 
 /* Pool of pre-opened pipes */
-static int splice_pipe_pool		[TCP_SPLICE_PIPE_POOL_SIZE][2][2];
+static int splice_pipe_pool		[TCP_SPLICE_PIPE_POOL_SIZE][2];
 
 #define CONN_V6(x)			(x->flags & SPLICE_V6)
 #define CONN_V4(x)			(!CONN_V6(x))
@@ -307,19 +307,16 @@ static int tcp_splice_connect_finish(const struct ctx *c,
 	conn->pipe_a_b[1] = conn->pipe_b_a[1] = -1;
 
 	for (i = 0; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) {
-		if (splice_pipe_pool[i][0][0] >= 0) {
-			SWAP(conn->pipe_a_b[0], splice_pipe_pool[i][0][0]);
-			SWAP(conn->pipe_a_b[1], splice_pipe_pool[i][0][1]);
-
-			SWAP(conn->pipe_b_a[0], splice_pipe_pool[i][1][0]);
-			SWAP(conn->pipe_b_a[1], splice_pipe_pool[i][1][1]);
+		if (splice_pipe_pool[i][0] >= 0) {
+			SWAP(conn->pipe_a_b[0], splice_pipe_pool[i][0]);
+			SWAP(conn->pipe_a_b[1], splice_pipe_pool[i][1]);
 			break;
 		}
 	}
-
 	if (conn->pipe_a_b[0] < 0) {
-		if (pipe2(conn->pipe_a_b, O_NONBLOCK | O_CLOEXEC) ||
-		    pipe2(conn->pipe_b_a, O_NONBLOCK | O_CLOEXEC)) {
+		if (pipe2(conn->pipe_a_b, O_NONBLOCK | O_CLOEXEC)) {
+			err("TCP (spliced): cannot create a->b pipe: %s",
+			    strerror(errno));
 			conn_flag(c, conn, CLOSING);
 			return -EIO;
 		}
@@ -328,6 +325,22 @@ static int tcp_splice_connect_finish(const struct ctx *c,
 			trace("TCP (spliced): cannot set a->b pipe size to %lu",
 			      c->tcp.pipe_size);
 		}
+	}
+
+	for (; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) {
+		if (splice_pipe_pool[i][0] >= 0) {
+			SWAP(conn->pipe_b_a[0], splice_pipe_pool[i][0]);
+			SWAP(conn->pipe_b_a[1], splice_pipe_pool[i][1]);
+			break;
+		}
+	}
+	if (conn->pipe_b_a[0] < 0) {
+		if (pipe2(conn->pipe_b_a, O_NONBLOCK | O_CLOEXEC)) {
+			err("TCP (spliced): cannot create b->a pipe: %s",
+			    strerror(errno));
+			conn_flag(c, conn, CLOSING);
+			return -EIO;
+		}
 
 		if (fcntl(conn->pipe_b_a[0], F_SETPIPE_SZ, c->tcp.pipe_size)) {
 			trace("TCP (spliced): cannot set b->a pipe size to %lu",
@@ -716,12 +729,12 @@ close:
  */
 static void tcp_set_pipe_size(struct ctx *c)
 {
-	int probe_pipe[TCP_SPLICE_PIPE_POOL_SIZE * 2][2], i, j;
+	int probe_pipe[TCP_SPLICE_PIPE_POOL_SIZE][2], i, j;
 
 	c->tcp.pipe_size = MAX_PIPE_SIZE;
 
 smaller:
-	for (i = 0; i < TCP_SPLICE_PIPE_POOL_SIZE * 2; i++) {
+	for (i = 0; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) {
 		if (pipe2(probe_pipe[i], O_CLOEXEC)) {
 			i++;
 			break;
@@ -736,7 +749,7 @@ smaller:
 		close(probe_pipe[j][1]);
 	}
 
-	if (i == TCP_SPLICE_PIPE_POOL_SIZE * 2)
+	if (i == TCP_SPLICE_PIPE_POOL_SIZE)
 		return;
 
 	if (!(c->tcp.pipe_size /= 2)) {
@@ -756,25 +769,14 @@ static void tcp_splice_pipe_refill(const struct ctx *c)
 	int i;
 
 	for (i = 0; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) {
-		if (splice_pipe_pool[i][0][0] >= 0)
+		if (splice_pipe_pool[i][0] >= 0)
 			break;
-		if (pipe2(splice_pipe_pool[i][0], O_NONBLOCK | O_CLOEXEC))
-			continue;
-		if (pipe2(splice_pipe_pool[i][1], O_NONBLOCK | O_CLOEXEC)) {
-			close(splice_pipe_pool[i][1][0]);
-			close(splice_pipe_pool[i][1][1]);
+		if (pipe2(splice_pipe_pool[i], O_NONBLOCK | O_CLOEXEC))
 			continue;
-		}
 
-		if (fcntl(splice_pipe_pool[i][0][0], F_SETPIPE_SZ,
+		if (fcntl(splice_pipe_pool[i][0], F_SETPIPE_SZ,
 			  c->tcp.pipe_size)) {
-			trace("TCP (spliced): cannot set a->b pipe size to %lu",
-			      c->tcp.pipe_size);
-		}
-
-		if (fcntl(splice_pipe_pool[i][1][0], F_SETPIPE_SZ,
-			  c->tcp.pipe_size)) {
-			trace("TCP (spliced): cannot set b->a pipe size to %lu",
+			trace("TCP (spliced): cannot set pool pipe size to %lu",
 			      c->tcp.pipe_size);
 		}
 	}
-- 
@@ -58,7 +58,7 @@
 #include "tcp_conn.h"
 
 #define MAX_PIPE_SIZE			(8UL * 1024 * 1024)
-#define TCP_SPLICE_PIPE_POOL_SIZE	16
+#define TCP_SPLICE_PIPE_POOL_SIZE	32
 #define TCP_SPLICE_CONN_PRESSURE	30	/* % of conn_count */
 #define TCP_SPLICE_FILE_PRESSURE	30	/* % of c->nofile */
 
@@ -69,7 +69,7 @@ static int ns_sock_pool4	[TCP_SOCK_POOL_SIZE];
 static int ns_sock_pool6	[TCP_SOCK_POOL_SIZE];
 
 /* Pool of pre-opened pipes */
-static int splice_pipe_pool		[TCP_SPLICE_PIPE_POOL_SIZE][2][2];
+static int splice_pipe_pool		[TCP_SPLICE_PIPE_POOL_SIZE][2];
 
 #define CONN_V6(x)			(x->flags & SPLICE_V6)
 #define CONN_V4(x)			(!CONN_V6(x))
@@ -307,19 +307,16 @@ static int tcp_splice_connect_finish(const struct ctx *c,
 	conn->pipe_a_b[1] = conn->pipe_b_a[1] = -1;
 
 	for (i = 0; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) {
-		if (splice_pipe_pool[i][0][0] >= 0) {
-			SWAP(conn->pipe_a_b[0], splice_pipe_pool[i][0][0]);
-			SWAP(conn->pipe_a_b[1], splice_pipe_pool[i][0][1]);
-
-			SWAP(conn->pipe_b_a[0], splice_pipe_pool[i][1][0]);
-			SWAP(conn->pipe_b_a[1], splice_pipe_pool[i][1][1]);
+		if (splice_pipe_pool[i][0] >= 0) {
+			SWAP(conn->pipe_a_b[0], splice_pipe_pool[i][0]);
+			SWAP(conn->pipe_a_b[1], splice_pipe_pool[i][1]);
 			break;
 		}
 	}
-
 	if (conn->pipe_a_b[0] < 0) {
-		if (pipe2(conn->pipe_a_b, O_NONBLOCK | O_CLOEXEC) ||
-		    pipe2(conn->pipe_b_a, O_NONBLOCK | O_CLOEXEC)) {
+		if (pipe2(conn->pipe_a_b, O_NONBLOCK | O_CLOEXEC)) {
+			err("TCP (spliced): cannot create a->b pipe: %s",
+			    strerror(errno));
 			conn_flag(c, conn, CLOSING);
 			return -EIO;
 		}
@@ -328,6 +325,22 @@ static int tcp_splice_connect_finish(const struct ctx *c,
 			trace("TCP (spliced): cannot set a->b pipe size to %lu",
 			      c->tcp.pipe_size);
 		}
+	}
+
+	for (; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) {
+		if (splice_pipe_pool[i][0] >= 0) {
+			SWAP(conn->pipe_b_a[0], splice_pipe_pool[i][0]);
+			SWAP(conn->pipe_b_a[1], splice_pipe_pool[i][1]);
+			break;
+		}
+	}
+	if (conn->pipe_b_a[0] < 0) {
+		if (pipe2(conn->pipe_b_a, O_NONBLOCK | O_CLOEXEC)) {
+			err("TCP (spliced): cannot create b->a pipe: %s",
+			    strerror(errno));
+			conn_flag(c, conn, CLOSING);
+			return -EIO;
+		}
 
 		if (fcntl(conn->pipe_b_a[0], F_SETPIPE_SZ, c->tcp.pipe_size)) {
 			trace("TCP (spliced): cannot set b->a pipe size to %lu",
@@ -716,12 +729,12 @@ close:
  */
 static void tcp_set_pipe_size(struct ctx *c)
 {
-	int probe_pipe[TCP_SPLICE_PIPE_POOL_SIZE * 2][2], i, j;
+	int probe_pipe[TCP_SPLICE_PIPE_POOL_SIZE][2], i, j;
 
 	c->tcp.pipe_size = MAX_PIPE_SIZE;
 
 smaller:
-	for (i = 0; i < TCP_SPLICE_PIPE_POOL_SIZE * 2; i++) {
+	for (i = 0; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) {
 		if (pipe2(probe_pipe[i], O_CLOEXEC)) {
 			i++;
 			break;
@@ -736,7 +749,7 @@ smaller:
 		close(probe_pipe[j][1]);
 	}
 
-	if (i == TCP_SPLICE_PIPE_POOL_SIZE * 2)
+	if (i == TCP_SPLICE_PIPE_POOL_SIZE)
 		return;
 
 	if (!(c->tcp.pipe_size /= 2)) {
@@ -756,25 +769,14 @@ static void tcp_splice_pipe_refill(const struct ctx *c)
 	int i;
 
 	for (i = 0; i < TCP_SPLICE_PIPE_POOL_SIZE; i++) {
-		if (splice_pipe_pool[i][0][0] >= 0)
+		if (splice_pipe_pool[i][0] >= 0)
 			break;
-		if (pipe2(splice_pipe_pool[i][0], O_NONBLOCK | O_CLOEXEC))
-			continue;
-		if (pipe2(splice_pipe_pool[i][1], O_NONBLOCK | O_CLOEXEC)) {
-			close(splice_pipe_pool[i][1][0]);
-			close(splice_pipe_pool[i][1][1]);
+		if (pipe2(splice_pipe_pool[i], O_NONBLOCK | O_CLOEXEC))
 			continue;
-		}
 
-		if (fcntl(splice_pipe_pool[i][0][0], F_SETPIPE_SZ,
+		if (fcntl(splice_pipe_pool[i][0], F_SETPIPE_SZ,
 			  c->tcp.pipe_size)) {
-			trace("TCP (spliced): cannot set a->b pipe size to %lu",
-			      c->tcp.pipe_size);
-		}
-
-		if (fcntl(splice_pipe_pool[i][1][0], F_SETPIPE_SZ,
-			  c->tcp.pipe_size)) {
-			trace("TCP (spliced): cannot set b->a pipe size to %lu",
+			trace("TCP (spliced): cannot set pool pipe size to %lu",
 			      c->tcp.pipe_size);
 		}
 	}
-- 
2.41.0


  parent reply	other threads:[~2023-10-12  1:51 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-12  1:51 [PATCH 00/11] tcp_splice: Better exploit symmetry between sides of connection David Gibson
2023-10-12  1:51 ` [PATCH 01/11] tcp_splice: Remove redundant tcp_splice_epoll_ctl() David Gibson
2023-10-12  1:51 ` [PATCH 02/11] tcp_splice: Correct error handling in tcp_splice_epoll_ctl() David Gibson
2023-10-12  1:51 ` [PATCH 03/11] tcp_splice: Don't handle EPOLL_CTL_DEL as part of tcp_splice_epoll_ctl() David Gibson
2023-11-03 16:20   ` Stefano Brivio
2023-11-04  5:56     ` David Gibson
2023-10-12  1:51 ` [PATCH 04/11] tcp_splice: Remove unnecessary forward declaration David Gibson
2023-10-12  1:51 ` [PATCH 05/11] tcp_splice: Avoid awkward temporaries in tcp_splice_epoll_ctl() David Gibson
2023-11-03 16:21   ` Stefano Brivio
2023-11-04  5:58     ` David Gibson
2023-10-12  1:51 ` David Gibson [this message]
2023-10-12  1:51 ` [PATCH 07/11] tcp_splice: Rename sides of connection from a/b to 0/1 David Gibson
2023-10-12  1:51 ` [PATCH 08/11] tcp_splice: Exploit side symmetry in tcp_splice_timer() David Gibson
2023-11-03 16:21   ` Stefano Brivio
2023-11-04  5:59     ` David Gibson
2023-10-12  1:51 ` [PATCH 09/11] tcp_splice: Exploit side symmetry in tcp_splice_connect_finish() David Gibson
2023-10-12  1:51 ` [PATCH 10/11] tcp_splice: Exploit side symmetry in tcp_splice_destroy() David Gibson
2023-11-03 16:22   ` Stefano Brivio
2023-11-06  2:39     ` David Gibson
2023-11-06 13:21       ` Stefano Brivio
2023-10-12  1:51 ` [PATCH 11/11] tcp_splice: Simplify selection of socket and pipe sides in socket handler David Gibson
2023-11-03 16:21   ` Stefano Brivio
2023-11-04  6:02     ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231012015114.2612066-7-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).