public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
* [PATCH v2 0/5] Cleanups to tcp socket pool handling
@ 2023-02-13 23:48 David Gibson
  2023-02-13 23:48 ` [PATCH v2 1/5] tcp: Make a helper to refill each socket pool David Gibson
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: David Gibson @ 2023-02-13 23:48 UTC (permalink / raw)
  To: passt-dev, Stefano Brivio; +Cc: David Gibson

This is a bit of a diversion from what I'm notionally working on at
the moment.  While thinking about what we'd need to use the
IP_TRANSPARENT socket option to broaden the cases where we can
"splice", I noticed some inelegancies in how we handle the pool of
pre-opened sockets in the TCP code, and well, here we are.

This makes a number of cleanups to the handling of these pools.  Most
notably, tcp_splice_connect() now has simpler semantics: it now always
runs in the init namespace, and is always given a pre-opened socket
(which could be in the guest ns).

Changes since v1:
 * Rebased
 * Improved wording of some commit messages

David Gibson (5):
  tcp: Make a helper to refill each socket pool
  tcp: Split init and ns cases for tcp_sock_refill()
  tcp: Move socket pool declarations around
  tcp: Split pool lookup from creating new sockets in
    tcp_conn_new_sock()
  tcp: Improve handling of fallback if socket pool is empty on new
    splice

 tcp.c        | 138 ++++++++++++++++++-------------------------------
 tcp.h        |   2 -
 tcp_conn.h   |  12 ++++-
 tcp_splice.c | 141 ++++++++++++++++++++++++++-------------------------
 4 files changed, 132 insertions(+), 161 deletions(-)

-- 
2.39.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2 1/5] tcp: Make a helper to refill each socket pool
  2023-02-13 23:48 [PATCH v2 0/5] Cleanups to tcp socket pool handling David Gibson
@ 2023-02-13 23:48 ` David Gibson
  2023-02-13 23:48 ` [PATCH v2 2/5] tcp: Split init and ns cases for tcp_sock_refill() David Gibson
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: David Gibson @ 2023-02-13 23:48 UTC (permalink / raw)
  To: passt-dev, Stefano Brivio; +Cc: David Gibson

tcp_sock_refill() contains two near-identical loops to refill the IPv4
and IPv6 socket pools.  In addition, if we get an error on the IPv4
pool we exit early and won't attempt to refill the IPv6 pool.  At
least theoretically, these are independent from each other and there's
value to filling up either pool without the other.  So, there's no
strong reason to give up on one because the other failed.

Address both of these with a helper function 'tcp_sock_refill_pool()' to
refill a single given pool.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp.c | 63 +++++++++++++++++++++++++++++++----------------------------
 1 file changed, 33 insertions(+), 30 deletions(-)

diff --git a/tcp.c b/tcp.c
index 8424d8e..5d13244 100644
--- a/tcp.c
+++ b/tcp.c
@@ -3009,6 +3009,34 @@ static int tcp_ns_socks_init(void *arg)
 	return 0;
 }
 
+/**
+ * tcp_sock_refill_pool() - Refill one pool of pre-opened sockets
+ * @c:		Execution context
+ * @pool:	Pool of sockets to refill
+ * @af:		Address family to use
+ */
+static void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af)
+{
+	int i;
+
+	for (i = 0; i < TCP_SOCK_POOL_SIZE; i++) {
+		int *s = &pool[i];
+
+		if (*s >= 0)
+			break;
+
+		*s = socket(af, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP);
+		if (*s > SOCKET_MAX) {
+			close(*s);
+			*s = -1;
+			return;
+		}
+
+		if (*s >= 0)
+			tcp_sock_set_bufsize(c, *s);
+	}
+}
+
 /**
  * struct tcp_sock_refill_arg - Arguments for tcp_sock_refill()
  * @c:		Execution context
@@ -3028,7 +3056,7 @@ struct tcp_sock_refill_arg {
 static int tcp_sock_refill(void *arg)
 {
 	struct tcp_sock_refill_arg *a = (struct tcp_sock_refill_arg *)arg;
-	int i, *p4, *p6;
+	int *p4, *p6;
 
 	if (a->ns) {
 		ns_enter(a->c);
@@ -3039,36 +3067,11 @@ static int tcp_sock_refill(void *arg)
 		p6 = init_sock_pool6;
 	}
 
-	for (i = 0; a->c->ifi4 && i < TCP_SOCK_POOL_SIZE; i++, p4++) {
-		if (*p4 >= 0)
-			break;
-
-		*p4 = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP);
-		if (*p4 > SOCKET_MAX) {
-			close(*p4);
-			*p4 = -1;
-			return -EIO;
-		}
-
-		if (*p4 >= 0)
-			tcp_sock_set_bufsize(a->c, *p4);
-	}
-
-	for (i = 0; a->c->ifi6 && i < TCP_SOCK_POOL_SIZE; i++, p6++) {
-		if (*p6 >= 0)
-			break;
-
-		*p6 = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK,
-			     IPPROTO_TCP);
-		if (*p6 > SOCKET_MAX) {
-			close(*p6);
-			*p6 = -1;
-			return -EIO;
-		}
+	if (a->c->ifi4)
+		tcp_sock_refill_pool(a->c, p4, AF_INET);
 
-		if (*p6 >= 0)
-			tcp_sock_set_bufsize(a->c, *p6);
-	}
+	if (a->c->ifi6)
+		tcp_sock_refill_pool(a->c, p6, AF_INET6);
 
 	return 0;
 }
-- 
@@ -3009,6 +3009,34 @@ static int tcp_ns_socks_init(void *arg)
 	return 0;
 }
 
+/**
+ * tcp_sock_refill_pool() - Refill one pool of pre-opened sockets
+ * @c:		Execution context
+ * @pool:	Pool of sockets to refill
+ * @af:		Address family to use
+ */
+static void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af)
+{
+	int i;
+
+	for (i = 0; i < TCP_SOCK_POOL_SIZE; i++) {
+		int *s = &pool[i];
+
+		if (*s >= 0)
+			break;
+
+		*s = socket(af, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP);
+		if (*s > SOCKET_MAX) {
+			close(*s);
+			*s = -1;
+			return;
+		}
+
+		if (*s >= 0)
+			tcp_sock_set_bufsize(c, *s);
+	}
+}
+
 /**
  * struct tcp_sock_refill_arg - Arguments for tcp_sock_refill()
  * @c:		Execution context
@@ -3028,7 +3056,7 @@ struct tcp_sock_refill_arg {
 static int tcp_sock_refill(void *arg)
 {
 	struct tcp_sock_refill_arg *a = (struct tcp_sock_refill_arg *)arg;
-	int i, *p4, *p6;
+	int *p4, *p6;
 
 	if (a->ns) {
 		ns_enter(a->c);
@@ -3039,36 +3067,11 @@ static int tcp_sock_refill(void *arg)
 		p6 = init_sock_pool6;
 	}
 
-	for (i = 0; a->c->ifi4 && i < TCP_SOCK_POOL_SIZE; i++, p4++) {
-		if (*p4 >= 0)
-			break;
-
-		*p4 = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP);
-		if (*p4 > SOCKET_MAX) {
-			close(*p4);
-			*p4 = -1;
-			return -EIO;
-		}
-
-		if (*p4 >= 0)
-			tcp_sock_set_bufsize(a->c, *p4);
-	}
-
-	for (i = 0; a->c->ifi6 && i < TCP_SOCK_POOL_SIZE; i++, p6++) {
-		if (*p6 >= 0)
-			break;
-
-		*p6 = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK,
-			     IPPROTO_TCP);
-		if (*p6 > SOCKET_MAX) {
-			close(*p6);
-			*p6 = -1;
-			return -EIO;
-		}
+	if (a->c->ifi4)
+		tcp_sock_refill_pool(a->c, p4, AF_INET);
 
-		if (*p6 >= 0)
-			tcp_sock_set_bufsize(a->c, *p6);
-	}
+	if (a->c->ifi6)
+		tcp_sock_refill_pool(a->c, p6, AF_INET6);
 
 	return 0;
 }
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 2/5] tcp: Split init and ns cases for tcp_sock_refill()
  2023-02-13 23:48 [PATCH v2 0/5] Cleanups to tcp socket pool handling David Gibson
  2023-02-13 23:48 ` [PATCH v2 1/5] tcp: Make a helper to refill each socket pool David Gibson
@ 2023-02-13 23:48 ` David Gibson
  2023-02-13 23:48 ` [PATCH v2 3/5] tcp: Move socket pool declarations around David Gibson
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: David Gibson @ 2023-02-13 23:48 UTC (permalink / raw)
  To: passt-dev, Stefano Brivio; +Cc: David Gibson

With the creation of the tcp_sock_refill_pool() helper, the ns==true and
ns==false paths for tcp_sock_refill() now have almost nothing in common.
Split the two versions into tcp_sock_refill_init() and tcp_sock_refill_ns()
functions.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp.c | 53 +++++++++++++++++++++--------------------------------
 1 file changed, 21 insertions(+), 32 deletions(-)

diff --git a/tcp.c b/tcp.c
index 5d13244..1d1b4ee 100644
--- a/tcp.c
+++ b/tcp.c
@@ -3038,40 +3038,33 @@ static void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af)
 }
 
 /**
- * struct tcp_sock_refill_arg - Arguments for tcp_sock_refill()
+ * tcp_sock_refill_init() - Refill pools of pre-opened sockets in init ns
  * @c:		Execution context
- * @ns:		Set to refill pool of sockets created in namespace
  */
-struct tcp_sock_refill_arg {
-	struct ctx *c;
-	int ns;
-};
+static void tcp_sock_refill_init(const struct ctx *c)
+{
+	if (c->ifi4)
+		tcp_sock_refill_pool(c, init_sock_pool4, AF_INET);
+	if (c->ifi6)
+		tcp_sock_refill_pool(c, init_sock_pool6, AF_INET6);
+}
 
 /**
- * tcp_sock_refill() - Refill pool of pre-opened sockets
- * @arg:	See @tcp_sock_refill_arg
+ * tcp_sock_refill_ns() - Refill pools of pre-opened sockets in namespace
+ * @arg:	Execution context cast to void *
  *
  * Return: 0
  */
-static int tcp_sock_refill(void *arg)
+static int tcp_sock_refill_ns(void *arg)
 {
-	struct tcp_sock_refill_arg *a = (struct tcp_sock_refill_arg *)arg;
-	int *p4, *p6;
+	const struct ctx *c = (const struct ctx *)arg;
 
-	if (a->ns) {
-		ns_enter(a->c);
-		p4 = ns_sock_pool4;
-		p6 = ns_sock_pool6;
-	} else {
-		p4 = init_sock_pool4;
-		p6 = init_sock_pool6;
-	}
-
-	if (a->c->ifi4)
-		tcp_sock_refill_pool(a->c, p4, AF_INET);
+	ns_enter(c);
 
-	if (a->c->ifi6)
-		tcp_sock_refill_pool(a->c, p6, AF_INET6);
+	if (c->ifi4)
+		tcp_sock_refill_pool(c, ns_sock_pool4, AF_INET);
+	if (c->ifi6)
+		tcp_sock_refill_pool(c, ns_sock_pool6, AF_INET6);
 
 	return 0;
 }
@@ -3084,7 +3077,6 @@ static int tcp_sock_refill(void *arg)
  */
 int tcp_init(struct ctx *c)
 {
-	struct tcp_sock_refill_arg refill_arg = { c, 0 };
 	int i;
 #ifndef HAS_GETRANDOM
 	int dev_random = open("/dev/random", O_RDONLY);
@@ -3130,15 +3122,14 @@ int tcp_init(struct ctx *c)
 	memset(tcp_sock_init_ext,	0xff,	sizeof(tcp_sock_init_ext));
 	memset(tcp_sock_ns,		0xff,	sizeof(tcp_sock_ns));
 
-	tcp_sock_refill(&refill_arg);
+	tcp_sock_refill_init(c);
 
 	if (c->mode == MODE_PASTA) {
 		tcp_splice_init(c);
 
 		NS_CALL(tcp_ns_socks_init, c);
 
-		refill_arg.ns = 1;
-		NS_CALL(tcp_sock_refill, &refill_arg);
+		NS_CALL(tcp_sock_refill_ns, c);
 	}
 
 	return 0;
@@ -3258,7 +3249,6 @@ static int tcp_port_rebind(void *arg)
  */
 void tcp_timer(struct ctx *c, const struct timespec *ts)
 {
-	struct tcp_sock_refill_arg refill_arg = { c, 0 };
 	union tcp_conn *conn;
 
 	(void)ts;
@@ -3291,12 +3281,11 @@ void tcp_timer(struct ctx *c, const struct timespec *ts)
 		}
 	}
 
-	tcp_sock_refill(&refill_arg);
+	tcp_sock_refill_init(c);
 	if (c->mode == MODE_PASTA) {
-		refill_arg.ns = 1;
 		if ((c->ifi4 && ns_sock_pool4[TCP_SOCK_POOL_TSH] < 0) ||
 		    (c->ifi6 && ns_sock_pool6[TCP_SOCK_POOL_TSH] < 0))
-			NS_CALL(tcp_sock_refill, &refill_arg);
+			NS_CALL(tcp_sock_refill_ns, c);
 
 		tcp_splice_pipe_refill(c);
 	}
-- 
@@ -3038,40 +3038,33 @@ static void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af)
 }
 
 /**
- * struct tcp_sock_refill_arg - Arguments for tcp_sock_refill()
+ * tcp_sock_refill_init() - Refill pools of pre-opened sockets in init ns
  * @c:		Execution context
- * @ns:		Set to refill pool of sockets created in namespace
  */
-struct tcp_sock_refill_arg {
-	struct ctx *c;
-	int ns;
-};
+static void tcp_sock_refill_init(const struct ctx *c)
+{
+	if (c->ifi4)
+		tcp_sock_refill_pool(c, init_sock_pool4, AF_INET);
+	if (c->ifi6)
+		tcp_sock_refill_pool(c, init_sock_pool6, AF_INET6);
+}
 
 /**
- * tcp_sock_refill() - Refill pool of pre-opened sockets
- * @arg:	See @tcp_sock_refill_arg
+ * tcp_sock_refill_ns() - Refill pools of pre-opened sockets in namespace
+ * @arg:	Execution context cast to void *
  *
  * Return: 0
  */
-static int tcp_sock_refill(void *arg)
+static int tcp_sock_refill_ns(void *arg)
 {
-	struct tcp_sock_refill_arg *a = (struct tcp_sock_refill_arg *)arg;
-	int *p4, *p6;
+	const struct ctx *c = (const struct ctx *)arg;
 
-	if (a->ns) {
-		ns_enter(a->c);
-		p4 = ns_sock_pool4;
-		p6 = ns_sock_pool6;
-	} else {
-		p4 = init_sock_pool4;
-		p6 = init_sock_pool6;
-	}
-
-	if (a->c->ifi4)
-		tcp_sock_refill_pool(a->c, p4, AF_INET);
+	ns_enter(c);
 
-	if (a->c->ifi6)
-		tcp_sock_refill_pool(a->c, p6, AF_INET6);
+	if (c->ifi4)
+		tcp_sock_refill_pool(c, ns_sock_pool4, AF_INET);
+	if (c->ifi6)
+		tcp_sock_refill_pool(c, ns_sock_pool6, AF_INET6);
 
 	return 0;
 }
@@ -3084,7 +3077,6 @@ static int tcp_sock_refill(void *arg)
  */
 int tcp_init(struct ctx *c)
 {
-	struct tcp_sock_refill_arg refill_arg = { c, 0 };
 	int i;
 #ifndef HAS_GETRANDOM
 	int dev_random = open("/dev/random", O_RDONLY);
@@ -3130,15 +3122,14 @@ int tcp_init(struct ctx *c)
 	memset(tcp_sock_init_ext,	0xff,	sizeof(tcp_sock_init_ext));
 	memset(tcp_sock_ns,		0xff,	sizeof(tcp_sock_ns));
 
-	tcp_sock_refill(&refill_arg);
+	tcp_sock_refill_init(c);
 
 	if (c->mode == MODE_PASTA) {
 		tcp_splice_init(c);
 
 		NS_CALL(tcp_ns_socks_init, c);
 
-		refill_arg.ns = 1;
-		NS_CALL(tcp_sock_refill, &refill_arg);
+		NS_CALL(tcp_sock_refill_ns, c);
 	}
 
 	return 0;
@@ -3258,7 +3249,6 @@ static int tcp_port_rebind(void *arg)
  */
 void tcp_timer(struct ctx *c, const struct timespec *ts)
 {
-	struct tcp_sock_refill_arg refill_arg = { c, 0 };
 	union tcp_conn *conn;
 
 	(void)ts;
@@ -3291,12 +3281,11 @@ void tcp_timer(struct ctx *c, const struct timespec *ts)
 		}
 	}
 
-	tcp_sock_refill(&refill_arg);
+	tcp_sock_refill_init(c);
 	if (c->mode == MODE_PASTA) {
-		refill_arg.ns = 1;
 		if ((c->ifi4 && ns_sock_pool4[TCP_SOCK_POOL_TSH] < 0) ||
 		    (c->ifi6 && ns_sock_pool6[TCP_SOCK_POOL_TSH] < 0))
-			NS_CALL(tcp_sock_refill, &refill_arg);
+			NS_CALL(tcp_sock_refill_ns, c);
 
 		tcp_splice_pipe_refill(c);
 	}
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 3/5] tcp: Move socket pool declarations around
  2023-02-13 23:48 [PATCH v2 0/5] Cleanups to tcp socket pool handling David Gibson
  2023-02-13 23:48 ` [PATCH v2 1/5] tcp: Make a helper to refill each socket pool David Gibson
  2023-02-13 23:48 ` [PATCH v2 2/5] tcp: Split init and ns cases for tcp_sock_refill() David Gibson
@ 2023-02-13 23:48 ` David Gibson
  2023-02-13 23:48 ` [PATCH v2 4/5] tcp: Split pool lookup from creating new sockets in tcp_conn_new_sock() David Gibson
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: David Gibson @ 2023-02-13 23:48 UTC (permalink / raw)
  To: passt-dev, Stefano Brivio; +Cc: David Gibson

tcp_splice.c has some explicit extern declarations to access the
socket pools.  This is pretty dangerous - if we changed the type of
these variables in tcp.c, we'd have tcp.c and tcp_splice.c using the
same memory in different ways with no compiler error.  So, move the
extern declarations to tcp_conn.h so they're visible to both tcp.c and
tcp_splice.c, but not the rest of pasta.

In fact the pools for the guest namespace are necessarily only used by
tcp_splice.c - we have no sockets on the guest side if we're not
splicing.  So move those declarations and the functions that deal
exclusively with them to tcp_splice.c

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp.c        | 41 ++++-------------------------------------
 tcp.h        |  2 --
 tcp_conn.h   | 10 ++++++++--
 tcp_splice.c | 50 +++++++++++++++++++++++++++++++++++++++++++-------
 4 files changed, 55 insertions(+), 48 deletions(-)

diff --git a/tcp.c b/tcp.c
index 1d1b4ee..2f40d62 100644
--- a/tcp.c
+++ b/tcp.c
@@ -369,8 +369,6 @@ struct tcp6_l2_head {	/* For MSS6 macro: keep in sync with tcp6_l2_buf_t */
 #define FIN_TIMEOUT			60
 #define ACT_TIMEOUT			7200
 
-#define TCP_SOCK_POOL_TSH		16 /* Refill in ns if > x used */
-
 #define LOW_RTT_TABLE_SIZE		8
 #define LOW_RTT_THRESHOLD		10 /* us */
 
@@ -594,11 +592,9 @@ static inline struct tcp_tap_conn *conn_at_idx(int index)
 /* Table for lookup from remote address, local port, remote port */
 static struct tcp_tap_conn *tc_hash[TCP_HASH_TABLE_SIZE];
 
-/* Pools for pre-opened sockets */
+/* Pools for pre-opened sockets (in init) */
 int init_sock_pool4		[TCP_SOCK_POOL_SIZE];
 int init_sock_pool6		[TCP_SOCK_POOL_SIZE];
-int ns_sock_pool4		[TCP_SOCK_POOL_SIZE];
-int ns_sock_pool6		[TCP_SOCK_POOL_SIZE];
 
 /**
  * tcp_conn_epoll_events() - epoll events mask for given connection state
@@ -3015,7 +3011,7 @@ static int tcp_ns_socks_init(void *arg)
  * @pool:	Pool of sockets to refill
  * @af:		Address family to use
  */
-static void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af)
+void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af)
 {
 	int i;
 
@@ -3049,26 +3045,6 @@ static void tcp_sock_refill_init(const struct ctx *c)
 		tcp_sock_refill_pool(c, init_sock_pool6, AF_INET6);
 }
 
-/**
- * tcp_sock_refill_ns() - Refill pools of pre-opened sockets in namespace
- * @arg:	Execution context cast to void *
- *
- * Return: 0
- */
-static int tcp_sock_refill_ns(void *arg)
-{
-	const struct ctx *c = (const struct ctx *)arg;
-
-	ns_enter(c);
-
-	if (c->ifi4)
-		tcp_sock_refill_pool(c, ns_sock_pool4, AF_INET);
-	if (c->ifi6)
-		tcp_sock_refill_pool(c, ns_sock_pool6, AF_INET6);
-
-	return 0;
-}
-
 /**
  * tcp_init() - Get initial sequence, hash secret, initialise per-socket data
  * @c:		Execution context
@@ -3117,8 +3093,6 @@ int tcp_init(struct ctx *c)
 
 	memset(init_sock_pool4,		0xff,	sizeof(init_sock_pool4));
 	memset(init_sock_pool6,		0xff,	sizeof(init_sock_pool6));
-	memset(ns_sock_pool4,		0xff,	sizeof(ns_sock_pool4));
-	memset(ns_sock_pool6,		0xff,	sizeof(ns_sock_pool6));
 	memset(tcp_sock_init_ext,	0xff,	sizeof(tcp_sock_init_ext));
 	memset(tcp_sock_ns,		0xff,	sizeof(tcp_sock_ns));
 
@@ -3128,8 +3102,6 @@ int tcp_init(struct ctx *c)
 		tcp_splice_init(c);
 
 		NS_CALL(tcp_ns_socks_init, c);
-
-		NS_CALL(tcp_sock_refill_ns, c);
 	}
 
 	return 0;
@@ -3282,11 +3254,6 @@ void tcp_timer(struct ctx *c, const struct timespec *ts)
 	}
 
 	tcp_sock_refill_init(c);
-	if (c->mode == MODE_PASTA) {
-		if ((c->ifi4 && ns_sock_pool4[TCP_SOCK_POOL_TSH] < 0) ||
-		    (c->ifi6 && ns_sock_pool6[TCP_SOCK_POOL_TSH] < 0))
-			NS_CALL(tcp_sock_refill_ns, c);
-
-		tcp_splice_pipe_refill(c);
-	}
+	if (c->mode == MODE_PASTA)
+		tcp_splice_refill(c);
 }
diff --git a/tcp.h b/tcp.h
index 739b451..236038f 100644
--- a/tcp.h
+++ b/tcp.h
@@ -11,8 +11,6 @@
 #define TCP_CONN_INDEX_BITS		17	/* 128k */
 #define TCP_MAX_CONNS			(1 << TCP_CONN_INDEX_BITS)
 
-#define TCP_SOCK_POOL_SIZE		32
-
 struct ctx;
 
 void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
diff --git a/tcp_conn.h b/tcp_conn.h
index 70f4a7c..9951b0a 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -182,11 +182,17 @@ union tcp_conn {
 /* TCP connections */
 extern union tcp_conn tc[];
 
+/* Socket pools */
+#define TCP_SOCK_POOL_SIZE		32
+
+extern int init_sock_pool4	[TCP_SOCK_POOL_SIZE];
+extern int init_sock_pool6	[TCP_SOCK_POOL_SIZE];
+
 void tcp_splice_conn_update(struct ctx *c, struct tcp_splice_conn *new);
 void tcp_table_compact(struct ctx *c, union tcp_conn *hole);
 void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union);
 void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union);
-void tcp_splice_pipe_refill(const struct ctx *c);
-
+void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af);
+void tcp_splice_refill(const struct ctx *c);
 
 #endif /* TCP_CONN_H */
diff --git a/tcp_splice.c b/tcp_splice.c
index 1d624f1..09f0e3e 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -60,11 +60,11 @@
 #define TCP_SPLICE_CONN_PRESSURE	30	/* % of conn_count */
 #define TCP_SPLICE_FILE_PRESSURE	30	/* % of c->nofile */
 
-/* From tcp.c */
-extern int init_sock_pool4		[TCP_SOCK_POOL_SIZE];
-extern int init_sock_pool6		[TCP_SOCK_POOL_SIZE];
-extern int ns_sock_pool4		[TCP_SOCK_POOL_SIZE];
-extern int ns_sock_pool6		[TCP_SOCK_POOL_SIZE];
+/* Pools for pre-opened sockets (in namespace) */
+#define TCP_SOCK_POOL_TSH		16 /* Refill in ns if > x used */
+
+static int ns_sock_pool4	[TCP_SOCK_POOL_SIZE];
+static int ns_sock_pool6	[TCP_SOCK_POOL_SIZE];
 
 /* Pool of pre-opened pipes */
 static int splice_pipe_pool		[TCP_SPLICE_PIPE_POOL_SIZE][2][2];
@@ -782,7 +782,7 @@ smaller:
  * tcp_splice_pipe_refill() - Refill pool of pre-opened pipes
  * @c:		Execution context
  */
-void tcp_splice_pipe_refill(const struct ctx *c)
+static void tcp_splice_pipe_refill(const struct ctx *c)
 {
 	int i;
 
@@ -811,6 +811,39 @@ void tcp_splice_pipe_refill(const struct ctx *c)
 	}
 }
 
+/**
+ * tcp_sock_refill_ns() - Refill pools of pre-opened sockets in namespace
+ * @arg:	Execution context cast to void *
+ *
+ * Return: 0
+ */
+static int tcp_sock_refill_ns(void *arg)
+{
+	const struct ctx *c = (const struct ctx *)arg;
+
+	ns_enter(c);
+
+	if (c->ifi4)
+		tcp_sock_refill_pool(c, ns_sock_pool4, AF_INET);
+	if (c->ifi6)
+		tcp_sock_refill_pool(c, ns_sock_pool6, AF_INET6);
+
+	return 0;
+}
+
+/**
+ * tcp_splice_refill() - Refill pools of resources needed for splicing
+ * @c:		Execution context
+ */
+void tcp_splice_refill(const struct ctx *c)
+{
+	if ((c->ifi4 && ns_sock_pool4[TCP_SOCK_POOL_TSH] < 0) ||
+	    (c->ifi6 && ns_sock_pool6[TCP_SOCK_POOL_TSH] < 0))
+		NS_CALL(tcp_sock_refill_ns, c);
+
+	tcp_splice_pipe_refill(c);
+}
+
 /**
  * tcp_splice_init() - Initialise pipe pool and size
  * @c:		Execution context
@@ -819,7 +852,10 @@ void tcp_splice_init(struct ctx *c)
 {
 	memset(splice_pipe_pool, 0xff, sizeof(splice_pipe_pool));
 	tcp_set_pipe_size(c);
-	tcp_splice_pipe_refill(c);
+
+	memset(&ns_sock_pool4,		0xff,	sizeof(ns_sock_pool4));
+	memset(&ns_sock_pool6,		0xff,	sizeof(ns_sock_pool6));
+	NS_CALL(tcp_sock_refill_ns, c);
 }
 
 /**
-- 
@@ -60,11 +60,11 @@
 #define TCP_SPLICE_CONN_PRESSURE	30	/* % of conn_count */
 #define TCP_SPLICE_FILE_PRESSURE	30	/* % of c->nofile */
 
-/* From tcp.c */
-extern int init_sock_pool4		[TCP_SOCK_POOL_SIZE];
-extern int init_sock_pool6		[TCP_SOCK_POOL_SIZE];
-extern int ns_sock_pool4		[TCP_SOCK_POOL_SIZE];
-extern int ns_sock_pool6		[TCP_SOCK_POOL_SIZE];
+/* Pools for pre-opened sockets (in namespace) */
+#define TCP_SOCK_POOL_TSH		16 /* Refill in ns if > x used */
+
+static int ns_sock_pool4	[TCP_SOCK_POOL_SIZE];
+static int ns_sock_pool6	[TCP_SOCK_POOL_SIZE];
 
 /* Pool of pre-opened pipes */
 static int splice_pipe_pool		[TCP_SPLICE_PIPE_POOL_SIZE][2][2];
@@ -782,7 +782,7 @@ smaller:
  * tcp_splice_pipe_refill() - Refill pool of pre-opened pipes
  * @c:		Execution context
  */
-void tcp_splice_pipe_refill(const struct ctx *c)
+static void tcp_splice_pipe_refill(const struct ctx *c)
 {
 	int i;
 
@@ -811,6 +811,39 @@ void tcp_splice_pipe_refill(const struct ctx *c)
 	}
 }
 
+/**
+ * tcp_sock_refill_ns() - Refill pools of pre-opened sockets in namespace
+ * @arg:	Execution context cast to void *
+ *
+ * Return: 0
+ */
+static int tcp_sock_refill_ns(void *arg)
+{
+	const struct ctx *c = (const struct ctx *)arg;
+
+	ns_enter(c);
+
+	if (c->ifi4)
+		tcp_sock_refill_pool(c, ns_sock_pool4, AF_INET);
+	if (c->ifi6)
+		tcp_sock_refill_pool(c, ns_sock_pool6, AF_INET6);
+
+	return 0;
+}
+
+/**
+ * tcp_splice_refill() - Refill pools of resources needed for splicing
+ * @c:		Execution context
+ */
+void tcp_splice_refill(const struct ctx *c)
+{
+	if ((c->ifi4 && ns_sock_pool4[TCP_SOCK_POOL_TSH] < 0) ||
+	    (c->ifi6 && ns_sock_pool6[TCP_SOCK_POOL_TSH] < 0))
+		NS_CALL(tcp_sock_refill_ns, c);
+
+	tcp_splice_pipe_refill(c);
+}
+
 /**
  * tcp_splice_init() - Initialise pipe pool and size
  * @c:		Execution context
@@ -819,7 +852,10 @@ void tcp_splice_init(struct ctx *c)
 {
 	memset(splice_pipe_pool, 0xff, sizeof(splice_pipe_pool));
 	tcp_set_pipe_size(c);
-	tcp_splice_pipe_refill(c);
+
+	memset(&ns_sock_pool4,		0xff,	sizeof(ns_sock_pool4));
+	memset(&ns_sock_pool6,		0xff,	sizeof(ns_sock_pool6));
+	NS_CALL(tcp_sock_refill_ns, c);
 }
 
 /**
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 4/5] tcp: Split pool lookup from creating new sockets in tcp_conn_new_sock()
  2023-02-13 23:48 [PATCH v2 0/5] Cleanups to tcp socket pool handling David Gibson
                   ` (2 preceding siblings ...)
  2023-02-13 23:48 ` [PATCH v2 3/5] tcp: Move socket pool declarations around David Gibson
@ 2023-02-13 23:48 ` David Gibson
  2023-02-13 23:48 ` [PATCH v2 5/5] tcp: Improve handling of fallback if socket pool is empty on new splice David Gibson
  2023-02-14 16:27 ` [PATCH v2 0/5] Cleanups to tcp socket pool handling Stefano Brivio
  5 siblings, 0 replies; 7+ messages in thread
From: David Gibson @ 2023-02-13 23:48 UTC (permalink / raw)
  To: passt-dev, Stefano Brivio; +Cc: David Gibson

tcp_conn_new_sock() first looks for a socket in a pre-opened pool, then if
that's empty creates a new socket in the init namespace.  Both parts of
this are duplicated in other places: the pool lookup logic is duplicated in
tcp_splice_new(), and the socket opening logic is duplicated in
tcp_sock_refill_pool().

Split the function into separate parts so we can remove both these
duplications.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp.c        | 53 +++++++++++++++++++++++++++-------------------------
 tcp_conn.h   |  1 +
 tcp_splice.c |  8 ++------
 3 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/tcp.c b/tcp.c
index 2f40d62..a0e2e34 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1858,24 +1858,35 @@ static void tcp_seq_init(const struct ctx *c, struct tcp_tap_conn *conn,
 }
 
 /**
- * tcp_conn_new_sock() - Get socket for new connection from pool or make new one
- * @c:		Execution context
- * @af:		Address family
+ * tcp_conn_pool_sock() - Get socket for new connection from pre-opened pool
+ * @pool:	Pool of pre-opened sockets
  *
- * Return: socket number if available, negative code if socket creation failed
+ * Return: socket number if available, negative code if pool is empty
  */
-static int tcp_conn_new_sock(const struct ctx *c, sa_family_t af)
+int tcp_conn_pool_sock(int pool[])
 {
-	int *p = af == AF_INET6 ? init_sock_pool6 : init_sock_pool4, i, s = -1;
+	int s = -1, i;
 
-	for (i = 0; i < TCP_SOCK_POOL_SIZE; i++, p++) {
-		SWAP(s, *p);
+	for (i = 0; i < TCP_SOCK_POOL_SIZE; i++) {
+		SWAP(s, pool[i]);
 		if (s >= 0)
-			break;
+			return s;
 	}
+	return -1;
+}
 
-	if (s < 0)
-		s = socket(af, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP);
+/**
+ * tcp_conn_new_sock() - Open and prepare new socket for connection
+ * @c:		Execution context
+ * @af:		Address family
+ *
+ * Return: socket number on success, negative code if socket creation failed
+ */
+static int tcp_conn_new_sock(const struct ctx *c, sa_family_t af)
+{
+	int s;
+
+	s = socket(af, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP);
 
 	if (s > SOCKET_MAX) {
 		close(s);
@@ -1936,6 +1947,7 @@ static void tcp_conn_from_tap(struct ctx *c, int af, const void *addr,
 			      const struct tcphdr *th, const char *opts,
 			      size_t optlen, const struct timespec *now)
 {
+	int *pool = af == AF_INET6 ? init_sock_pool6 : init_sock_pool4;
 	struct sockaddr_in addr4 = {
 		.sin_family = AF_INET,
 		.sin_port = th->dest,
@@ -1954,8 +1966,9 @@ static void tcp_conn_from_tap(struct ctx *c, int af, const void *addr,
 	if (c->tcp.conn_count >= TCP_MAX_CONNS)
 		return;
 
-	if ((s = tcp_conn_new_sock(c, af)) < 0)
-		return;
+	if ((s = tcp_conn_pool_sock(pool)) < 0)
+		if ((s = tcp_conn_new_sock(c, af)) < 0)
+			return;
 
 	if (!c->no_map_gw) {
 		if (af == AF_INET && IN4_ARE_ADDR_EQUAL(addr, &c->ip4.gw))
@@ -3016,20 +3029,10 @@ void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af)
 	int i;
 
 	for (i = 0; i < TCP_SOCK_POOL_SIZE; i++) {
-		int *s = &pool[i];
-
-		if (*s >= 0)
+		if (pool[i] >= 0)
 			break;
 
-		*s = socket(af, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP);
-		if (*s > SOCKET_MAX) {
-			close(*s);
-			*s = -1;
-			return;
-		}
-
-		if (*s >= 0)
-			tcp_sock_set_bufsize(c, *s);
+		pool[i] = tcp_conn_new_sock(c, af);
 	}
 }
 
diff --git a/tcp_conn.h b/tcp_conn.h
index 9951b0a..c807e8b 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -192,6 +192,7 @@ void tcp_splice_conn_update(struct ctx *c, struct tcp_splice_conn *new);
 void tcp_table_compact(struct ctx *c, union tcp_conn *hole);
 void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union);
 void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union);
+int tcp_conn_pool_sock(int pool[]);
 void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af);
 void tcp_splice_refill(const struct ctx *c);
 
diff --git a/tcp_splice.c b/tcp_splice.c
index 09f0e3e..3bf6368 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -451,18 +451,14 @@ static int tcp_splice_connect_ns(void *arg)
 static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
 			  in_port_t port, int outbound)
 {
-	int *p, i, s = -1;
+	int *p, s = -1;
 
 	if (outbound)
 		p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
 	else
 		p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
 
-	for (i = 0; i < TCP_SOCK_POOL_SIZE; i++, p++) {
-		SWAP(s, *p);
-		if (s >= 0)
-			break;
-	}
+	s = tcp_conn_pool_sock(p);
 
 	/* No socket available in namespace: create a new one for connect() */
 	if (s < 0 && !outbound) {
-- 
@@ -451,18 +451,14 @@ static int tcp_splice_connect_ns(void *arg)
 static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
 			  in_port_t port, int outbound)
 {
-	int *p, i, s = -1;
+	int *p, s = -1;
 
 	if (outbound)
 		p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
 	else
 		p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
 
-	for (i = 0; i < TCP_SOCK_POOL_SIZE; i++, p++) {
-		SWAP(s, *p);
-		if (s >= 0)
-			break;
-	}
+	s = tcp_conn_pool_sock(p);
 
 	/* No socket available in namespace: create a new one for connect() */
 	if (s < 0 && !outbound) {
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 5/5] tcp: Improve handling of fallback if socket pool is empty on new splice
  2023-02-13 23:48 [PATCH v2 0/5] Cleanups to tcp socket pool handling David Gibson
                   ` (3 preceding siblings ...)
  2023-02-13 23:48 ` [PATCH v2 4/5] tcp: Split pool lookup from creating new sockets in tcp_conn_new_sock() David Gibson
@ 2023-02-13 23:48 ` David Gibson
  2023-02-14 16:27 ` [PATCH v2 0/5] Cleanups to tcp socket pool handling Stefano Brivio
  5 siblings, 0 replies; 7+ messages in thread
From: David Gibson @ 2023-02-13 23:48 UTC (permalink / raw)
  To: passt-dev, Stefano Brivio; +Cc: David Gibson

When creating a new spliced connection, we need to get a socket in the
other ns from the originating one.  To avoid excessive ns switches we
usually get these from a pool refilled on a timer.  However, if the pool
runs out we need a fallback.  Currently that's done by passing -1 as the
socket to tcp_splice_connnect() and running it in the target ns.

This means that tcp_splice_connect() itself needs to have different cases
depending on whether it's given an existing socket or not, which is
a separate concern from what it's mostly doing.  We change it to require
a suitable open socket to be passed in, and ensuring in the caller that we
have one.

This requires adding the fallback paths to the caller, tcp_splice_new().
We use slightly different approaches for a socket in the init ns versus the
guest ns.

This also means that we no longer need to run tcp_splice_connect() itself
in the guest ns, which allows us to remove a bunch of boilerplate code.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 tcp.c        |  2 +-
 tcp_conn.h   |  1 +
 tcp_splice.c | 89 ++++++++++++++++++----------------------------------
 3 files changed, 32 insertions(+), 60 deletions(-)

diff --git a/tcp.c b/tcp.c
index a0e2e34..f028e01 100644
--- a/tcp.c
+++ b/tcp.c
@@ -1882,7 +1882,7 @@ int tcp_conn_pool_sock(int pool[])
  *
  * Return: socket number on success, negative code if socket creation failed
  */
-static int tcp_conn_new_sock(const struct ctx *c, sa_family_t af)
+int tcp_conn_new_sock(const struct ctx *c, sa_family_t af)
 {
 	int s;
 
diff --git a/tcp_conn.h b/tcp_conn.h
index c807e8b..a499f34 100644
--- a/tcp_conn.h
+++ b/tcp_conn.h
@@ -193,6 +193,7 @@ void tcp_table_compact(struct ctx *c, union tcp_conn *hole);
 void tcp_splice_destroy(struct ctx *c, union tcp_conn *conn_union);
 void tcp_splice_timer(struct ctx *c, union tcp_conn *conn_union);
 int tcp_conn_pool_sock(int pool[]);
+int tcp_conn_new_sock(const struct ctx *c, sa_family_t af);
 void tcp_sock_refill_pool(const struct ctx *c, int pool[], int af);
 void tcp_splice_refill(const struct ctx *c);
 
diff --git a/tcp_splice.c b/tcp_splice.c
index 3bf6368..84f855e 100644
--- a/tcp_splice.c
+++ b/tcp_splice.c
@@ -87,6 +87,9 @@ static const char *tcp_splice_flag_str[] __attribute((__unused__)) = {
 	"RCVLOWAT_ACT_B", "CLOSING",
 };
 
+/* Forward declaration */
+static int tcp_sock_refill_ns(void *arg);
+
 /**
  * tcp_splice_conn_epoll_events() - epoll events masks for given state
  * @events:	Connection event flags
@@ -347,12 +350,8 @@ static int tcp_splice_connect_finish(const struct ctx *c,
  * Return: 0 for connect() succeeded or in progress, negative value on error
  */
 static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
-			      int s, in_port_t port)
+			      int sock_conn, in_port_t port)
 {
-	int sock_conn = (s >= 0) ? s : socket(CONN_V6(conn) ? AF_INET6 :
-							      AF_INET,
-					      SOCK_STREAM | SOCK_NONBLOCK,
-					      IPPROTO_TCP);
 	struct sockaddr_in6 addr6 = {
 		.sin6_family = AF_INET6,
 		.sin6_port = htons(port),
@@ -366,19 +365,8 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
 	const struct sockaddr *sa;
 	socklen_t sl;
 
-	if (sock_conn < 0)
-		return -errno;
-
-	if (sock_conn > SOCKET_MAX) {
-		close(sock_conn);
-		return -EIO;
-	}
-
 	conn->b = sock_conn;
 
-	if (s < 0)
-		tcp_sock_set_bufsize(c, conn->b);
-
 	if (setsockopt(conn->b, SOL_TCP, TCP_QUICKACK,
 		       &((int){ 1 }), sizeof(int))) {
 		trace("TCP (spliced): failed to set TCP_QUICKACK on socket %i",
@@ -409,36 +397,6 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
 	return 0;
 }
 
-/**
- * struct tcp_splice_connect_ns_arg - Arguments for tcp_splice_connect_ns()
- * @c:		Execution context
- * @conn:	Accepted inbound connection
- * @port:	Destination port, host order
- * @ret:	Return value of tcp_splice_connect_ns()
- */
-struct tcp_splice_connect_ns_arg {
-	const struct ctx *c;
-	struct tcp_splice_conn *conn;
-	in_port_t port;
-	int ret;
-};
-
-/**
- * tcp_splice_connect_ns() - Enter namespace and call tcp_splice_connect()
- * @arg:	See struct tcp_splice_connect_ns_arg
- *
- * Return: 0
- */
-static int tcp_splice_connect_ns(void *arg)
-{
-	struct tcp_splice_connect_ns_arg *a;
-
-	a = (struct tcp_splice_connect_ns_arg *)arg;
-	ns_enter(a->c);
-	a->ret = tcp_splice_connect(a->c, a->conn, -1, a->port);
-	return 0;
-}
-
 /**
  * tcp_splice_new() - Handle new spliced connection
  * @c:		Execution context
@@ -451,24 +409,37 @@ static int tcp_splice_connect_ns(void *arg)
 static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
 			  in_port_t port, int outbound)
 {
-	int *p, s = -1;
-
-	if (outbound)
-		p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
-	else
-		p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
+	int s = -1;
+
+	/* If the pool is empty we take slightly different approaches
+	 * for init or ns sockets.  For init sockets we just open a
+	 * new one without refilling the pool to keep latency down.
+	 * For ns sockets, we're going to incur the latency of
+	 * entering the ns anyway, so we might as well refill the
+	 * pool.
+	 */
+	if (outbound) {
+		int *p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
+		int af = CONN_V6(conn) ? AF_INET6 : AF_INET;
+
+		s = tcp_conn_pool_sock(p);
+		if (s < 0)
+			s = tcp_conn_new_sock(c, af);
+	} else {
+		int *p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
 
-	s = tcp_conn_pool_sock(p);
+		/* If pool is empty, refill it first */
+		if (p[TCP_SOCK_POOL_SIZE-1] < 0)
+			NS_CALL(tcp_sock_refill_ns, c);
 
-	/* No socket available in namespace: create a new one for connect() */
-	if (s < 0 && !outbound) {
-		struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
+		s = tcp_conn_pool_sock(p);
+	}
 
-		NS_CALL(tcp_splice_connect_ns, &ns_arg);
-		return ns_arg.ret;
+	if (s < 0) {
+		warn("Couldn't open connectable socket for splice (%d)", s);
+		return s;
 	}
 
-	/* Otherwise, the socket will connect on the side it was created on */
 	return tcp_splice_connect(c, conn, s, port);
 }
 
-- 
@@ -87,6 +87,9 @@ static const char *tcp_splice_flag_str[] __attribute((__unused__)) = {
 	"RCVLOWAT_ACT_B", "CLOSING",
 };
 
+/* Forward declaration */
+static int tcp_sock_refill_ns(void *arg);
+
 /**
  * tcp_splice_conn_epoll_events() - epoll events masks for given state
  * @events:	Connection event flags
@@ -347,12 +350,8 @@ static int tcp_splice_connect_finish(const struct ctx *c,
  * Return: 0 for connect() succeeded or in progress, negative value on error
  */
 static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
-			      int s, in_port_t port)
+			      int sock_conn, in_port_t port)
 {
-	int sock_conn = (s >= 0) ? s : socket(CONN_V6(conn) ? AF_INET6 :
-							      AF_INET,
-					      SOCK_STREAM | SOCK_NONBLOCK,
-					      IPPROTO_TCP);
 	struct sockaddr_in6 addr6 = {
 		.sin6_family = AF_INET6,
 		.sin6_port = htons(port),
@@ -366,19 +365,8 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
 	const struct sockaddr *sa;
 	socklen_t sl;
 
-	if (sock_conn < 0)
-		return -errno;
-
-	if (sock_conn > SOCKET_MAX) {
-		close(sock_conn);
-		return -EIO;
-	}
-
 	conn->b = sock_conn;
 
-	if (s < 0)
-		tcp_sock_set_bufsize(c, conn->b);
-
 	if (setsockopt(conn->b, SOL_TCP, TCP_QUICKACK,
 		       &((int){ 1 }), sizeof(int))) {
 		trace("TCP (spliced): failed to set TCP_QUICKACK on socket %i",
@@ -409,36 +397,6 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn,
 	return 0;
 }
 
-/**
- * struct tcp_splice_connect_ns_arg - Arguments for tcp_splice_connect_ns()
- * @c:		Execution context
- * @conn:	Accepted inbound connection
- * @port:	Destination port, host order
- * @ret:	Return value of tcp_splice_connect_ns()
- */
-struct tcp_splice_connect_ns_arg {
-	const struct ctx *c;
-	struct tcp_splice_conn *conn;
-	in_port_t port;
-	int ret;
-};
-
-/**
- * tcp_splice_connect_ns() - Enter namespace and call tcp_splice_connect()
- * @arg:	See struct tcp_splice_connect_ns_arg
- *
- * Return: 0
- */
-static int tcp_splice_connect_ns(void *arg)
-{
-	struct tcp_splice_connect_ns_arg *a;
-
-	a = (struct tcp_splice_connect_ns_arg *)arg;
-	ns_enter(a->c);
-	a->ret = tcp_splice_connect(a->c, a->conn, -1, a->port);
-	return 0;
-}
-
 /**
  * tcp_splice_new() - Handle new spliced connection
  * @c:		Execution context
@@ -451,24 +409,37 @@ static int tcp_splice_connect_ns(void *arg)
 static int tcp_splice_new(const struct ctx *c, struct tcp_splice_conn *conn,
 			  in_port_t port, int outbound)
 {
-	int *p, s = -1;
-
-	if (outbound)
-		p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
-	else
-		p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
+	int s = -1;
+
+	/* If the pool is empty we take slightly different approaches
+	 * for init or ns sockets.  For init sockets we just open a
+	 * new one without refilling the pool to keep latency down.
+	 * For ns sockets, we're going to incur the latency of
+	 * entering the ns anyway, so we might as well refill the
+	 * pool.
+	 */
+	if (outbound) {
+		int *p = CONN_V6(conn) ? init_sock_pool6 : init_sock_pool4;
+		int af = CONN_V6(conn) ? AF_INET6 : AF_INET;
+
+		s = tcp_conn_pool_sock(p);
+		if (s < 0)
+			s = tcp_conn_new_sock(c, af);
+	} else {
+		int *p = CONN_V6(conn) ? ns_sock_pool6 : ns_sock_pool4;
 
-	s = tcp_conn_pool_sock(p);
+		/* If pool is empty, refill it first */
+		if (p[TCP_SOCK_POOL_SIZE-1] < 0)
+			NS_CALL(tcp_sock_refill_ns, c);
 
-	/* No socket available in namespace: create a new one for connect() */
-	if (s < 0 && !outbound) {
-		struct tcp_splice_connect_ns_arg ns_arg = { c, conn, port, 0 };
+		s = tcp_conn_pool_sock(p);
+	}
 
-		NS_CALL(tcp_splice_connect_ns, &ns_arg);
-		return ns_arg.ret;
+	if (s < 0) {
+		warn("Couldn't open connectable socket for splice (%d)", s);
+		return s;
 	}
 
-	/* Otherwise, the socket will connect on the side it was created on */
 	return tcp_splice_connect(c, conn, s, port);
 }
 
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 0/5] Cleanups to tcp socket pool handling
  2023-02-13 23:48 [PATCH v2 0/5] Cleanups to tcp socket pool handling David Gibson
                   ` (4 preceding siblings ...)
  2023-02-13 23:48 ` [PATCH v2 5/5] tcp: Improve handling of fallback if socket pool is empty on new splice David Gibson
@ 2023-02-14 16:27 ` Stefano Brivio
  5 siblings, 0 replies; 7+ messages in thread
From: Stefano Brivio @ 2023-02-14 16:27 UTC (permalink / raw)
  To: David Gibson; +Cc: passt-dev

On Tue, 14 Feb 2023 10:48:18 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> This is a bit of a diversion from what I'm notionally working on at
> the moment.  While thinking about what we'd need to use the
> IP_TRANSPARENT socket option to broaden the cases where we can
> "splice", I noticed some inelegancies in how we handle the pool of
> pre-opened sockets in the TCP code, and well, here we are.
> 
> This makes a number of cleanups to the handling of these pools.  Most
> notably, tcp_splice_connect() now has simpler semantics: it now always
> runs in the init namespace, and is always given a pre-opened socket
> (which could be in the guest ns).
> 
> Changes since v1:
>  * Rebased
>  * Improved wording of some commit messages
> 
> David Gibson (5):
>   tcp: Make a helper to refill each socket pool
>   tcp: Split init and ns cases for tcp_sock_refill()
>   tcp: Move socket pool declarations around
>   tcp: Split pool lookup from creating new sockets in
>     tcp_conn_new_sock()
>   tcp: Improve handling of fallback if socket pool is empty on new
>     splice

Applied too.

-- 
Stefano


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-02-14 16:27 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-13 23:48 [PATCH v2 0/5] Cleanups to tcp socket pool handling David Gibson
2023-02-13 23:48 ` [PATCH v2 1/5] tcp: Make a helper to refill each socket pool David Gibson
2023-02-13 23:48 ` [PATCH v2 2/5] tcp: Split init and ns cases for tcp_sock_refill() David Gibson
2023-02-13 23:48 ` [PATCH v2 3/5] tcp: Move socket pool declarations around David Gibson
2023-02-13 23:48 ` [PATCH v2 4/5] tcp: Split pool lookup from creating new sockets in tcp_conn_new_sock() David Gibson
2023-02-13 23:48 ` [PATCH v2 5/5] tcp: Improve handling of fallback if socket pool is empty on new splice David Gibson
2023-02-14 16:27 ` [PATCH v2 0/5] Cleanups to tcp socket pool handling Stefano Brivio

Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).