public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: passt-dev@passt.top, Stefano Brivio <sbrivio@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v2 14/32] tcp: Separate helpers to create ns listening sockets
Date: Thu, 17 Nov 2022 16:58:50 +1100	[thread overview]
Message-ID: <20221117055908.2782981-15-david@gibson.dropbear.id.au> (raw)
In-Reply-To: <20221117055908.2782981-1-david@gibson.dropbear.id.au>

tcp_sock_init*() can create either sockets listening on the host, or in
the pasta network namespace (with @ns==1).  There are, however, a number
of differences in how these two cases work in practice.  "ns" sockets
are only used in pasta mode, and they only ever lead to spliced
connections.  The functions are also only ever called in "ns" mode
with a NULL address and interface name, and it doesn't really make sense
for them to be called any other way.

Later changes will introduce further differences in behaviour between these
two cases, so it makes more sense to use separate functions for creating
the ns listening sockets than the regular external/host listening sockets.
---
 conf.c |   6 +--
 tcp.c  | 132 ++++++++++++++++++++++++++++++++++++++-------------------
 tcp.h  |   4 +-
 3 files changed, 93 insertions(+), 49 deletions(-)

diff --git a/conf.c b/conf.c
index b07d661..4721c97 100644
--- a/conf.c
+++ b/conf.c
@@ -209,7 +209,7 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg,
 
 		for (i = 0; i < PORT_EPHEMERAL_MIN; i++) {
 			if (optname == 't')
-				tcp_sock_init(c, 0, AF_UNSPEC, NULL, NULL, i);
+				tcp_sock_init(c, AF_UNSPEC, NULL, NULL, i);
 			else if (optname == 'u')
 				udp_sock_init(c, 0, AF_UNSPEC, NULL, NULL, i);
 		}
@@ -287,7 +287,7 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg,
 			bitmap_set(fwd->map, i);
 
 			if (optname == 't')
-				tcp_sock_init(c, 0, af, addr, ifname, i);
+				tcp_sock_init(c, af, addr, ifname, i);
 			else if (optname == 'u')
 				udp_sock_init(c, 0, af, addr, ifname, i);
 		}
@@ -333,7 +333,7 @@ static int conf_ports(const struct ctx *c, char optname, const char *optarg,
 			fwd->delta[i] = mapped_range.first - orig_range.first;
 
 			if (optname == 't')
-				tcp_sock_init(c, 0, af, addr, ifname, i);
+				tcp_sock_init(c, af, addr, ifname, i);
 			else if (optname == 'u')
 				udp_sock_init(c, 0, af, addr, ifname, i);
 		}
diff --git a/tcp.c b/tcp.c
index 35fca31..306f928 100644
--- a/tcp.c
+++ b/tcp.c
@@ -2987,15 +2987,15 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 /**
  * tcp_sock_init4() - Initialise listening sockets for a given IPv4 port
  * @c:		Execution context
- * @ns:		In pasta mode, if set, bind with loopback address in namespace
  * @addr:	Pointer to address for binding, NULL if not configured
  * @ifname:	Name of interface to bind to, NULL if not configured
  * @port:	Port, host order
  */
-static void tcp_sock_init4(const struct ctx *c, int ns, const struct in_addr *addr,
+static void tcp_sock_init4(const struct ctx *c, const struct in_addr *addr,
 			   const char *ifname, in_port_t port)
 {
-	union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = ns };
+	in_port_t idx = port + c->tcp.fwd_in.delta[port];
+	union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.index = idx };
 	bool spliced = false, tap = true;
 	int s;
 
@@ -3006,14 +3006,9 @@ static void tcp_sock_init4(const struct ctx *c, int ns, const struct in_addr *ad
 		if (!addr)
 			addr = &c->ip4.addr;
 
-		tap = !ns && !IN4_IS_ADDR_LOOPBACK(addr);
+		tap = !IN4_IS_ADDR_LOOPBACK(addr);
 	}
 
-	if (ns)
-		tref.tcp.index = (in_port_t)(port + c->tcp.fwd_out.delta[port]);
-	else
-		tref.tcp.index = (in_port_t)(port + c->tcp.fwd_in.delta[port]);
-
 	if (tap) {
 		s = sock_l4(c, AF_INET, IPPROTO_TCP, addr, ifname, port,
 			    tref.u32);
@@ -3039,29 +3034,25 @@ static void tcp_sock_init4(const struct ctx *c, int ns, const struct in_addr *ad
 		else
 			s = -1;
 
-		if (c->tcp.fwd_out.mode == FWD_AUTO) {
-			if (ns)
-				tcp_sock_ns[port][V4] = s;
-			else
-				tcp_sock_init_lo[port][V4] = s;
-		}
+		if (c->tcp.fwd_out.mode == FWD_AUTO)
+			tcp_sock_init_lo[port][V4] = s;
 	}
 }
 
 /**
  * tcp_sock_init6() - Initialise listening sockets for a given IPv6 port
  * @c:		Execution context
- * @ns:		In pasta mode, if set, bind with loopback address in namespace
  * @addr:	Pointer to address for binding, NULL if not configured
  * @ifname:	Name of interface to bind to, NULL if not configured
  * @port:	Port, host order
  */
-static void tcp_sock_init6(const struct ctx *c, int ns,
+static void tcp_sock_init6(const struct ctx *c,
 			   const struct in6_addr *addr, const char *ifname,
 			   in_port_t port)
 {
-	union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = ns,
-				     .tcp.v6 = 1 };
+	in_port_t idx = port + c->tcp.fwd_in.delta[port];
+	union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.v6 = 1,
+				     .tcp.index = idx };
 	bool spliced = false, tap = true;
 	int s;
 
@@ -3073,14 +3064,9 @@ static void tcp_sock_init6(const struct ctx *c, int ns,
 		if (!addr)
 			addr = &c->ip6.addr;
 
-		tap = !ns && !IN6_IS_ADDR_LOOPBACK(addr);
+		tap = !IN6_IS_ADDR_LOOPBACK(addr);
 	}
 
-	if (ns)
-		tref.tcp.index = (in_port_t)(port + c->tcp.fwd_out.delta[port]);
-	else
-		tref.tcp.index = (in_port_t)(port + c->tcp.fwd_in.delta[port]);
-
 	if (tap) {
 		s = sock_l4(c, AF_INET6, IPPROTO_TCP, addr, ifname, port,
 			    tref.u32);
@@ -3105,40 +3091,99 @@ static void tcp_sock_init6(const struct ctx *c, int ns,
 		else
 			s = -1;
 
-		if (c->tcp.fwd_out.mode == FWD_AUTO) {
-			if (ns)
-				tcp_sock_ns[port][V6] = s;
-			else
-				tcp_sock_init_lo[port][V6] = s;
-		}
+		if (c->tcp.fwd_out.mode == FWD_AUTO)
+			tcp_sock_init_lo[port][V6] = s;
 	}
 }
 
 /**
- * tcp_sock_init() - Initialise listening sockets for a given port
+ * tcp_sock_init() - Create listening sockets for a given host ("inbound") port
  * @c:		Execution context
- * @ns:		In pasta mode, if set, bind with loopback address in namespace
  * @af:		Address family to select a specific IP version, or AF_UNSPEC
  * @addr:	Pointer to address for binding, NULL if not configured
  * @ifname:	Name of interface to bind to, NULL if not configured
  * @port:	Port, host order
  */
-void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
-		   const void *addr, const char *ifname, in_port_t port)
+void tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr,
+		   const char *ifname, in_port_t port)
 {
 	if ((af == AF_INET  || af == AF_UNSPEC) && c->ifi4)
-		tcp_sock_init4(c, ns, addr, ifname, port);
+		tcp_sock_init4(c, addr, ifname, port);
 	if ((af == AF_INET6 || af == AF_UNSPEC) && c->ifi6)
-		tcp_sock_init6(c, ns, addr, ifname, port);
+		tcp_sock_init6(c, addr, ifname, port);
+}
+
+/**
+ * tcp_ns_sock_init4() - Init socket to listen for outbound IPv4 connections
+ * @c:		Execution context
+ * @port:	Port, host order
+ */
+static void tcp_ns_sock_init4(const struct ctx *c, in_port_t port)
+{
+	in_port_t idx = port + c->tcp.fwd_out.delta[port];
+	union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = 1,
+				     .tcp.splice = 1, .tcp.index = idx };
+	struct in_addr loopback = { htonl(INADDR_LOOPBACK) };
+	int s;
+
+	assert(c->mode == MODE_PASTA);
+
+	s = sock_l4(c, AF_INET, IPPROTO_TCP, &loopback, NULL, port, tref.u32);
+	if (s >= 0)
+		tcp_sock_set_bufsize(c, s);
+	else
+		s = -1;
+
+	if (c->tcp.fwd_out.mode == FWD_AUTO)
+		tcp_sock_ns[port][V4] = s;
 }
 
 /**
- * tcp_sock_init_ns() - Bind sockets in namespace for outbound connections
+ * tcp_ns_sock_init6() - Init socket to listen for outbound IPv6 connections
+ * @c:		Execution context
+ * @port:	Port, host order
+ */
+static void tcp_ns_sock_init6(const struct ctx *c, in_port_t port)
+{
+	in_port_t idx = port + c->tcp.fwd_out.delta[port];
+	union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = 1,
+				     .tcp.splice = 1, .tcp.v6 = 1,
+				     .tcp.index = idx };
+	int s;
+
+	assert(c->mode == MODE_PASTA);
+
+	s = sock_l4(c, AF_INET6, IPPROTO_TCP, &in6addr_loopback, NULL, port,
+		    tref.u32);
+	if (s >= 0)
+		tcp_sock_set_bufsize(c, s);
+	else
+		s = -1;
+
+	if (c->tcp.fwd_out.mode == FWD_AUTO)
+		tcp_sock_ns[port][V6] = s;
+}
+
+/**
+ * tcp_ns_sock_init() - Init socket to listen for spliced outbound connections
+ * @c:		Execution context
+ * @port:	Port, host order
+ */
+void tcp_ns_sock_init(const struct ctx *c, in_port_t port)
+{
+	if (c->ifi4)
+		tcp_ns_sock_init4(c, port);
+	if (c->ifi6)
+		tcp_ns_sock_init6(c, port);
+}
+
+/**
+ * tcp_ns_socks_init() - Bind sockets in namespace for outbound connections
  * @arg:	Execution context
  *
  * Return: 0
  */
-static int tcp_sock_init_ns(void *arg)
+static int tcp_ns_socks_init(void *arg)
 {
 	struct ctx *c = (struct ctx *)arg;
 	unsigned port;
@@ -3149,7 +3194,7 @@ static int tcp_sock_init_ns(void *arg)
 		if (!bitmap_isset(c->tcp.fwd_out.map, port))
 			continue;
 
-		tcp_sock_init(c, 1, AF_UNSPEC, NULL, NULL, port);
+		tcp_ns_sock_init(c, port);
 	}
 
 	return 0;
@@ -3279,7 +3324,7 @@ int tcp_init(struct ctx *c)
 	if (c->mode == MODE_PASTA) {
 		tcp_splice_init(c);
 
-		NS_CALL(tcp_sock_init_ns, c);
+		NS_CALL(tcp_ns_socks_init, c);
 
 		refill_arg.ns = 1;
 		NS_CALL(tcp_sock_refill, &refill_arg);
@@ -3364,8 +3409,7 @@ static int tcp_port_rebind(void *arg)
 
 			if ((a->c->ifi4 && tcp_sock_ns[port][V4] == -1) ||
 			    (a->c->ifi6 && tcp_sock_ns[port][V6] == -1))
-				tcp_sock_init(a->c, 1, AF_UNSPEC, NULL, NULL,
-					      port);
+				tcp_ns_sock_init(a->c, port);
 		}
 	} else {
 		for (port = 0; port < NUM_PORTS; port++) {
@@ -3398,7 +3442,7 @@ static int tcp_port_rebind(void *arg)
 
 			if ((a->c->ifi4 && tcp_sock_init_ext[port][V4] == -1) ||
 			    (a->c->ifi6 && tcp_sock_init_ext[port][V6] == -1))
-				tcp_sock_init(a->c, 0, AF_UNSPEC, NULL, NULL,
+				tcp_sock_init(a->c, AF_UNSPEC, NULL, NULL,
 					      port);
 		}
 	}
diff --git a/tcp.h b/tcp.h
index 49738ef..f4ed298 100644
--- a/tcp.h
+++ b/tcp.h
@@ -19,8 +19,8 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 		      const struct timespec *now);
 int tcp_tap_handler(struct ctx *c, int af, const void *addr,
 		    const struct pool *p, const struct timespec *now);
-void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
-		   const void *addr, const char *ifname, in_port_t port);
+void tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr,
+		   const char *ifname, in_port_t port);
 int tcp_init(struct ctx *c);
 void tcp_timer(struct ctx *c, const struct timespec *ts);
 void tcp_defer_handler(struct ctx *c);
-- 
@@ -19,8 +19,8 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 		      const struct timespec *now);
 int tcp_tap_handler(struct ctx *c, int af, const void *addr,
 		    const struct pool *p, const struct timespec *now);
-void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
-		   const void *addr, const char *ifname, in_port_t port);
+void tcp_sock_init(const struct ctx *c, sa_family_t af, const void *addr,
+		   const char *ifname, in_port_t port);
 int tcp_init(struct ctx *c);
 void tcp_timer(struct ctx *c, const struct timespec *ts);
 void tcp_defer_handler(struct ctx *c);
-- 
2.38.1


  parent reply	other threads:[~2022-11-17  5:59 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-17  5:58 [PATCH v2 00/32] Use dual stack sockets to listen for inbound TCP connections David Gibson
2022-11-17  5:58 ` [PATCH v2 01/32] clang-tidy: Suppress warning about assignments in if statements David Gibson
2022-11-17  5:58 ` [PATCH v2 02/32] style: Minor corrections to function comments David Gibson
2022-11-17  5:58 ` [PATCH v2 03/32] tcp_splice: #include tcp_splice.h in tcp_splice.c David Gibson
2022-11-17  5:58 ` [PATCH v2 04/32] tcp: Remove unused TCP_MAX_SOCKS constant David Gibson
2022-11-17  5:58 ` [PATCH v2 05/32] tcp: Better helpers for converting between connection pointer and index David Gibson
2022-11-17  5:58 ` [PATCH v2 06/32] tcp_splice: Helpers for converting from index to/from tcp_splice_conn David Gibson
2022-11-17  5:58 ` [PATCH v2 07/32] tcp: Move connection state structures into a shared header David Gibson
2022-11-17  5:58 ` [PATCH v2 08/32] tcp: Add connection union type David Gibson
2022-11-18  0:25   ` Stefano Brivio
2022-11-18  1:10     ` David Gibson
2022-11-19  8:39       ` Stefano Brivio
2022-11-17  5:58 ` [PATCH v2 09/32] tcp: Improved helpers to update connections after moving David Gibson
2022-11-17  5:58 ` [PATCH v2 10/32] tcp: Unify spliced and non-spliced connection tables David Gibson
2022-11-17  5:58 ` [PATCH v2 11/32] tcp: Unify tcp_defer_handler and tcp_splice_defer_handler() David Gibson
2022-11-17  5:58 ` [PATCH v2 12/32] tcp: Partially unify tcp_timer() and tcp_splice_timer() David Gibson
2022-11-17  5:58 ` [PATCH v2 13/32] tcp: Unify the IN_EPOLL flag David Gibson
2022-11-17  5:58 ` David Gibson [this message]
2022-11-17  5:58 ` [PATCH v2 15/32] tcp: Unify part of spliced and non-spliced conn_from_sock path David Gibson
2022-11-17  5:58 ` [PATCH v2 16/32] tcp: Use the same sockets to listen for spliced and non-spliced connections David Gibson
2022-11-17  5:58 ` [PATCH v2 17/32] tcp: Remove splice from tcp_epoll_ref David Gibson
2022-11-17  5:58 ` [PATCH v2 18/32] tcp: Don't store hash bucket in connection structures David Gibson
2022-11-17  5:58 ` [PATCH v2 19/32] inany: Helper functions for handling addresses which could be IPv4 or IPv6 David Gibson
2022-11-17  5:58 ` [PATCH v2 20/32] tcp: Hash IPv4 and IPv4-mapped-IPv6 addresses the same David Gibson
2022-11-17  5:58 ` [PATCH v2 21/32] tcp: Take tcp_hash_insert() address from struct tcp_conn David Gibson
2022-11-17  5:58 ` [PATCH v2 22/32] tcp: Simplify tcp_hash_match() to take an inany_addr David Gibson
2022-11-17  5:58 ` [PATCH v2 23/32] tcp: Unify initial sequence number calculation for IPv4 and IPv6 David Gibson
2022-11-17  5:59 ` [PATCH v2 24/32] tcp: Have tcp_seq_init() take its parameters from struct tcp_conn David Gibson
2022-11-17  5:59 ` [PATCH v2 25/32] tcp: Fix small errors in tcp_seq_init() time handling David Gibson
2022-11-17  5:59 ` [PATCH v2 26/32] tcp: Remove v6 flag from tcp_epoll_ref David Gibson
2022-11-17  5:59 ` [PATCH v2 27/32] tcp: NAT IPv4-mapped IPv6 addresses like IPv4 addresses David Gibson
2022-11-17  5:59 ` [PATCH v2 28/32] tcp_splice: Allow splicing of connections from IPv4-mapped loopback David Gibson
2022-11-17  5:59 ` [PATCH v2 29/32] tcp: Consolidate tcp_sock_init[46] David Gibson
2022-11-17  5:59 ` [PATCH v2 30/32] util: Allow sock_l4() to open dual stack sockets David Gibson
2022-11-17  5:59 ` [PATCH v2 31/32] util: Always return -1 on error in sock_l4() David Gibson
2022-11-17  5:59 ` [PATCH v2 32/32] tcp: Use dual stack sockets for port forwarding when possible David Gibson
2022-11-25  9:22 ` [PATCH v2 00/32] Use dual stack sockets to listen for inbound TCP connections Stefano Brivio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221117055908.2782981-15-david@gibson.dropbear.id.au \
    --to=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    --cc=sbrivio@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).