public inbox for passt-dev@passt.top
 help / color / mirror / code / Atom feed
From: Stefano Brivio <sbrivio@redhat.com>
To: passt-dev@passt.top
Cc: David Gibson <david@gibson.dropbear.id.au>
Subject: [PATCH v2 3/3] tcp: Don't create 'tap' socket for ports that are bound to loopback only
Date: Wed, 12 Oct 2022 17:45:54 +0200	[thread overview]
Message-ID: <20221012154554.1650667-4-sbrivio@redhat.com> (raw)
In-Reply-To: <20221012154554.1650667-1-sbrivio@redhat.com>

If the user specifies an explicit loopback address for a port
binding, we're going to use that address for the 'tap' socket, and
the same exact address for the 'spliced' socket (because those are,
by definition, only bound to loopback addresses).

This means that the second binding will fail, and, unexpectedly, the
port is forwarded, but via tap device, which means the source address
in the namespace won't be a loopback address.

Make it explicit under which conditions we're creating which kind of
socket, by refactoring tcp_sock_init() into two separate functions
for IPv4 and IPv6 and gathering those conditions at the beginning.

Also, don't create spliced sockets if the user specifies explicitly
a non-loopback address, those are harmless but not desired either.

Fixes: 3c6ae625101a ("conf, tcp, udp: Allow address specification for forwarded ports")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
v2:
  - use new IPV4_IS_LOOPBACK macro in tcp_conn_from_sock() too

 tcp.c  | 183 +++++++++++++++++++++++++++++++++++----------------------
 util.h |   3 +
 2 files changed, 117 insertions(+), 69 deletions(-)

diff --git a/tcp.c b/tcp.c
index 0d4ce57..e6979a9 100644
--- a/tcp.c
+++ b/tcp.c
@@ -275,6 +275,7 @@
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <stdint.h>
+#include <stdbool.h>
 #include <stddef.h>
 #include <string.h>
 #include <sys/epoll.h>
@@ -2906,8 +2907,8 @@ static void tcp_conn_from_sock(struct ctx *c, union epoll_ref ref,
 		memset(&conn->a.a4.zero,   0, sizeof(conn->a.a4.zero));
 		memset(&conn->a.a4.one, 0xff, sizeof(conn->a.a4.one));
 
-		if (s_addr >> IN_CLASSA_NSHIFT == IN_LOOPBACKNET ||
-		    s_addr == INADDR_ANY || htonl(s_addr) == c->ip4.addr_seen)
+		if (IPV4_IS_LOOPBACK(s_addr) || s_addr == INADDR_ANY ||
+		    htonl(s_addr) == c->ip4.addr_seen)
 			s_addr = ntohl(c->ip4.gw);
 
 		s_addr = htonl(s_addr);
@@ -3073,109 +3074,153 @@ void tcp_sock_handler(struct ctx *c, union epoll_ref ref, uint32_t events,
 }
 
 /**
- * tcp_sock_init() - Initialise listening sockets for a given port
+ * tcp_sock_init4() - Initialise listening sockets for a given IPv4 port
  * @c:		Execution context
  * @ns:		In pasta mode, if set, bind with loopback address in namespace
- * @af:		Address family to select a specific IP version, or AF_UNSPEC
  * @addr:	Pointer to address for binding, NULL if not configured
  * @ifname:	Name of interface to bind to, NULL if not configured
  * @port:	Port, host order
  */
-void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
-		   const void *addr, const char *ifname, in_port_t port)
+static void tcp_sock_init4(const struct ctx *c, int ns, const in_addr_t *addr,
+			   const char *ifname, in_port_t port)
 {
 	union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = ns };
-	const void *bind_addr;
+	bool spliced = false, tap = true;
 	int s;
 
+	if (c->mode == MODE_PASTA) {
+		spliced = !addr ||
+			  ntohl(*addr) == INADDR_ANY ||
+			  IPV4_IS_LOOPBACK(ntohl(*addr));
+
+		if (!addr)
+			addr = &c->ip4.addr;
+
+		tap = !ns && !IPV4_IS_LOOPBACK(ntohl(*addr));
+	}
+
 	if (ns)
 		tref.tcp.index = (in_port_t)(port + c->tcp.fwd_out.delta[port]);
 	else
 		tref.tcp.index = (in_port_t)(port + c->tcp.fwd_in.delta[port]);
 
-	if (af == AF_INET || af == AF_UNSPEC) {
-		if (!addr && c->mode == MODE_PASTA)
-			bind_addr = &c->ip4.addr;
+	if (tap) {
+		s = sock_l4(c, AF_INET, IPPROTO_TCP, addr, ifname, port,
+			    tref.u32);
+		if (s >= 0)
+			tcp_sock_set_bufsize(c, s);
 		else
-			bind_addr = addr;
+			s = -1;
 
-		tref.tcp.v6 = 0;
-		tref.tcp.splice = 0;
+		if (c->tcp.fwd_in.mode == FWD_AUTO)
+			tcp_sock_init_ext[port][V4] = s;
+	}
 
-		if (!ns) {
-			s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, ifname,
-				    port, tref.u32);
-			if (s >= 0)
-				tcp_sock_set_bufsize(c, s);
-			else
-				s = -1;
+	if (spliced) {
+		tref.tcp.splice = 1;
 
-			if (c->tcp.fwd_in.mode == FWD_AUTO)
-				tcp_sock_init_ext[port][V4] = s;
-		}
+		addr = &(uint32_t){ htonl(INADDR_LOOPBACK) };
 
-		if (c->mode == MODE_PASTA) {
-			bind_addr = &(uint32_t){ htonl(INADDR_LOOPBACK) };
+		s = sock_l4(c, AF_INET, IPPROTO_TCP, addr, ifname, port,
+			    tref.u32);
+		if (s >= 0)
+			tcp_sock_set_bufsize(c, s);
+		else
+			s = -1;
 
-			tref.tcp.splice = 1;
-			s = sock_l4(c, AF_INET, IPPROTO_TCP, bind_addr, ifname,
-				    port, tref.u32);
-			if (s >= 0)
-				tcp_sock_set_bufsize(c, s);
+		if (c->tcp.fwd_out.mode == FWD_AUTO) {
+			if (ns)
+				tcp_sock_ns[port][V4] = s;
 			else
-				s = -1;
-
-			if (c->tcp.fwd_out.mode == FWD_AUTO) {
-				if (ns)
-					tcp_sock_ns[port][V4] = s;
-				else
-					tcp_sock_init_lo[port][V4] = s;
-			}
+				tcp_sock_init_lo[port][V4] = s;
 		}
 	}
+}
+
+/**
+ * tcp_sock_init6() - Initialise listening sockets for a given IPv6 port
+ * @c:		Execution context
+ * @ns:		In pasta mode, if set, bind with loopback address in namespace
+ * @addr:	Pointer to address for binding, NULL if not configured
+ * @ifname:	Name of interface to bind to, NULL if not configured
+ * @port:	Port, host order
+ */
+static void tcp_sock_init6(const struct ctx *c, int ns,
+			   const struct in6_addr *addr, const char *ifname,
+			   in_port_t port)
+{
+	union tcp_epoll_ref tref = { .tcp.listen = 1, .tcp.outbound = ns,
+				     .tcp.v6 = 1 };
+	bool spliced = false, tap = true;
+	int s;
+
+	if (c->mode == MODE_PASTA) {
+		spliced = !addr ||
+			  IN6_IS_ADDR_UNSPECIFIED(addr) ||
+			  IN6_IS_ADDR_LOOPBACK(addr);
+
+		if (!addr)
+			addr = &c->ip6.addr;
+
+		tap = !ns && !IN6_IS_ADDR_LOOPBACK(addr);
+	}
+
+	if (ns)
+		tref.tcp.index = (in_port_t)(port + c->tcp.fwd_out.delta[port]);
+	else
+		tref.tcp.index = (in_port_t)(port + c->tcp.fwd_in.delta[port]);
 
-	if (af == AF_INET6 || af == AF_UNSPEC) {
-		if (!addr && c->mode == MODE_PASTA)
-			bind_addr = &c->ip6.addr;
+	if (tap) {
+		s = sock_l4(c, AF_INET6, IPPROTO_TCP, addr, ifname, port,
+			    tref.u32);
+		if (s >= 0)
+			tcp_sock_set_bufsize(c, s);
 		else
-			bind_addr = addr;
+			s = -1;
 
-		tref.tcp.v6 = 1;
+		if (c->tcp.fwd_in.mode == FWD_AUTO)
+			tcp_sock_init_ext[port][V6] = s;
+	}
 
-		tref.tcp.splice = 0;
-		if (!ns) {
-			s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, ifname,
-				    port, tref.u32);
-			if (s >= 0)
-				tcp_sock_set_bufsize(c, s);
-			else
-				s = -1;
+	if (spliced) {
+		tref.tcp.splice = 1;
 
-			if (c->tcp.fwd_in.mode == FWD_AUTO)
-				tcp_sock_init_ext[port][V6] = s;
-		}
+		addr = &in6addr_loopback;
 
-		if (c->mode == MODE_PASTA) {
-			bind_addr = &in6addr_loopback;
+		s = sock_l4(c, AF_INET6, IPPROTO_TCP, addr, ifname, port,
+			    tref.u32);
+		if (s >= 0)
+			tcp_sock_set_bufsize(c, s);
+		else
+			s = -1;
 
-			tref.tcp.splice = 1;
-			s = sock_l4(c, AF_INET6, IPPROTO_TCP, bind_addr, ifname,
-				    port, tref.u32);
-			if (s >= 0)
-				tcp_sock_set_bufsize(c, s);
+		if (c->tcp.fwd_out.mode == FWD_AUTO) {
+			if (ns)
+				tcp_sock_ns[port][V6] = s;
 			else
-				s = -1;
-
-			if (c->tcp.fwd_out.mode == FWD_AUTO) {
-				if (ns)
-					tcp_sock_ns[port][V6] = s;
-				else
-					tcp_sock_init_lo[port][V6] = s;
-			}
+				tcp_sock_init_lo[port][V6] = s;
 		}
 	}
 }
 
+/**
+ * tcp_sock_init() - Initialise listening sockets for a given port
+ * @c:		Execution context
+ * @ns:		In pasta mode, if set, bind with loopback address in namespace
+ * @af:		Address family to select a specific IP version, or AF_UNSPEC
+ * @addr:	Pointer to address for binding, NULL if not configured
+ * @ifname:	Name of interface to bind to, NULL if not configured
+ * @port:	Port, host order
+ */
+void tcp_sock_init(const struct ctx *c, int ns, sa_family_t af,
+		   const void *addr, const char *ifname, in_port_t port)
+{
+	if (af == AF_INET || af == AF_UNSPEC)
+		tcp_sock_init4(c, ns, addr, ifname, port);
+	if (af == AF_INET6 || af == AF_UNSPEC)
+		tcp_sock_init6(c, ns, addr, ifname, port);
+}
+
 /**
  * tcp_sock_init_ns() - Bind sockets in namespace for outbound connections
  * @arg:	Execution context
diff --git a/util.h b/util.h
index 7dc3d18..b1eadfd 100644
--- a/util.h
+++ b/util.h
@@ -81,6 +81,9 @@
 #define MAC_ZERO		((uint8_t [ETH_ALEN]){ 0 })
 #define MAC_IS_ZERO(addr)	(!memcmp((addr), MAC_ZERO, ETH_ALEN))
 
+#define IPV4_IS_LOOPBACK(addr)						\
+	((addr) >> IN_CLASSA_NSHIFT == IN_LOOPBACKNET)
+
 #define NS_FN_STACK_SIZE	(RLIMIT_STACK_VAL * 1024 / 4)
 #define NS_CALL(fn, arg)						\
 	do {								\
-- 
@@ -81,6 +81,9 @@
 #define MAC_ZERO		((uint8_t [ETH_ALEN]){ 0 })
 #define MAC_IS_ZERO(addr)	(!memcmp((addr), MAC_ZERO, ETH_ALEN))
 
+#define IPV4_IS_LOOPBACK(addr)						\
+	((addr) >> IN_CLASSA_NSHIFT == IN_LOOPBACKNET)
+
 #define NS_FN_STACK_SIZE	(RLIMIT_STACK_VAL * 1024 / 4)
 #define NS_CALL(fn, arg)						\
 	do {								\
-- 
2.35.1


      parent reply	other threads:[~2022-10-12 15:45 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-12 15:45 [PATCH v2 0/3] Fixes for spliced connections Stefano Brivio
2022-10-12 15:45 ` [PATCH v2 1/3] tcp, tcp_splice: Adjust comments to current meaning of inbound and outbound Stefano Brivio
2022-10-12 15:45 ` [PATCH v2 2/3] tcp, tcp_splice: Fix port remapping for inbound, spliced connections Stefano Brivio
2022-10-13  1:19   ` David Gibson
2022-10-12 15:45 ` Stefano Brivio [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221012154554.1650667-4-sbrivio@redhat.com \
    --to=sbrivio@redhat.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=passt-dev@passt.top \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://passt.top/passt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for IMAP folder(s).